torch-rb 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -1
- data/codegen/generate_functions.rb +6 -6
- data/codegen/native_functions.yaml +462 -190
- data/ext/torch/utils.h +1 -1
- data/lib/torch/nn/elu.rb +20 -0
- data/lib/torch/nn/functional.rb +23 -1
- data/lib/torch/nn/functional_attention.rb +5 -5
- data/lib/torch/nn/gelu.rb +18 -0
- data/lib/torch/nn/leaky_relu.rb +1 -1
- data/lib/torch/tensor.rb +3 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +2 -0
- metadata +6 -4
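The Ruby-side additions listed above (new data/lib/torch/nn/elu.rb and data/lib/torch/nn/gelu.rb, plus the expanded data/lib/torch/nn/functional.rb) wrap these activations for the gem. A minimal usage sketch, assuming the new Torch::NN::ELU and Torch::NN::GELU classes follow the same constructor-and-call pattern as torch-rb's existing activation modules (the alpha: option is an assumption borrowed from PyTorch's nn.ELU, not read from this diff):

```ruby
require "torch"

x = Torch.randn(2, 3)

# Assumed API: new activation modules mirroring PyTorch's nn.ELU / nn.GELU
elu  = Torch::NN::ELU.new(alpha: 1.0)
gelu = Torch::NN::GELU.new

puts elu.call(x).inspect   # ELU applied elementwise
puts gelu.call(x).inspect  # GELU applied elementwise
```

The hunks that follow are from data/codegen/native_functions.yaml.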
@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out
 
 # This function compares the storage numel of self with that of other, where
-# storage numel is
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
@@ -175,12 +175,24 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda
 
+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu
 
 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()
 
+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range
@@ -470,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -536,8 +549,8 @@
   structured_delegate: add.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: add_sparse
-    SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
+    SparseCPU, SparseCUDA, SparseMeta: add_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
     MkldnnCPU: mkldnn_add
     ZeroTensor: add_zerotensor
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
@@ -548,8 +561,8 @@
   variants: method
   structured_delegate: add.out
   dispatch:
-    SparseCPU, SparseCUDA: add_sparse_
-    SparseCsrCPU, SparseCsrCUDA: add_sparse_csr_
+    SparseCPU, SparseCUDA, SparseMeta: add_sparse_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
     MkldnnCPU: mkldnn_add_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
   tags: pointwise
@@ -562,10 +575,10 @@
     Generic: add (AllAndComplex, BFloat16, Half, ComplexHalf)
     ScalarOnly: add (Bool)
   dispatch:
-    SparseCPU: add_out_sparse_cpu
+    SparseCPU, SparseMeta: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU:
-    SparseCsrCUDA:
+    SparseCsrCPU, SparseCsrMeta: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -763,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange
 
-# This operator should be named `
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1220,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise
 
+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -1621,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps
 
 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1729,6 +1750,7 @@
 - func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
   variants: function
   dispatch:
+    Meta: copy_meta
     CompositeExplicitAutogradNonFunctional: copy
   tags: core
 
@@ -1847,7 +1869,10 @@
 - func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-
+
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out
 
 - func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
@@ -2346,7 +2371,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core
 
@@ -2452,7 +2477,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out
 
@@ -2954,12 +2979,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps
 
 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out
 
 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2967,12 +2994,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps
 
 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out
 
 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2980,12 +3009,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps
 
 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out
 
 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
@@ -3097,6 +3128,7 @@
   structured: True
   dispatch:
     CPU, CUDA: isin_Tensor_Tensor_out
+    MPS: isin_Tensor_Tensor_out_mps
 
 - func: isin.Tensor_Tensor(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor
   variants: function
@@ -3238,6 +3270,8 @@
   autogen: native_layer_norm_backward.out
   tags: core
 
+- func: rms_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
   dispatch:
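For context on the rms_norm schema added above: RMS normalization scales the input by the reciprocal root-mean-square over the normalized shape, optionally multiplied by a weight, with an eps for numerical stability. A rough sketch of the computation using plain torch-rb tensor ops (illustration only; whether the gem's codegen exposes a Torch.rms_norm binding is not visible from this hunk):

```ruby
require "torch"

# RMS-normalize a 1-D tensor: y = x / sqrt(mean(x^2) + eps)
x   = Torch.randn(8)
eps = 1e-6
rms = Torch.sqrt(x.pow(2).mean + eps)
y   = x / rms
```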
@@ -3302,14 +3336,39 @@
   dispatch:
     CUDA: _cslt_compress
 
-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm
 
-- func:
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_tile(Tensor input, str algorithm="", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CUDA: _sparse_semi_structured_tile
+
+- func: _sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)
+  dispatch:
+    CUDA: _sparse_semi_structured_apply
+
+- func: _sparse_semi_structured_apply_dense(Tensor input, Tensor thread_masks) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_apply_dense
+
+# DEPRECATED: Use torch.__sparse_semi_structured_mm/torch._sparse_semi_structured_addmm instead
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear
 
+- func: _sparse_semi_structured_mm(Tensor mat1, Tensor mat1_meta, Tensor mat2, *, ScalarType? out_dtype=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_mm
+
+- func: _sparse_semi_structured_addmm(Tensor input, Tensor mat1, Tensor mat1_meta, Tensor mat2, *, Scalar alpha=1, Scalar beta=1, ScalarType? out_dtype=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_addmm
+
 - func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
   dispatch:
     CUDA: _mixed_dtypes_linear
@@ -4050,20 +4109,30 @@
 
 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
+    CPU: _int_mm_cpu
     CUDA: _int_mm_cuda
 
 - func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
+    CPU: _int_mm_out_cpu
     CUDA: _int_mm_out_cuda
 
 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
   dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
     CUDA: _convert_weight_to_int4pack_cuda
 
 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
+    CPU: _weight_int4pack_mm_cpu
+    MPS: _weight_int4pack_mm_mps
     CUDA: _weight_int4pack_mm_cuda
 
+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+    MPS: _weight_int8pack_mm_mps
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
 
@@ -4439,7 +4508,6 @@
     MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core
 
 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
@@ -4810,7 +4878,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor:
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint
 
 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4969,6 +5037,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
 
 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -5356,6 +5425,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out
 
+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+#   input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+#   easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
@@ -5363,7 +5447,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]
 
 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
@@ -5562,6 +5646,16 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda
 
+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+    CUDA: _chunk_cat_cuda
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+    CUDA: _chunk_cat_out_cuda
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
@@ -5626,8 +5720,8 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: sum
-    SparseCPU, SparseCUDA: sum_coo
-    SparseCsrCPU, SparseCsrCUDA: sum_csr
+    SparseCPU, SparseCUDA, SparseMeta: sum_coo
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_csr
   autogen: sum.out
 
 - func: sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -5753,6 +5847,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out
 
 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6008,7 +6103,6 @@
     CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
-  tags: core
 
 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args
 
@@ -6091,6 +6185,58 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out
 
+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
+- func: _nested_compute_contiguous_strides_offsets(Tensor nested_size) -> (Tensor, Tensor)
+  variants: function
+  device_check: NoCheck
+  dispatch:
+    CPU, CUDA: _nested_compute_contiguous_strides_offsets
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
@@ -6275,6 +6421,7 @@
   variants: function
   dispatch:
     CPU, CUDA: var_mean
+    MPS: var_mean_mps
   autogen: var_mean.correction_out
 
 - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6295,15 +6442,13 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: where
-    MPS: where_mps
+    CPU, CUDA, MPS: where
   tags: [core, pointwise]
 
 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: where_self_out
-    MPS: where_self_out_mps
+    CPU, CUDA, MPS: where_self_out
 
 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6357,7 +6502,7 @@
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
     MPS: _efficientzerotensor_mps
-    Meta:
+    Meta: _efficientzerotensor_meta_symint
   autogen: _efficientzerotensor.out
 
 - func: zeros(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6434,6 +6579,32 @@
     SparseCPU, SparseCUDA: norm_sparse
   autogen: native_norm.ScalarOpt_dim_dtype_out
 
+- func: _batch_norm_with_update(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CPU: _batch_norm_with_update_cpu
+    CUDA: _batch_norm_with_update_cuda
+    MPS: _batch_norm_with_update_mps
+    MkldnnCPU: _batch_norm_with_update_mkldnn
+  autogen: _batch_norm_with_update_functional
+
+- func: _batch_norm_with_update.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd, Tensor(g!) reserve) -> (Tensor(d!), Tensor(e!), Tensor(f!), Tensor(g!))
+  dispatch:
+    CPU: _batch_norm_with_update_cpu_out
+    CUDA: _batch_norm_with_update_cuda_out
+    MPS: _batch_norm_with_update_mps_out
+
+- func: _batch_norm_no_update(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CompositeExplicitAutograd: _batch_norm_no_update
+  autogen: _batch_norm_no_update.out
+
+- func: batch_norm_backward(Tensor grad_out, Tensor input, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, bool update, float eps, bool[3] output_mask, Tensor reserve) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CPU: _new_batch_norm_backward_cpu
+    CUDA: _new_batch_norm_backward_cuda
+    MPS: _new_batch_norm_backward_mps
+    MkldnnCPU: _new_batch_norm_backward_mkldnn
+
 # TODO: reduce signatures down to one when optional args is available
 - func: _sparse_sum(Tensor self) -> Tensor
 
@@ -6644,7 +6815,7 @@
     MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
-    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
     NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
@@ -6934,7 +7105,11 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.
 
-- func:
+- func: _sparse_compressed_tensor_with_dims(int nnz, int dense_dim, int[] size, int[] blocksize, ScalarType index_dtype, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: sparse_compressed_tensor_with_dims
+
+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor
 
@@ -6951,7 +7126,10 @@
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 
-- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -7035,9 +7213,9 @@
 - func: sparse_dim(Tensor self) -> int
   variants: method
   dispatch:
-    CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
+    CompositeExplicitAutograd: sparse_dim_default
   device_check: NoCheck
   device_guard: False
 
@@ -7052,9 +7230,9 @@
 - func: dense_dim(Tensor self) -> int
   variants: method
   dispatch:
-    CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
+    CompositeExplicitAutograd: dense_dim_default
   device_check: NoCheck
   device_guard: False
 
@@ -7070,7 +7248,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False
 
@@ -7133,7 +7311,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
-    SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
@@ -7142,7 +7320,7 @@
 - func: crow_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
     CompositeExplicitAutograd: crow_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7150,7 +7328,7 @@
 - func: col_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
     CompositeExplicitAutograd: col_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7158,7 +7336,7 @@
 - func: ccol_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
     CompositeExplicitAutograd: ccol_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7166,7 +7344,7 @@
 - func: row_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
     CompositeExplicitAutograd: row_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7185,7 +7363,7 @@
   device_check: NoCheck # Allows copy into different device
   variants: function
   dispatch:
-    SparseCPU, SparseCUDA: copy_sparse_
+    SparseCPU, SparseCUDA, SparseMeta: copy_sparse_
   autogen: copy_sparse_to_sparse, copy_sparse_to_sparse.out
 
 # By adding the AutogradNestedTensor this makes this function CompositeImplicit-like for nested tensors
@@ -7288,7 +7466,7 @@
     MkldnnCPU: mkldnn_reorder_conv2d_weight
   autogen: mkldnn_reorder_conv2d_weight.out
 
-- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
+- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
   variants: function
   python_module: nn
   dispatch:
@@ -7536,7 +7714,7 @@
 
 - func: result_type.Scalar_Scalar(Scalar scalar1, Scalar scalar2) -> ScalarType
 
-- func: can_cast(ScalarType
+- func: can_cast(ScalarType from_, ScalarType to) -> bool
   variants: function
 
 - func: promote_types(ScalarType type1, ScalarType type2) -> ScalarType
@@ -7675,6 +7853,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_
   autogen: set.source_Storage, set.source_Storage_out
+  tags: inplace_view
 
 - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7687,6 +7866,7 @@
     MPS: set_storage_mps_
     QuantizedCPU, QuantizedCUDA: set_storage_quantized_
   autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+  tags: inplace_view
 
 - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7694,6 +7874,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: set__symint
+  tags: inplace_view
 
 - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
   variants: method
@@ -7702,6 +7883,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_tensor_
   autogen: set.source_Tensor, set.source_Tensor_out
+  tags: inplace_view
 
 - func: set_(Tensor(a!) self) -> Tensor(a!)
   variants: method
@@ -7711,6 +7893,7 @@
     Meta: set_meta_
     MPS: set_mps_
   autogen: set, set.out
+  tags: inplace_view
 
 # Not making it CompositeImplicitAutograd because lift
 # should be a primitive w.r.t. functorch
@@ -10106,18 +10289,21 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: alias
+    NestedTensorCPU, NestedTensorCUDA: alias_nested
   tags: core
 
 - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+    CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out
 
 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+    CPU: _amp_update_scale_cpu_
   autogen: _amp_update_scale, _amp_update_scale.out
 
 #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
@@ -10137,14 +10323,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow
     CUDA: foreach_tensor_add_scalar_kernel_cuda
 
 - func: _foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow_
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
   autogen: _foreach_add.Scalar_out
 
@@ -10152,14 +10338,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow
     CUDA: foreach_tensor_add_list_kernel_cuda
 
 - func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow_
     CUDA: foreach_tensor_add_list_kernel_cuda_
   autogen: _foreach_add.List_out
 
@@ -10167,14 +10353,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalarlist_kernel_slow
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda
 
 - func: _foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalarlist_kernel_slow_
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
   autogen: _foreach_add.ScalarList_out
 
@@ -10182,14 +10368,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow
     CUDA: foreach_tensor_add_tensor_kernel_cuda
 
 - func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow_
     CUDA: foreach_tensor_add_tensor_kernel_cuda_
   autogen: _foreach_add.Tensor_out
 
@@ -10197,14 +10383,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalar_kernel_slow
     CUDA: foreach_tensor_sub_scalar_kernel_cuda
 
 - func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalar_kernel_slow_
     CUDA: foreach_tensor_sub_scalar_kernel_cuda_
   autogen: _foreach_sub.Scalar_out
 
@@ -10212,14 +10398,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_list_kernel_slow
     CUDA: foreach_tensor_sub_list_kernel_cuda
 
 - func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_list_kernel_slow_
     CUDA: foreach_tensor_sub_list_kernel_cuda_
   autogen: _foreach_sub.List_out
 
@@ -10227,14 +10413,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalarlist_kernel_slow
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda
 
 - func: _foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalarlist_kernel_slow_
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
   autogen: _foreach_sub.ScalarList_out
 
@@ -10242,14 +10428,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow
     CUDA: foreach_tensor_mul_scalar_kernel_cuda
 
 - func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow_
     CUDA: foreach_tensor_mul_scalar_kernel_cuda_
   autogen: _foreach_mul.Scalar_out
 
@@ -10257,14 +10443,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow
     CUDA: foreach_tensor_mul_list_kernel_cuda
 
 - func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow_
     CUDA: foreach_tensor_mul_list_kernel_cuda_
   autogen: _foreach_mul.List_out
 
@@ -10272,14 +10458,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalarlist_kernel_slow
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda
 
 - func: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalarlist_kernel_slow_
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
   autogen: _foreach_mul.ScalarList_out
 
@@ -10287,14 +10473,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow
     CUDA: foreach_tensor_mul_tensor_kernel_cuda
 
 - func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow_
     CUDA: foreach_tensor_mul_tensor_kernel_cuda_
   autogen: _foreach_mul.Tensor_out
 
@@ -10302,14 +10488,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalar_kernel_slow
     CUDA: foreach_tensor_div_scalar_kernel_cuda
 
 - func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalar_kernel_slow_
     CUDA: foreach_tensor_div_scalar_kernel_cuda_
   autogen: _foreach_div.Scalar_out
 
@@ -10317,14 +10503,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow
     CUDA: foreach_tensor_div_list_kernel_cuda
 
 - func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow_
     CUDA: foreach_tensor_div_list_kernel_cuda_
   autogen: _foreach_div.List_out
 
@@ -10332,14 +10518,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalarlist_kernel_slow
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda
 
 - func: _foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalarlist_kernel_slow_
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
   autogen: _foreach_div.ScalarList_out
 
@@ -10347,14 +10533,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow
     CUDA: foreach_tensor_div_tensor_kernel_cuda
 
 - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow_
     CUDA: foreach_tensor_div_tensor_kernel_cuda_
   autogen: _foreach_div.Tensor_out
 
@@ -10362,14 +10548,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda
 
 - func: _foreach_clamp_max_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
   autogen: _foreach_clamp_max.Scalar_out
 
@@ -10377,14 +10563,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda
 
 - func: _foreach_clamp_max_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow_
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_clamp_max.List_out
 
@@ -10392,14 +10578,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda
 
 - func: _foreach_clamp_max_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_clamp_max.ScalarList_out
 
@@ -10407,14 +10593,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda
 
 - func: _foreach_clamp_min_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
   autogen: _foreach_clamp_min.Scalar_out
 
@@ -10422,14 +10608,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda
 
 - func: _foreach_clamp_min_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow_
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
   autogen: _foreach_clamp_min.List_out
 
@@ -10437,14 +10623,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda
 
 - func: _foreach_clamp_min_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
   autogen: _foreach_clamp_min.ScalarList_out
 
@@ -10453,14 +10639,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda
 
 - func: _foreach_maximum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
   autogen: _foreach_maximum.Scalar_out
 
@@ -10469,14 +10655,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda
 
 - func: _foreach_maximum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow_
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
   autogen: _foreach_maximum.List_out
 
@@ -10485,14 +10671,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda
 
 - func: _foreach_maximum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
   autogen: _foreach_maximum.ScalarList_out
 
@@ -10500,14 +10686,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda
 
 - func: _foreach_minimum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
   autogen: _foreach_minimum.Scalar_out
 
@@ -10515,14 +10701,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda
 
 - func: _foreach_minimum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow_
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_minimum.List_out
 
@@ -10530,14 +10716,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda
 
 - func: _foreach_minimum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_minimum.ScalarList_out
 
@@ -10545,28 +10731,28 @@
|
|
10545
10731
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10546
10732
|
variants: function
|
10547
10733
|
dispatch:
|
10548
|
-
|
10734
|
+
CompositeExplicitAutograd: foreach_tensor_addcdiv_scalar_slow
|
10549
10735
|
CUDA: foreach_tensor_addcdiv_scalar_cuda
|
10550
10736
|
|
10551
10737
|
- func: _foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
|
10552
10738
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10553
10739
|
variants: function
|
10554
10740
|
dispatch:
|
10555
|
-
|
10741
|
+
CompositeExplicitAutograd: foreach_tensor_addcdiv_scalarlist_slow
|
10556
10742
|
CUDA: foreach_tensor_addcdiv_scalarlist_cuda
|
10557
10743
|
|
10558
10744
|
- func: _foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
|
10559
10745
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10560
10746
|
variants: function
|
10561
10747
|
dispatch:
|
10562
|
-
|
10748
|
+
CompositeExplicitAutograd: foreach_tensor_addcdiv_tensor_slow
|
10563
10749
|
CUDA: foreach_tensor_addcdiv_tensor_cuda
|
10564
10750
|
|
10565
10751
|
- func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
|
10566
10752
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10567
10753
|
variants: function
|
10568
10754
|
dispatch:
|
10569
|
-
|
10755
|
+
CompositeExplicitAutograd: foreach_tensor_addcdiv_scalar_slow_
|
10570
10756
|
CUDA: foreach_tensor_addcdiv_scalar_cuda_
|
10571
10757
|
autogen: _foreach_addcdiv.Scalar_out
|
10572
10758
|
|
@@ -10574,7 +10760,7 @@
|
|
10574
10760
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10575
10761
|
variants: function
|
10576
10762
|
dispatch:
|
10577
|
-
|
10763
|
+
CompositeExplicitAutograd: foreach_tensor_addcdiv_scalarlist_slow_
|
10578
10764
|
CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
|
10579
10765
|
autogen: _foreach_addcdiv.ScalarList_out
|
10580
10766
|
|
@@ -10582,7 +10768,7 @@
|
|
10582
10768
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10583
10769
|
variants: function
|
10584
10770
|
dispatch:
|
10585
|
-
|
10771
|
+
CompositeExplicitAutograd: foreach_tensor_addcdiv_tensor_slow_
|
10586
10772
|
CUDA: foreach_tensor_addcdiv_tensor_cuda_
|
10587
10773
|
autogen: _foreach_addcdiv.Tensor_out
|
10588
10774
|
|
@@ -10590,28 +10776,28 @@
|
|
10590
10776
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10591
10777
|
variants: function
|
10592
10778
|
dispatch:
|
10593
|
-
|
10779
|
+
CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow
|
10594
10780
|
CUDA: foreach_tensor_addcmul_scalar_cuda
|
10595
10781
|
|
10596
10782
|
- func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
|
10597
10783
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10598
10784
|
variants: function
|
10599
10785
|
dispatch:
|
10600
|
-
|
10786
|
+
CompositeExplicitAutograd: foreach_tensor_addcmul_scalarlist_slow
|
10601
10787
|
CUDA: foreach_tensor_addcmul_scalarlist_cuda
|
10602
10788
|
|
10603
10789
|
- func: _foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
|
10604
10790
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10605
10791
|
variants: function
|
10606
10792
|
dispatch:
|
10607
|
-
|
10793
|
+
CompositeExplicitAutograd: foreach_tensor_addcmul_tensor_slow
|
10608
10794
|
CUDA: foreach_tensor_addcmul_tensor_cuda
|
10609
10795
|
|
10610
10796
|
- func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
|
10611
10797
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10612
10798
|
variants: function
|
10613
10799
|
dispatch:
|
10614
|
-
|
10800
|
+
CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow_
|
10615
10801
|
CUDA: foreach_tensor_addcmul_scalar_cuda_
|
10616
10802
|
autogen: _foreach_addcmul.Scalar_out
|
10617
10803
|
|
@@ -10619,7 +10805,7 @@
|
|
10619
10805
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10620
10806
|
variants: function
|
10621
10807
|
dispatch:
|
10622
|
-
|
10808
|
+
CompositeExplicitAutograd: foreach_tensor_addcmul_scalarlist_slow_
|
10623
10809
|
CUDA: foreach_tensor_addcmul_scalarlist_cuda_
|
10624
10810
|
autogen: _foreach_addcmul.ScalarList_out
|
10625
10811
|
|
@@ -10627,7 +10813,7 @@
|
|
10627
10813
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10628
10814
|
variants: function
|
10629
10815
|
dispatch:
|
10630
|
-
|
10816
|
+
CompositeExplicitAutograd: foreach_tensor_addcmul_tensor_slow_
|
10631
10817
|
CUDA: foreach_tensor_addcmul_tensor_cuda_
|
10632
10818
|
autogen: _foreach_addcmul.Tensor_out
|
10633
10819
|
|
@@ -10635,14 +10821,14 @@
|
|
10635
10821
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10636
10822
|
variants: function
|
10637
10823
|
dispatch:
|
10638
|
-
|
10824
|
+
CompositeExplicitAutograd: foreach_tensor_abs_slow
|
10639
10825
|
CUDA: foreach_tensor_abs_cuda
|
10640
10826
|
|
10641
10827
|
- func: _foreach_abs_(Tensor(a!)[] self) -> ()
|
10642
10828
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10643
10829
|
variants: function
|
10644
10830
|
dispatch:
|
10645
|
-
|
10831
|
+
CompositeExplicitAutograd: foreach_tensor_abs_slow_
|
10646
10832
|
CUDA: foreach_tensor_abs_cuda_
|
10647
10833
|
autogen: _foreach_abs.out
|
10648
10834
|
|
@@ -10650,14 +10836,14 @@
|
|
10650
10836
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10651
10837
|
variants: function
|
10652
10838
|
dispatch:
|
10653
|
-
|
10839
|
+
CompositeExplicitAutograd: foreach_tensor_acos_slow
|
10654
10840
|
CUDA: foreach_tensor_acos_cuda
|
10655
10841
|
|
10656
10842
|
- func: _foreach_acos_(Tensor(a!)[] self) -> ()
|
10657
10843
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10658
10844
|
variants: function
|
10659
10845
|
dispatch:
|
10660
|
-
|
10846
|
+
CompositeExplicitAutograd: foreach_tensor_acos_slow_
|
10661
10847
|
CUDA: foreach_tensor_acos_cuda_
|
10662
10848
|
autogen: _foreach_acos.out
|
10663
10849
|
|
@@ -10665,14 +10851,14 @@
|
|
10665
10851
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10666
10852
|
variants: function
|
10667
10853
|
dispatch:
|
10668
|
-
|
10854
|
+
CompositeExplicitAutograd: foreach_tensor_asin_slow
|
10669
10855
|
CUDA: foreach_tensor_asin_cuda
|
10670
10856
|
|
10671
10857
|
- func: _foreach_asin_(Tensor(a!)[] self) -> ()
|
10672
10858
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10673
10859
|
variants: function
|
10674
10860
|
dispatch:
|
10675
|
-
|
10861
|
+
CompositeExplicitAutograd: foreach_tensor_asin_slow_
|
10676
10862
|
CUDA: foreach_tensor_asin_cuda_
|
10677
10863
|
autogen: _foreach_asin.out
|
10678
10864
|
|
@@ -10680,14 +10866,14 @@
|
|
10680
10866
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10681
10867
|
variants: function
|
10682
10868
|
dispatch:
|
10683
|
-
|
10869
|
+
CompositeExplicitAutograd: foreach_tensor_atan_slow
|
10684
10870
|
CUDA: foreach_tensor_atan_cuda
|
10685
10871
|
|
10686
10872
|
- func: _foreach_atan_(Tensor(a!)[] self) -> ()
|
10687
10873
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10688
10874
|
variants: function
|
10689
10875
|
dispatch:
|
10690
|
-
|
10876
|
+
CompositeExplicitAutograd: foreach_tensor_atan_slow_
|
10691
10877
|
CUDA: foreach_tensor_atan_cuda_
|
10692
10878
|
autogen: _foreach_atan.out
|
10693
10879
|
|
@@ -10695,14 +10881,14 @@
|
|
10695
10881
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10696
10882
|
variants: function
|
10697
10883
|
dispatch:
|
10698
|
-
|
10884
|
+
CompositeExplicitAutograd: foreach_tensor_ceil_slow
|
10699
10885
|
CUDA: foreach_tensor_ceil_cuda
|
10700
10886
|
|
10701
10887
|
- func: _foreach_ceil_(Tensor(a!)[] self) -> ()
|
10702
10888
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10703
10889
|
variants: function
|
10704
10890
|
dispatch:
|
10705
|
-
|
10891
|
+
CompositeExplicitAutograd: foreach_tensor_ceil_slow_
|
10706
10892
|
CUDA: foreach_tensor_ceil_cuda_
|
10707
10893
|
autogen: _foreach_ceil.out
|
10708
10894
|
|
@@ -10710,14 +10896,14 @@
|
|
10710
10896
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10711
10897
|
variants: function
|
10712
10898
|
dispatch:
|
10713
|
-
|
10899
|
+
CompositeExplicitAutograd: foreach_tensor_cos_slow
|
10714
10900
|
CUDA: foreach_tensor_cos_cuda
|
10715
10901
|
|
10716
10902
|
- func: _foreach_cos_(Tensor(a!)[] self) -> ()
|
10717
10903
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10718
10904
|
variants: function
|
10719
10905
|
dispatch:
|
10720
|
-
|
10906
|
+
CompositeExplicitAutograd: foreach_tensor_cos_slow_
|
10721
10907
|
CUDA: foreach_tensor_cos_cuda_
|
10722
10908
|
autogen: _foreach_cos.out
|
10723
10909
|
|
@@ -10725,14 +10911,14 @@
|
|
10725
10911
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10726
10912
|
variants: function
|
10727
10913
|
dispatch:
|
10728
|
-
|
10914
|
+
CompositeExplicitAutograd: foreach_tensor_cosh_slow
|
10729
10915
|
CUDA: foreach_tensor_cosh_cuda
|
10730
10916
|
|
10731
10917
|
- func: _foreach_cosh_(Tensor(a!)[] self) -> ()
|
10732
10918
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10733
10919
|
variants: function
|
10734
10920
|
dispatch:
|
10735
|
-
|
10921
|
+
CompositeExplicitAutograd: foreach_tensor_cosh_slow_
|
10736
10922
|
CUDA: foreach_tensor_cosh_cuda_
|
10737
10923
|
autogen: _foreach_cosh.out
|
10738
10924
|
|
@@ -10740,14 +10926,14 @@
|
|
10740
10926
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10741
10927
|
variants: function
|
10742
10928
|
dispatch:
|
10743
|
-
|
10929
|
+
CompositeExplicitAutograd: foreach_tensor_erf_slow
|
10744
10930
|
CUDA: foreach_tensor_erf_cuda
|
10745
10931
|
|
10746
10932
|
- func: _foreach_erf_(Tensor(a!)[] self) -> ()
|
10747
10933
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10748
10934
|
variants: function
|
10749
10935
|
dispatch:
|
10750
|
-
|
10936
|
+
CompositeExplicitAutograd: foreach_tensor_erf_slow_
|
10751
10937
|
CUDA: foreach_tensor_erf_cuda_
|
10752
10938
|
autogen: _foreach_erf.out
|
10753
10939
|
|
@@ -10755,14 +10941,14 @@
|
|
10755
10941
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10756
10942
|
variants: function
|
10757
10943
|
dispatch:
|
10758
|
-
|
10944
|
+
CompositeExplicitAutograd: foreach_tensor_erfc_slow
|
10759
10945
|
CUDA: foreach_tensor_erfc_cuda
|
10760
10946
|
|
10761
10947
|
- func: _foreach_erfc_(Tensor(a!)[] self) -> ()
|
10762
10948
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10763
10949
|
variants: function
|
10764
10950
|
dispatch:
|
10765
|
-
|
10951
|
+
CompositeExplicitAutograd: foreach_tensor_erfc_slow_
|
10766
10952
|
CUDA: foreach_tensor_erfc_cuda_
|
10767
10953
|
autogen: _foreach_erfc.out
|
10768
10954
|
|
@@ -10770,14 +10956,14 @@
|
|
10770
10956
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10771
10957
|
variants: function
|
10772
10958
|
dispatch:
|
10773
|
-
|
10959
|
+
CompositeExplicitAutograd: foreach_tensor_exp_slow
|
10774
10960
|
CUDA: foreach_tensor_exp_cuda
|
10775
10961
|
|
10776
10962
|
- func: _foreach_exp_(Tensor(a!)[] self) -> ()
|
10777
10963
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10778
10964
|
variants: function
|
10779
10965
|
dispatch:
|
10780
|
-
|
10966
|
+
CompositeExplicitAutograd: foreach_tensor_exp_slow_
|
10781
10967
|
CUDA: foreach_tensor_exp_cuda_
|
10782
10968
|
autogen: _foreach_exp.out
|
10783
10969
|
|
@@ -10785,14 +10971,14 @@
|
|
10785
10971
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10786
10972
|
variants: function
|
10787
10973
|
dispatch:
|
10788
|
-
|
10974
|
+
CompositeExplicitAutograd: foreach_tensor_expm1_slow
|
10789
10975
|
CUDA: foreach_tensor_expm1_cuda
|
10790
10976
|
|
10791
10977
|
- func: _foreach_expm1_(Tensor(a!)[] self) -> ()
|
10792
10978
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10793
10979
|
variants: function
|
10794
10980
|
dispatch:
|
10795
|
-
|
10981
|
+
CompositeExplicitAutograd: foreach_tensor_expm1_slow_
|
10796
10982
|
CUDA: foreach_tensor_expm1_cuda_
|
10797
10983
|
autogen: _foreach_expm1.out
|
10798
10984
|
|
@@ -10800,14 +10986,14 @@
|
|
10800
10986
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10801
10987
|
variants: function
|
10802
10988
|
dispatch:
|
10803
|
-
|
10989
|
+
CompositeExplicitAutograd: foreach_tensor_floor_slow
|
10804
10990
|
CUDA: foreach_tensor_floor_cuda
|
10805
10991
|
|
10806
10992
|
- func: _foreach_floor_(Tensor(a!)[] self) -> ()
|
10807
10993
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10808
10994
|
variants: function
|
10809
10995
|
dispatch:
|
10810
|
-
|
10996
|
+
CompositeExplicitAutograd: foreach_tensor_floor_slow_
|
10811
10997
|
CUDA: foreach_tensor_floor_cuda_
|
10812
10998
|
autogen: _foreach_floor.out
|
10813
10999
|
|
@@ -10815,14 +11001,14 @@
|
|
10815
11001
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10816
11002
|
variants: function
|
10817
11003
|
dispatch:
|
10818
|
-
|
11004
|
+
CompositeExplicitAutograd: foreach_tensor_frac_slow
|
10819
11005
|
CUDA: foreach_tensor_frac_cuda
|
10820
11006
|
|
10821
11007
|
- func: _foreach_frac_(Tensor(a!)[] self) -> ()
|
10822
11008
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10823
11009
|
variants: function
|
10824
11010
|
dispatch:
|
10825
|
-
|
11011
|
+
CompositeExplicitAutograd: foreach_tensor_frac_slow_
|
10826
11012
|
CUDA: foreach_tensor_frac_cuda_
|
10827
11013
|
autogen: _foreach_frac.out
|
10828
11014
|
|
@@ -10830,7 +11016,7 @@
|
|
10830
11016
|
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10831
11017
|
variants: function
|
10832
11018
|
dispatch:
|
10833
|
-
|
11019
|
+
CompositeExplicitAutograd: foreach_tensor_ternary_lerp_slow
|
10834
11020
|
CUDA: foreach_tensor_lerp_ternary_cuda
|
10835
11021
|
autogen: _foreach_lerp.List_out
|
10836
11022
|
|
@@ -10838,7 +11024,7 @@
|
|
10838
11024
|
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10839
11025
|
variants: function
|
10840
11026
|
dispatch:
|
10841
|
-
|
11027
|
+
CompositeExplicitAutograd: foreach_tensor_ternary_lerp_slow_
|
10842
11028
|
CUDA: foreach_tensor_lerp_ternary_cuda_
|
10843
11029
|
autogen: _foreach_lerp.List_out
|
10844
11030
|
|
@@ -10846,7 +11032,7 @@
|
|
10846
11032
|
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10847
11033
|
variants: function
|
10848
11034
|
dispatch:
|
10849
|
-
|
11035
|
+
CompositeExplicitAutograd: foreach_tensor_lerp_list_kernel_slow
|
10850
11036
|
CUDA: foreach_tensor_lerp_list_cuda
|
10851
11037
|
autogen: _foreach_lerp.Scalar_out
|
10852
11038
|
|
@@ -10854,7 +11040,7 @@
|
|
10854
11040
|
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10855
11041
|
variants: function
|
10856
11042
|
dispatch:
|
10857
|
-
|
11043
|
+
CompositeExplicitAutograd: foreach_tensor_lerp_list_kernel_slow_
|
10858
11044
|
CUDA: foreach_tensor_lerp_list_cuda_
|
10859
11045
|
autogen: _foreach_lerp.Scalar_out
|
10860
11046
|
|
@@ -10862,14 +11048,14 @@
|
|
10862
11048
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10863
11049
|
variants: function
|
10864
11050
|
dispatch:
|
10865
|
-
|
11051
|
+
CompositeExplicitAutograd: foreach_tensor_lgamma_slow
|
10866
11052
|
CUDA: foreach_tensor_lgamma_cuda
|
10867
11053
|
|
10868
11054
|
- func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
|
10869
11055
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10870
11056
|
variants: function
|
10871
11057
|
dispatch:
|
10872
|
-
|
11058
|
+
CompositeExplicitAutograd: foreach_tensor_lgamma_slow_
|
10873
11059
|
CUDA: foreach_tensor_lgamma_cuda_
|
10874
11060
|
autogen: _foreach_lgamma.out
|
10875
11061
|
|
@@ -10877,14 +11063,14 @@
|
|
10877
11063
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10878
11064
|
variants: function
|
10879
11065
|
dispatch:
|
10880
|
-
|
11066
|
+
CompositeExplicitAutograd: foreach_tensor_log_slow
|
10881
11067
|
CUDA: foreach_tensor_log_cuda
|
10882
11068
|
|
10883
11069
|
- func: _foreach_log_(Tensor(a!)[] self) -> ()
|
10884
11070
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10885
11071
|
variants: function
|
10886
11072
|
dispatch:
|
10887
|
-
|
11073
|
+
CompositeExplicitAutograd: foreach_tensor_log_slow_
|
10888
11074
|
CUDA: foreach_tensor_log_cuda_
|
10889
11075
|
autogen: _foreach_log.out
|
10890
11076
|
|
@@ -10892,14 +11078,14 @@
|
|
10892
11078
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10893
11079
|
variants: function
|
10894
11080
|
dispatch:
|
10895
|
-
|
11081
|
+
CompositeExplicitAutograd: foreach_tensor_log10_slow
|
10896
11082
|
CUDA: foreach_tensor_log10_cuda
|
10897
11083
|
|
10898
11084
|
- func: _foreach_log10_(Tensor(a!)[] self) -> ()
|
10899
11085
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10900
11086
|
variants: function
|
10901
11087
|
dispatch:
|
10902
|
-
|
11088
|
+
CompositeExplicitAutograd: foreach_tensor_log10_slow_
|
10903
11089
|
CUDA: foreach_tensor_log10_cuda_
|
10904
11090
|
autogen: _foreach_log10.out
|
10905
11091
|
|
@@ -10907,14 +11093,14 @@
|
|
10907
11093
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10908
11094
|
variants: function
|
10909
11095
|
dispatch:
|
10910
|
-
|
11096
|
+
CompositeExplicitAutograd: foreach_tensor_log1p_slow
|
10911
11097
|
CUDA: foreach_tensor_log1p_cuda
|
10912
11098
|
|
10913
11099
|
- func: _foreach_log1p_(Tensor(a!)[] self) -> ()
|
10914
11100
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10915
11101
|
variants: function
|
10916
11102
|
dispatch:
|
10917
|
-
|
11103
|
+
CompositeExplicitAutograd: foreach_tensor_log1p_slow_
|
10918
11104
|
CUDA: foreach_tensor_log1p_cuda_
|
10919
11105
|
autogen: _foreach_log1p.out
|
10920
11106
|
|
@@ -10922,37 +11108,45 @@
|
|
10922
11108
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10923
11109
|
variants: function
|
10924
11110
|
dispatch:
|
10925
|
-
|
11111
|
+
CompositeExplicitAutograd: foreach_tensor_log2_slow
|
10926
11112
|
CUDA: foreach_tensor_log2_cuda
|
10927
11113
|
|
10928
11114
|
- func: _foreach_log2_(Tensor(a!)[] self) -> ()
|
10929
11115
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10930
11116
|
variants: function
|
10931
11117
|
dispatch:
|
10932
|
-
|
11118
|
+
CompositeExplicitAutograd: foreach_tensor_log2_slow_
|
10933
11119
|
CUDA: foreach_tensor_log2_cuda_
|
10934
11120
|
autogen: _foreach_log2.out
|
10935
11121
|
|
11122
|
+
- func: _foreach_max(Tensor[] self) -> Tensor[]
|
11123
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11124
|
+
variants: function
|
11125
|
+
dispatch:
|
11126
|
+
CompositeExplicitAutograd: foreach_tensor_max_slow
|
11127
|
+
CUDA: foreach_tensor_max_cuda
|
11128
|
+
autogen: _foreach_max.out
|
11129
|
+
|
10936
11130
|
- func: _foreach_neg(Tensor[] self) -> Tensor[]
|
10937
11131
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10938
11132
|
variants: function
|
10939
11133
|
dispatch:
|
10940
|
-
|
11134
|
+
CompositeExplicitAutograd: foreach_tensor_neg_slow
|
10941
11135
|
CUDA: foreach_tensor_neg_cuda
|
10942
11136
|
|
10943
11137
|
- func: _foreach_neg_(Tensor(a!)[] self) -> ()
|
10944
11138
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10945
11139
|
variants: function
|
10946
11140
|
dispatch:
|
10947
|
-
|
11141
|
+
CompositeExplicitAutograd: foreach_tensor_neg_slow_
|
10948
11142
|
CUDA: foreach_tensor_neg_cuda_
|
10949
11143
|
autogen: _foreach_neg.out
|
10950
11144
|
|
10951
|
-
- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]
|
11145
|
+
- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2, ScalarType? dtype=None) -> Tensor[]
|
10952
11146
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10953
11147
|
variants: function
|
10954
11148
|
dispatch:
|
10955
|
-
|
11149
|
+
CompositeExplicitAutograd: foreach_tensor_norm_slow
|
10956
11150
|
CUDA: foreach_tensor_norm_cuda
|
10957
11151
|
autogen: _foreach_norm.Scalar_out
|
10958
11152
|
|
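The hunk above declares a new `_foreach_max(Tensor[] self) -> Tensor[]` kernel (one maximum per tensor in a list) and adds an optional `dtype` argument to `_foreach_norm.Scalar`. These `_foreach_*` entries are internal dispatcher kernels and may not be surfaced directly by torch-rb's generated bindings; the sketch below only illustrates the per-tensor semantics using public torch-rb tensor methods, not the new kernel itself.

```ruby
require "torch"

# Per-tensor equivalent of the new _foreach_max kernel:
# one scalar maximum for each tensor in the list.
# (The private _foreach_max binding itself may not be exposed by the gem.)
tensors = [Torch.tensor([1.0, 3.0, 2.0]), Torch.tensor([-1.0, 0.5])]
maxes = tensors.map(&:max) # => one 0-dimensional tensor per input
```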
@@ -10960,35 +11154,35 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_list_kernel_slow
     CUDA: foreach_tensor_pow_list_kernel_cuda

 - func: _foreach_pow.Scalar(Tensor[] self, Scalar exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalar_kernel_slow
     CUDA: foreach_tensor_pow_scalar_kernel_cuda

 - func: _foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalarlist_kernel_slow
     CUDA: foreach_tensor_pow_scalarlist_kernel_cuda

 - func: _foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_scalar_pow_list_kernel_slow
     CUDA: foreach_scalar_pow_list_kernel_cuda

 - func: _foreach_pow_.List(Tensor(a!)[] self, Tensor[] exponent) -> ()
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_list_kernel_slow_
     CUDA: foreach_tensor_pow_list_kernel_cuda_
   autogen: _foreach_pow.List_out

@@ -10996,7 +11190,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalar_kernel_slow_
     CUDA: foreach_tensor_pow_scalar_kernel_cuda_
   autogen: _foreach_pow.Scalar_out

@@ -11004,7 +11198,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalarlist_kernel_slow_
     CUDA: foreach_tensor_pow_scalarlist_kernel_cuda_
   autogen: _foreach_pow.ScalarList_out

@@ -11012,14 +11206,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_reciprocal_slow
     CUDA: foreach_tensor_reciprocal_cuda

 - func: _foreach_reciprocal_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_reciprocal_slow_
     CUDA: foreach_tensor_reciprocal_cuda_
   autogen: _foreach_reciprocal.out

@@ -11027,14 +11221,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_round_slow
     CUDA: foreach_tensor_round_cuda

 - func: _foreach_round_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_round_slow_
     CUDA: foreach_tensor_round_cuda_
   autogen: _foreach_round.out

@@ -11042,14 +11236,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sigmoid_slow
     CUDA: foreach_tensor_sigmoid_cuda

 - func: _foreach_sigmoid_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sigmoid_slow_
     CUDA: foreach_tensor_sigmoid_cuda_
   autogen: _foreach_sigmoid.out

@@ -11057,14 +11251,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sign_slow
     CUDA: foreach_tensor_sign_cuda

 - func: _foreach_sign_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sign_slow_
     CUDA: foreach_tensor_sign_cuda_
   autogen: _foreach_sign.out

@@ -11072,14 +11266,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sin_slow
     CUDA: foreach_tensor_sin_cuda

 - func: _foreach_sin_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sin_slow_
     CUDA: foreach_tensor_sin_cuda_
   autogen: _foreach_sin.out

@@ -11087,14 +11281,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sinh_slow
     CUDA: foreach_tensor_sinh_cuda

 - func: _foreach_sinh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sinh_slow_
     CUDA: foreach_tensor_sinh_cuda_
   autogen: _foreach_sinh.out

@@ -11102,14 +11296,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sqrt_slow
     CUDA: foreach_tensor_sqrt_cuda

 - func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sqrt_slow_
     CUDA: foreach_tensor_sqrt_cuda_
   autogen: _foreach_sqrt.out

@@ -11117,14 +11311,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tan_slow
     CUDA: foreach_tensor_tan_cuda

 - func: _foreach_tan_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tan_slow_
     CUDA: foreach_tensor_tan_cuda_
   autogen: _foreach_tan.out

@@ -11132,14 +11326,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tanh_slow
     CUDA: foreach_tensor_tanh_cuda

 - func: _foreach_tanh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tanh_slow_
     CUDA: foreach_tensor_tanh_cuda_
   autogen: _foreach_tanh.out

@@ -11147,14 +11341,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_trunc_slow
     CUDA: foreach_tensor_trunc_cuda

 - func: _foreach_trunc_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_trunc_slow_
     CUDA: foreach_tensor_trunc_cuda_
   autogen: _foreach_trunc.out

@@ -11162,7 +11356,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_zero_slow_
     CUDA: foreach_tensor_zero_cuda_
   autogen: _foreach_zero, _foreach_zero.out

@@ -11170,9 +11364,15 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_copy_list_kernel_slow_
     CUDA: foreach_tensor_copy_list_kernel_cuda_
-  autogen: _foreach_copy
+  autogen: _foreach_copy.out
+
+- func: _foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _foreach_copy

 - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
   dispatch:
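`_foreach_copy` gains an out-of-place functional form (returning `Tensor[] self_out`) alongside the existing in-place `_foreach_copy_`. At the user level the in-place variant behaves like a per-pair copy. The sketch below assumes torch-rb's usual convention of mapping PyTorch's trailing-underscore in-place ops to `!` methods (so `copy_` would surface as `copy!`); that mapping is not confirmed by this diff.

```ruby
require "torch"

dests   = [Torch.zeros(2), Torch.zeros(3)]
sources = [Torch.ones(2), Torch.full([3], 7.0)]

# Per-pair equivalent of _foreach_copy_: copy each source into its destination in place.
# `copy!` is the assumed Ruby name for PyTorch's copy_.
dests.zip(sources).each { |dst, src| dst.copy!(src) }
```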
@@ -12341,6 +12541,7 @@
   dispatch:
     CPU: upsample_linear1d_out_cpu
     CUDA: upsample_linear1d_out_cuda
+    MPS: upsample_linear1d_out_mps

 - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn

@@ -12352,6 +12553,7 @@
   dispatch:
     CPU: upsample_linear1d_backward_out_cpu
     CUDA: upsample_linear1d_backward_out_cuda
+    MPS: upsample_linear1d_backward_out_mps

 - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn

@@ -12824,7 +13026,7 @@
     SparseMeta: isinf_sparse_meta
     SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
   autogen: isinf.out
-  tags: core
+  tags: [core, pointwise]

 - func: record_stream(Tensor(a!) self, Stream s) -> ()
   variants: method

@@ -13750,11 +13952,18 @@
   dispatch:
     CPU, CUDA: linalg_eig_out

+- func: _linalg_eigvals(Tensor self) -> Tensor
+  python_module: linalg
+  dispatch:
+    CPU, CUDA: _linalg_eigvals
+
 - func: linalg_eigvals(Tensor self) -> Tensor
   python_module: linalg

 - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  dispatch:
+    CPU, CUDA: linalg_eigvals_out

 # This function is exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
 # `linalg.eigvalsh` as composite functions that call this one
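`linalg_eigvals.out` now gets a direct CPU/CUDA dispatch, backed by a new private `_linalg_eigvals` helper. Assuming torch-rb generates this op into its linalg namespace (functions with `python_module: linalg` land in `Torch::Linalg`; `eigvals` itself is not confirmed by this diff), usage would look like the sketch below.

```ruby
require "torch"

a = Torch.randn(3, 3)
# Eigenvalues of a general (non-symmetric) matrix; the result is complex-valued.
# Torch::Linalg.eigvals is assumed to be generated by the gem's codegen.
vals = Torch::Linalg.eigvals(a)
```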
@@ -14058,6 +14267,12 @@
 # It is undocumented and should not be used outside of tests.
 - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor

+# Note: for testing COW materialization within `at::parallel_for` loop function
+- func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _test_parallel_materialize
+
 # Note: this function is only for testing.
 - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
   python_module: nn

@@ -14392,6 +14607,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutograd: split_with_sizes_copy_out
+    CUDA: split_with_sizes_copy_out_cuda

 - func: view_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function

@@ -14428,6 +14644,16 @@
     NestedTensorCUDA: NestedTensor_to_padded_tensor_cuda
   autogen: to_padded_tensor.out

+- func: _jagged_to_padded_dense_forward(Tensor values, Tensor[] offsets, SymInt[] max_lengths, float padding_value=0.0) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _fbgemm_jagged_to_padded_dense_forward
+
+- func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _fbgemm_dense_to_jagged_forward_symint
+
 - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
   dispatch:
     NestedTensorCPU: NestedTensor_softmax_dropout

@@ -14468,19 +14694,28 @@

 - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_cpu
     CUDA: _scaled_dot_product_flash_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
   tags: nondeterministic_seeded

+- func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu
+  tags: nondeterministic_seeded
+
 - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
   device_check: NoCheck
   variants: function
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_backward_cpu
     CUDA: _scaled_dot_product_flash_attention_backward_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested

+- func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu_backward
+
 - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
   dispatch:
     CUDA: _scaled_dot_product_efficient_attention_cuda
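The CPU flash-attention path is split out into dedicated `_scaled_dot_product_flash_attention_for_cpu` forward and backward ops, leaving the original entry CUDA/NestedTensor only. These are dispatcher-level kernels; at the user level they all compute softmax(QKᵀ/√d)·V. The sketch below expresses that computation with public torch-rb ops only, as a reference for the semantics rather than a call into the new kernels.

```ruby
require "torch"

q = Torch.randn(2, 4, 8) # (batch, seq_len, head_dim)
k = Torch.randn(2, 4, 8)
v = Torch.randn(2, 4, 8)

# softmax(q . k^T / sqrt(d)) . v -- the math behind the *_flash_attention kernels
scores = Torch.matmul(q, k.transpose(-2, -1)) / Math.sqrt(q.shape.last)
attn   = Torch.softmax(scores, -1)
out    = Torch.matmul(attn, v) # => shape [2, 4, 8]
```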
@@ -14493,26 +14728,36 @@
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
   tags: nondeterministic_seeded

-- func:
+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_cuda
+  tags: nondeterministic_seeded
+
+- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
+  tags: nondeterministic_seeded
+
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
     CUDA: _flash_attention_forward
   tags: nondeterministic_seeded

-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
     CUDA: _flash_attention_backward

-# Returns
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k,
+# Returns output, logsumexp if compute_logsumexp
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt? max_seqlen_q, SymInt? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? seqlen_k=None, int? window_size=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
   tags: nondeterministic_seeded

-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None, int? window_size=None, bool shared_storage_dqdkdv=False) -> (Tensor, Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:

@@ -15312,11 +15557,11 @@
     CPU: foobar
   autogen: _foobar.out

-# Fused Optimizer CUDA kernels.
 - func: _fused_adam_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
   variants: function
   dispatch:
+    CPU: _fused_adam_kernel_cpu_
     CUDA: _fused_adam_kernel_cuda_
   autogen: _fused_adam, _fused_adam.out

@@ -15326,6 +15571,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
+    CPU: _fused_adam_kernel_cpu_
     CUDA: _fused_adam_kernel_cuda_
   autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out

@@ -15333,6 +15579,7 @@
   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
   variants: function
   dispatch:
+    CPU: _fused_adamw_kernel_cpu_
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw, _fused_adamw.out

@@ -15342,9 +15589,34 @@
   device_check: NoCheck
   variants: function
   dispatch:
+    CPU: _fused_adamw_kernel_cpu_
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

+- func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  variants: function
+  dispatch:
+    CPU: _fused_sgd_kernel_cpu_
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd, _fused_sgd.out
+
+- func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  # but still skip the device check as the Tensor LR can be on CPU
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _fused_sgd_kernel_cpu_
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
+- func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  variants: function
+  dispatch:
+    CPU: _fused_adagrad_kernel_cpu_
+  autogen: _fused_adagrad, _fused_adagrad.out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function