torch-rb 0.15.0 → 0.16.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -0
- data/codegen/native_functions.yaml +197 -33
- data/ext/torch/utils.h +1 -1
- data/lib/torch/nn/elu.rb +20 -0
- data/lib/torch/nn/functional.rb +12 -0
- data/lib/torch/nn/gelu.rb +18 -0
- data/lib/torch/nn/leaky_relu.rb +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +2 -0
- metadata +6 -11
- data/ext/torch/fft_functions.h +0 -6
- data/ext/torch/linalg_functions.h +0 -6
- data/ext/torch/nn_functions.h +0 -6
- data/ext/torch/sparse_functions.h +0 -6
- data/ext/torch/special_functions.h +0 -6
- data/ext/torch/tensor_functions.h +0 -6
- data/ext/torch/torch_functions.h +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8442fc0f85d6f2465258a54e5aefbe03d23a7c0e58753e06855bfebd2f4de802
+  data.tar.gz: ac0efb89f9b6d413498bfb1c2e84336aa728047dd013d00fa736449e5be82617
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6830efe74de98fc8a8d23e7308795a60ee60fff72b3f82fa7cb92815f4efe52fdf3637e0821490f5e3e8c2c8731043f52f5aff20cfb01db1340be0962fed18db
+  data.tar.gz: 3e50976e5add37b4158956c76e3c922167911492acda9e171af42ad39d5abe946c36427e545d9fc820a2800e3df0523b0068ce76b804d0c05a6f1e2ad495de01
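The two sections above record SHA-256 and SHA-512 digests for the metadata.gz and data.tar.gz members of the packaged gem. As an illustrative sketch only (not part of the release), the SHA-256 values can be re-checked in Ruby after fetching and unpacking the archive, e.g. `gem fetch torch-rb -v 0.16.0`, `tar -xf torch-rb-0.16.0.gem`, and `gunzip checksums.yaml.gz`:

  require "digest"
  require "yaml"

  # checksums.yaml maps each digest algorithm to the archive members it covers
  expected = YAML.safe_load(File.read("checksums.yaml"))
  %w[metadata.gz data.tar.gz].each do |file|
    actual = Digest::SHA256.file(file).hexdigest
    status = actual == expected.dig("SHA256", file) ? "OK" : "MISMATCH"
    puts "#{file}: #{status}"
  end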
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
data/codegen/native_functions.yaml
CHANGED
@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out

 # This function compares the storage numel of self with that of other, where
-# storage numel is
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
@@ -175,12 +175,24 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda

+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu

 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()

+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range
@@ -470,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -564,8 +577,8 @@
   dispatch:
     SparseCPU: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU:
-    SparseCsrCUDA:
+    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -763,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange

-# This operator should be named `
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1220,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise

+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -1621,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps

 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1847,7 +1868,10 @@
 - func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-
+
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out

 - func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
@@ -2346,7 +2370,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core

@@ -2452,7 +2476,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out

@@ -2954,12 +2978,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps

 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out

 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2967,12 +2993,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps

 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out

 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2980,12 +3008,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps

 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out

 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
@@ -3302,11 +3332,15 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm

-- func:
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear

@@ -4058,12 +4092,18 @@

 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
   dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
     CUDA: _convert_weight_to_int4pack_cuda

 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
+    CPU: _weight_int4pack_mm_cpu
     CUDA: _weight_int4pack_mm_cuda

+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse

@@ -4439,7 +4479,6 @@
     MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core

 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
@@ -4810,7 +4849,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor:
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint

 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4969,6 +5008,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_

 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -5356,6 +5396,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out

+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+#   input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+#   easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
@@ -5363,7 +5418,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]

 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
@@ -5562,6 +5617,14 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda

+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
@@ -5753,6 +5816,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out

 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6008,7 +6072,6 @@
     CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
-  tags: core

 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args

@@ -6091,6 +6154,52 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out

+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
@@ -6275,6 +6384,7 @@
   variants: function
   dispatch:
     CPU, CUDA: var_mean
+    MPS: var_mean_mps
   autogen: var_mean.correction_out

 - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6295,15 +6405,13 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: where
-    MPS: where_mps
+    CPU, CUDA, MPS: where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: where_self_out
-    MPS: where_self_out_mps
+    CPU, CUDA, MPS: where_self_out

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6644,7 +6752,7 @@
     MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
-    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
     NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
@@ -6934,7 +7042,7 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

-- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor

@@ -6951,7 +7059,10 @@
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

-- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -7037,7 +7148,7 @@
   dispatch:
     CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -7054,7 +7165,7 @@
   dispatch:
     CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -7070,7 +7181,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -7133,7 +7244,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
-    SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
@@ -7142,7 +7253,7 @@
 - func: crow_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
     CompositeExplicitAutograd: crow_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7150,7 +7261,7 @@
 - func: col_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
     CompositeExplicitAutograd: col_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7158,7 +7269,7 @@
 - func: ccol_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
     CompositeExplicitAutograd: ccol_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7166,7 +7277,7 @@
 - func: row_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
     CompositeExplicitAutograd: row_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7675,6 +7786,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_
   autogen: set.source_Storage, set.source_Storage_out
+  tags: inplace_view

 - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7687,6 +7799,7 @@
     MPS: set_storage_mps_
     QuantizedCPU, QuantizedCUDA: set_storage_quantized_
   autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+  tags: inplace_view

 - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7694,6 +7807,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: set__symint
+  tags: inplace_view

 - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
   variants: method
@@ -7702,6 +7816,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_tensor_
   autogen: set.source_Tensor, set.source_Tensor_out
+  tags: inplace_view

 - func: set_(Tensor(a!) self) -> Tensor(a!)
   variants: method
@@ -7711,6 +7826,7 @@
     Meta: set_meta_
     MPS: set_mps_
   autogen: set, set.out
+  tags: inplace_view

 # Not making it CompositeImplicitAutograd because lift
 # should be a primitive w.r.t. functorch
@@ -10112,12 +10228,14 @@
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+    CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+    CPU: _amp_update_scale_cpu_
   autogen: _amp_update_scale, _amp_update_scale.out

 #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
@@ -12341,6 +12459,7 @@
   dispatch:
     CPU: upsample_linear1d_out_cpu
     CUDA: upsample_linear1d_out_cuda
+    MPS: upsample_linear1d_out_mps

 - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn
@@ -12352,6 +12471,7 @@
   dispatch:
     CPU: upsample_linear1d_backward_out_cpu
     CUDA: upsample_linear1d_backward_out_cuda
+    MPS: upsample_linear1d_backward_out_mps

 - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn
@@ -12824,7 +12944,7 @@
     SparseMeta: isinf_sparse_meta
     SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
   autogen: isinf.out
-  tags: core
+  tags: [core, pointwise]

 - func: record_stream(Tensor(a!) self, Stream s) -> ()
   variants: method
@@ -13750,11 +13870,18 @@
   dispatch:
     CPU, CUDA: linalg_eig_out

+- func: _linalg_eigvals(Tensor self) -> Tensor
+  python_module: linalg
+  dispatch:
+    CPU, CUDA: _linalg_eigvals
+
 - func: linalg_eigvals(Tensor self) -> Tensor
   python_module: linalg

 - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  dispatch:
+    CPU, CUDA: linalg_eigvals_out

 # This function is exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
 # `linalg.eigvalsh` as composite functions that call this one
@@ -14058,6 +14185,12 @@
 # It is undocumented and should not be used outside of tests.
 - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor

+# Note: for testing COW materialization within `at::parallel_for` loop function
+- func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _test_parallel_materialize
+
 # Note: this function is only for testing.
 - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
   python_module: nn
@@ -14392,6 +14525,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutograd: split_with_sizes_copy_out
+    CUDA: split_with_sizes_copy_out_cuda

 - func: view_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -14468,19 +14602,28 @@

 - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_cpu
     CUDA: _scaled_dot_product_flash_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
   tags: nondeterministic_seeded

+- func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu
+  tags: nondeterministic_seeded
+
 - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
   device_check: NoCheck
   variants: function
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_backward_cpu
     CUDA: _scaled_dot_product_flash_attention_backward_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested

+- func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu_backward
+
 - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
   dispatch:
     CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14493,6 +14636,11 @@
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
   tags: nondeterministic_seeded

+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_cuda
+  tags: nondeterministic_seeded
+
 - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
@@ -14505,8 +14653,8 @@
   dispatch:
     CUDA: _flash_attention_backward

-# Returns
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
+# Returns output, logsumexp if compute_logsumexp
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
@@ -15345,6 +15493,22 @@
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

+- func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  variants: function
+  dispatch:
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd, _fused_sgd.out
+
+- func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  # but still skip the device check as the Tensor LR can be on CPU
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function
data/ext/torch/utils.h
CHANGED
data/lib/torch/nn/elu.rb
ADDED
@@ -0,0 +1,20 @@
+module Torch
+  module NN
+    class ELU < Module
+      def initialize(alpha: 1, inplace: false)
+        super()
+        @alpha = alpha
+        @inplace = inplace
+      end
+
+      def forward(input)
+        F.elu(input, alpha: @alpha, inplace: @inplace)
+      end
+
+      def extra_inspect
+        inplace_str = @inplace ? ", inplace: true" : ""
+        format("alpha: %s", @alpha) + inplace_str
+      end
+    end
+  end
+end
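The new class follows the pattern of the gem's other activation modules (e.g. LeakyReLU): options are captured at construction time and forward delegates to the functional API. A minimal usage sketch, illustrative only (tensor values are arbitrary, not part of the diff):

  require "torch"

  layer = Torch::NN::ELU.new(alpha: 1.0)
  x = Torch.tensor([-2.0, 0.0, 2.0])
  y = layer.call(x)  # negative entries become alpha * (exp(x) - 1); positive entries pass through
  puts y.inspect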
data/lib/torch/nn/functional.rb
CHANGED
@@ -174,6 +174,18 @@ module Torch

       # activation layers

+      def elu(input, alpha: 1, inplace: false)
+        if inplace
+          NN.elu!(input, alpha)
+        else
+          NN.elu(input, alpha)
+        end
+      end
+
+      def gelu(input, approximate: 'none')
+        NN.gelu(input, approximate: approximate)
+      end
+
       def hardshrink(input, lambd = 0.5)
         Torch.hardshrink(input, lambd)
       end
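Both helpers delegate to the native bindings: elu either mutates its input (inplace: true routes to NN.elu!) or returns a new tensor, and gelu forwards the approximate keyword ('none' or 'tanh'). A short sketch, illustrative only, assuming Torch::NN::Functional is the receiver these F calls resolve to:

  require "torch"

  functional = Torch::NN::Functional  # assumed alias target of F inside the gem
  x = Torch.tensor([-1.0, 0.5, 2.0])

  functional.elu(x, alpha: 1.0)            # returns a new tensor
  functional.elu(x, inplace: true)         # mutates x via NN.elu!
  functional.gelu(x, approximate: "tanh")  # tanh approximation of GELU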
data/lib/torch/nn/gelu.rb
ADDED
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class GELU < Module
+      def initialize(approximate: 'none')
+        super()
+        @approximate = approximate
+      end
+
+      def forward(input)
+        F.gelu(input, approximate: @approximate)
+      end
+
+      def extra_inspect
+        "approximate: #{@approximate.inspect}"
+      end
+    end
+  end
+end
data/lib/torch/nn/leaky_relu.rb
CHANGED
data/lib/torch/version.rb
CHANGED
data/lib/torch.rb
CHANGED
@@ -123,6 +123,8 @@ require_relative "torch/nn/dropout3d"
 require_relative "torch/nn/feature_alpha_dropout"

 # nn activations
+require_relative "torch/nn/elu"
+require_relative "torch/nn/gelu"
 require_relative "torch/nn/hardshrink"
 require_relative "torch/nn/leaky_relu"
 require_relative "torch/nn/log_sigmoid"
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: torch-rb
 version: !ruby/object:Gem::Version
-  version: 0.15.0
+  version: 0.16.0
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-
+date: 2024-06-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -43,24 +43,17 @@ files:
 - ext/torch/ext.cpp
 - ext/torch/extconf.rb
 - ext/torch/fft.cpp
-- ext/torch/fft_functions.h
 - ext/torch/generator.cpp
 - ext/torch/ivalue.cpp
 - ext/torch/linalg.cpp
-- ext/torch/linalg_functions.h
 - ext/torch/nn.cpp
-- ext/torch/nn_functions.h
 - ext/torch/random.cpp
 - ext/torch/ruby_arg_parser.cpp
 - ext/torch/ruby_arg_parser.h
-- ext/torch/sparse_functions.h
 - ext/torch/special.cpp
-- ext/torch/special_functions.h
 - ext/torch/templates.h
 - ext/torch/tensor.cpp
-- ext/torch/tensor_functions.h
 - ext/torch/torch.cpp
-- ext/torch/torch_functions.h
 - ext/torch/utils.h
 - ext/torch/wrap_outputs.h
 - lib/torch-rb.rb
@@ -103,12 +96,14 @@ files:
 - lib/torch/nn/dropout2d.rb
 - lib/torch/nn/dropout3d.rb
 - lib/torch/nn/dropoutnd.rb
+- lib/torch/nn/elu.rb
 - lib/torch/nn/embedding.rb
 - lib/torch/nn/embedding_bag.rb
 - lib/torch/nn/feature_alpha_dropout.rb
 - lib/torch/nn/fold.rb
 - lib/torch/nn/functional.rb
 - lib/torch/nn/functional_attention.rb
+- lib/torch/nn/gelu.rb
 - lib/torch/nn/group_norm.rb
 - lib/torch/nn/gru.rb
 - lib/torch/nn/hardshrink.rb
@@ -230,14 +225,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '3'
+      version: '3.1'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.
+rubygems_version: 3.5.11
 signing_key:
 specification_version: 4
 summary: Deep learning for Ruby, powered by LibTorch
data/ext/torch/fft_functions.h
DELETED
data/ext/torch/nn_functions.h
DELETED