torch-rb 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -0
- data/codegen/native_functions.yaml +197 -33
- data/ext/torch/utils.h +1 -1
- data/lib/torch/nn/elu.rb +20 -0
- data/lib/torch/nn/functional.rb +12 -0
- data/lib/torch/nn/gelu.rb +18 -0
- data/lib/torch/nn/leaky_relu.rb +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +2 -0
- metadata +6 -11
- data/ext/torch/fft_functions.h +0 -6
- data/ext/torch/linalg_functions.h +0 -6
- data/ext/torch/nn_functions.h +0 -6
- data/ext/torch/sparse_functions.h +0 -6
- data/ext/torch/special_functions.h +0 -6
- data/ext/torch/tensor_functions.h +0 -6
- data/ext/torch/torch_functions.h +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8442fc0f85d6f2465258a54e5aefbe03d23a7c0e58753e06855bfebd2f4de802
+  data.tar.gz: ac0efb89f9b6d413498bfb1c2e84336aa728047dd013d00fa736449e5be82617
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6830efe74de98fc8a8d23e7308795a60ee60fff72b3f82fa7cb92815f4efe52fdf3637e0821490f5e3e8c2c8731043f52f5aff20cfb01db1340be0962fed18db
+  data.tar.gz: 3e50976e5add37b4158956c76e3c922167911492acda9e171af42ad39d5abe946c36427e545d9fc820a2800e3df0523b0068ce76b804d0c05a6f1e2ad495de01
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
data/codegen/native_functions.yaml
CHANGED
@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out
 
 # This function compares the storage numel of self with that of other, where
-# storage numel is
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
@@ -175,12 +175,24 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda
 
+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu
 
 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()
 
+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range
@@ -470,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -564,8 +577,8 @@
   dispatch:
     SparseCPU: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU:
-    SparseCsrCUDA:
+    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -763,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange
 
-# This operator should be named `
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1220,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise
 
+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -1621,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps
 
 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1847,7 +1868,10 @@
 - func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-
+
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out
 
 - func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
@@ -2346,7 +2370,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core
 
@@ -2452,7 +2476,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out
 
@@ -2954,12 +2978,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps
 
 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out
 
 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2967,12 +2993,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps
 
 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out
 
 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2980,12 +3008,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps
 
 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out
 
 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
@@ -3302,11 +3332,15 @@
   dispatch:
     CUDA: _cslt_compress
 
-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm
 
-- func:
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear
 
@@ -4058,12 +4092,18 @@
 
 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
   dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
     CUDA: _convert_weight_to_int4pack_cuda
 
 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
+    CPU: _weight_int4pack_mm_cpu
     CUDA: _weight_int4pack_mm_cuda
 
+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
 
@@ -4439,7 +4479,6 @@
     MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core
 
 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
@@ -4810,7 +4849,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor:
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint
 
 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4969,6 +5008,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
 
 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -5356,6 +5396,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out
 
+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+#   input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+#   easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
@@ -5363,7 +5418,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]
 
 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
@@ -5562,6 +5617,14 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda
 
+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
@@ -5753,6 +5816,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out
 
 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6008,7 +6072,6 @@
     CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
-  tags: core
 
 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args
 
@@ -6091,6 +6154,52 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out
 
+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
@@ -6275,6 +6384,7 @@
   variants: function
   dispatch:
     CPU, CUDA: var_mean
+    MPS: var_mean_mps
   autogen: var_mean.correction_out
 
 - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6295,15 +6405,13 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: where
-    MPS: where_mps
+    CPU, CUDA, MPS: where
   tags: [core, pointwise]
 
 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: where_self_out
-    MPS: where_self_out_mps
+    CPU, CUDA, MPS: where_self_out
 
 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6644,7 +6752,7 @@
     MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
-    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
     NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
@@ -6934,7 +7042,7 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.
 
-- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor
 
@@ -6951,7 +7059,10 @@
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 
-- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -7037,7 +7148,7 @@
   dispatch:
     CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
   device_check: NoCheck
   device_guard: False
 
@@ -7054,7 +7165,7 @@
   dispatch:
     CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
   device_check: NoCheck
   device_guard: False
 
@@ -7070,7 +7181,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False
 
@@ -7133,7 +7244,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
-    SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
@@ -7142,7 +7253,7 @@
 - func: crow_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
     CompositeExplicitAutograd: crow_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7150,7 +7261,7 @@
 - func: col_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
     CompositeExplicitAutograd: col_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7158,7 +7269,7 @@
 - func: ccol_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
     CompositeExplicitAutograd: ccol_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7166,7 +7277,7 @@
 - func: row_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
     CompositeExplicitAutograd: row_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7675,6 +7786,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_
   autogen: set.source_Storage, set.source_Storage_out
+  tags: inplace_view
 
 - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7687,6 +7799,7 @@
     MPS: set_storage_mps_
     QuantizedCPU, QuantizedCUDA: set_storage_quantized_
   autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+  tags: inplace_view
 
 - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7694,6 +7807,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: set__symint
+  tags: inplace_view
 
 - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
   variants: method
@@ -7702,6 +7816,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_tensor_
   autogen: set.source_Tensor, set.source_Tensor_out
+  tags: inplace_view
 
 - func: set_(Tensor(a!) self) -> Tensor(a!)
   variants: method
@@ -7711,6 +7826,7 @@
     Meta: set_meta_
     MPS: set_mps_
   autogen: set, set.out
+  tags: inplace_view
 
 # Not making it CompositeImplicitAutograd because lift
 # should be a primitive w.r.t. functorch
@@ -10112,12 +10228,14 @@
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+    CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out
 
 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+    CPU: _amp_update_scale_cpu_
   autogen: _amp_update_scale, _amp_update_scale.out
 
 #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
@@ -12341,6 +12459,7 @@
   dispatch:
     CPU: upsample_linear1d_out_cpu
     CUDA: upsample_linear1d_out_cuda
+    MPS: upsample_linear1d_out_mps
 
 - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn
@@ -12352,6 +12471,7 @@
   dispatch:
     CPU: upsample_linear1d_backward_out_cpu
     CUDA: upsample_linear1d_backward_out_cuda
+    MPS: upsample_linear1d_backward_out_mps
 
 - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn
@@ -12824,7 +12944,7 @@
     SparseMeta: isinf_sparse_meta
     SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
   autogen: isinf.out
-  tags: core
+  tags: [core, pointwise]
 
 - func: record_stream(Tensor(a!) self, Stream s) -> ()
   variants: method
@@ -13750,11 +13870,18 @@
   dispatch:
     CPU, CUDA: linalg_eig_out
 
+- func: _linalg_eigvals(Tensor self) -> Tensor
+  python_module: linalg
+  dispatch:
+    CPU, CUDA: _linalg_eigvals
+
 - func: linalg_eigvals(Tensor self) -> Tensor
   python_module: linalg
 
 - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  dispatch:
+    CPU, CUDA: linalg_eigvals_out
 
 # This function is exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
 # `linalg.eigvalsh` as composite functions that call this one
@@ -14058,6 +14185,12 @@
 # It is undocumented and should not be used outside of tests.
 - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor
 
+# Note: for testing COW materialization within `at::parallel_for` loop function
+- func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _test_parallel_materialize
+
 # Note: this function is only for testing.
 - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
   python_module: nn
@@ -14392,6 +14525,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutograd: split_with_sizes_copy_out
+    CUDA: split_with_sizes_copy_out_cuda
 
 - func: view_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -14468,19 +14602,28 @@
 
 - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_cpu
     CUDA: _scaled_dot_product_flash_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
   tags: nondeterministic_seeded
 
+- func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu
+  tags: nondeterministic_seeded
+
 - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
   device_check: NoCheck
   variants: function
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_backward_cpu
     CUDA: _scaled_dot_product_flash_attention_backward_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested
 
+- func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu_backward
+
 - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
   dispatch:
     CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14493,6 +14636,11 @@
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
   tags: nondeterministic_seeded
 
+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_cuda
+  tags: nondeterministic_seeded
+
 - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
@@ -14505,8 +14653,8 @@
   dispatch:
     CUDA: _flash_attention_backward
 
-# Returns
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
+# Returns output, logsumexp if compute_logsumexp
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
@@ -15345,6 +15493,22 @@
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
 
+- func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  variants: function
+  dispatch:
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd, _fused_sgd.out
+
+- func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  # but still skip the device check as the Tensor LR can be on CPU
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function
data/ext/torch/utils.h
CHANGED
data/lib/torch/nn/elu.rb
ADDED
@@ -0,0 +1,20 @@
+module Torch
+  module NN
+    class ELU < Module
+      def initialize(alpha: 1, inplace: false)
+        super()
+        @alpha = alpha
+        @inplace = inplace
+      end
+
+      def forward(input)
+        F.elu(input, alpha: @alpha, inplace: @inplace)
+      end
+
+      def extra_inspect
+        inplace_str = @inplace ? ", inplace: true" : ""
+        format("alpha: %s", @alpha) + inplace_str
+      end
+    end
+  end
+end
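For context, a minimal usage sketch of the new `Torch::NN::ELU` module; the tensor values and `alpha` setting are illustrative and not part of the diff:

```ruby
require "torch"

# ELU keeps positive values and maps negative values toward -alpha.
layer = Torch::NN::ELU.new(alpha: 1.0)
x = Torch.tensor([-2.0, 0.0, 3.0])
y = layer.call(x)  # modules are invoked with #call, which dispatches to #forward
```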
data/lib/torch/nn/functional.rb
CHANGED
@@ -174,6 +174,18 @@ module Torch
 
       # activation layers
 
+      def elu(input, alpha: 1, inplace: false)
+        if inplace
+          NN.elu!(input, alpha)
+        else
+          NN.elu(input, alpha)
+        end
+      end
+
+      def gelu(input, approximate: 'none')
+        NN.gelu(input, approximate: approximate)
+      end
+
       def hardshrink(input, lambd = 0.5)
         Torch.hardshrink(input, lambd)
       end
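The functional counterparts added above can also be called directly; a short sketch, assuming `Torch::NN::Functional` as the receiver (as elsewhere in the gem):

```ruby
require "torch"

F = Torch::NN::Functional
x = Torch.randn(2, 3)

a = F.elu(x, alpha: 1.0)              # out-of-place
F.elu(x, alpha: 1.0, inplace: true)   # routes to the in-place kernel and modifies x
b = F.gelu(x, approximate: "tanh")    # tanh approximation; the default is "none"
```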
data/lib/torch/nn/gelu.rb
ADDED
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class GELU < Module
+      def initialize(approximate: 'none')
+        super()
+        @approximate = approximate
+      end
+
+      def forward(input)
+        F.gelu(input, approximate: @approximate)
+      end
+
+      def extra_inspect
+        "approximate: #{@approximate.inspect}"
+      end
+    end
+  end
+end
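And the module form of the new GELU activation, mirroring the ELU sketch above (again illustrative, not from the diff):

```ruby
require "torch"

x = Torch.randn(4)

gelu_exact = Torch::NN::GELU.new                       # approximate: "none" (default)
gelu_tanh  = Torch::NN::GELU.new(approximate: "tanh")  # faster tanh-based approximation

y1 = gelu_exact.call(x)
y2 = gelu_tanh.call(x)
```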
data/lib/torch/nn/leaky_relu.rb
CHANGED
data/lib/torch/version.rb
CHANGED
data/lib/torch.rb
CHANGED
@@ -123,6 +123,8 @@ require_relative "torch/nn/dropout3d"
 require_relative "torch/nn/feature_alpha_dropout"
 
 # nn activations
+require_relative "torch/nn/elu"
+require_relative "torch/nn/gelu"
 require_relative "torch/nn/hardshrink"
 require_relative "torch/nn/leaky_relu"
 require_relative "torch/nn/log_sigmoid"
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: torch-rb
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.16.0
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-
+date: 2024-06-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -43,24 +43,17 @@ files:
 - ext/torch/ext.cpp
 - ext/torch/extconf.rb
 - ext/torch/fft.cpp
-- ext/torch/fft_functions.h
 - ext/torch/generator.cpp
 - ext/torch/ivalue.cpp
 - ext/torch/linalg.cpp
-- ext/torch/linalg_functions.h
 - ext/torch/nn.cpp
-- ext/torch/nn_functions.h
 - ext/torch/random.cpp
 - ext/torch/ruby_arg_parser.cpp
 - ext/torch/ruby_arg_parser.h
-- ext/torch/sparse_functions.h
 - ext/torch/special.cpp
-- ext/torch/special_functions.h
 - ext/torch/templates.h
 - ext/torch/tensor.cpp
-- ext/torch/tensor_functions.h
 - ext/torch/torch.cpp
-- ext/torch/torch_functions.h
 - ext/torch/utils.h
 - ext/torch/wrap_outputs.h
 - lib/torch-rb.rb
@@ -103,12 +96,14 @@ files:
 - lib/torch/nn/dropout2d.rb
 - lib/torch/nn/dropout3d.rb
 - lib/torch/nn/dropoutnd.rb
+- lib/torch/nn/elu.rb
 - lib/torch/nn/embedding.rb
 - lib/torch/nn/embedding_bag.rb
 - lib/torch/nn/feature_alpha_dropout.rb
 - lib/torch/nn/fold.rb
 - lib/torch/nn/functional.rb
 - lib/torch/nn/functional_attention.rb
+- lib/torch/nn/gelu.rb
 - lib/torch/nn/group_norm.rb
 - lib/torch/nn/gru.rb
 - lib/torch/nn/hardshrink.rb
@@ -230,14 +225,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '3'
+      version: '3.1'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.
+rubygems_version: 3.5.11
 signing_key:
 specification_version: 4
 summary: Deep learning for Ruby, powered by LibTorch
data/ext/torch/fft_functions.h
DELETED
data/ext/torch/linalg_functions.h
DELETED
data/ext/torch/nn_functions.h
DELETED
data/ext/torch/sparse_functions.h
DELETED
data/ext/torch/special_functions.h
DELETED
data/ext/torch/tensor_functions.h
DELETED
data/ext/torch/torch_functions.h
DELETED