torch-rb 0.15.0 → 0.16.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 5cb7d8bf760b3f2a52976b0fda929cd7227a89ed7475b19a87cd9ce53ab5fe4e
- data.tar.gz: 9977b9740c8490b4be11682dbd700bfe1175cf1f109e81412a694c4a4fd4a043
+ metadata.gz: 8442fc0f85d6f2465258a54e5aefbe03d23a7c0e58753e06855bfebd2f4de802
+ data.tar.gz: ac0efb89f9b6d413498bfb1c2e84336aa728047dd013d00fa736449e5be82617
  SHA512:
- metadata.gz: fb7283603fff1ad25fd234926abaeaafada2f0def3b5a52051242f3348445a5e4b615680f414d09c72a502c355fade701e79867444dd848c8dcd989f42359f19
- data.tar.gz: bb41a341f271fb5bce47ad6e1a6ef223b6d7c01165a86de13681d25720c96d5a17de81e549439feedcf482c4e539083e3e04e97a768999438b939bc235fb652d
+ metadata.gz: 6830efe74de98fc8a8d23e7308795a60ee60fff72b3f82fa7cb92815f4efe52fdf3637e0821490f5e3e8c2c8731043f52f5aff20cfb01db1340be0962fed18db
+ data.tar.gz: 3e50976e5add37b4158956c76e3c922167911492acda9e171af42ad39d5abe946c36427e545d9fc820a2800e3df0523b0068ce76b804d0c05a6f1e2ad495de01
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ ## 0.16.0 (2024-06-12)
+
+ - Updated LibTorch to 2.3.0
+ - Added `ELU` and `GELU` classes
+ - Dropped support for Ruby < 3.1
+
  ## 0.15.0 (2024-02-28)

  - Updated LibTorch to 2.2.0
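The two new activation classes mirror their PyTorch counterparts and are defined later in this diff (`lib/torch/nn/elu.rb` and `lib/torch/nn/gelu.rb`). A minimal usage sketch, assuming the pre-existing `Torch::NN::Sequential`, `Torch::NN::Linear`, and `Torch.randn` APIs from earlier torch-rb releases, and that `Module#call` dispatches to `forward` as before:

```ruby
require "torch"

# Illustrative only: the ELU/GELU constructor arguments come from the classes
# added in this release; the surrounding layers are existing torch-rb API.
model = Torch::NN::Sequential.new(
  Torch::NN::Linear.new(8, 16),
  Torch::NN::GELU.new(approximate: "none"),
  Torch::NN::Linear.new(16, 1),
  Torch::NN::ELU.new(alpha: 1.0)
)

x = Torch.randn(4, 8)
puts model.call(x).shape.inspect
```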
data/README.md CHANGED
@@ -410,6 +410,7 @@ Here’s the list of compatible versions.

  Torch.rb | LibTorch
  --- | ---
+ 0.16.x | 2.3.x
  0.15.x | 2.2.x
  0.14.x | 2.1.x
  0.13.x | 2.0.x
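Because each Torch.rb minor release tracks one LibTorch minor release, pinning the gem keeps the native-library requirement predictable. A small sketch, assuming a standard Bundler setup; the pessimistic version constraint is illustrative:

```ruby
# Gemfile
gem "torch-rb", "~> 0.16.0" # expects LibTorch 2.3.x per the table above
```

At runtime, `Torch::VERSION` (defined in `lib/torch/version.rb`, updated later in this diff) reports the gem version; an incompatible LibTorch install fails the `static_assert` in `ext/torch/utils.h` at build time rather than at runtime.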
@@ -134,7 +134,7 @@
  autogen: _new_zeros_with_same_feature_meta.out

  # This function compares the storage numel of self with that of other, where
- # storage numel is cumputed as: `other.storage().nbytes() / other.itemsize()`.
+ # storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
  # We create this function for composite compliance purposes. The batching rule
  # always returns true because vmapped as_strided does not support accessing
  # storage locations not indexable by the input tensor.
@@ -175,12 +175,24 @@
  CPU: _assert_async_msg_cpu
  CUDA: _assert_async_msg_cuda

+ - func: _assert_scalar(Scalar self, str assert_msg) -> ()
+ dispatch:
+ CompositeExplicitAutograd: _assert_scalar
+
+ - func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _functional_assert_scalar
+
  - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
  dispatch:
  CPU: _functional_assert_async_msg_cpu

  - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()

+ - func: _print(str s) -> ()
+ dispatch:
+ CompositeExplicitAutograd: _print
+
  - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
  dispatch:
  CompositeExplicitAutograd: sym_constrain_range
@@ -470,6 +482,7 @@
  - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU, CUDA: conj_physical_out
+ MPS: conj_physical_out_mps
  SparseCPU, SparseCUDA: conj_physical_out_sparse
  SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
  tags: pointwise
@@ -564,8 +577,8 @@
  dispatch:
  SparseCPU: add_out_sparse_cpu
  SparseCUDA: add_out_sparse_cuda
- SparseCsrCPU: add_out_sparse_csr_cpu
- SparseCsrCUDA: add_out_sparse_csr_cuda
+ SparseCsrCPU: add_out_sparse_compressed_cpu
+ SparseCsrCUDA: add_out_sparse_compressed_cuda
  MkldnnCPU: mkldnn_add_out
  MPS: add_out_mps
  tags: pointwise
@@ -763,7 +776,7 @@
  dispatch:
  CompositeExplicitAutograd: arange

- # This operator should be named `aragne.start_out` if following the naming convention. However that
+ # This operator should be named `arange.start_out` if following the naming convention. However that
  # name is already taken. Disabled because of CI job failures.
  # FIXME: enable this
  #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1220,6 +1233,13 @@
  CompositeExplicitAutograd: copysign_out
  tags: pointwise

+ - func: _lazy_clone(Tensor self) -> Tensor
+ # Like clone, but the copy takes place lazily, only if either the
+ # input or the output are written.
+ variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: _lazy_clone
+
  - func: logical_not(Tensor self) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
@@ -1621,6 +1641,7 @@
  - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU, CUDA: complex_out
+ MPS: complex_out_mps

  - func: polar(Tensor abs, Tensor angle) -> Tensor
  variants: function
@@ -1847,7 +1868,10 @@
  - func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
  dispatch:
  CUDA: cudnn_convolution
- autogen: cudnn_convolution.out
+
+ - func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CUDA: cudnn_convolution_out

  - func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
  dispatch:
@@ -2346,7 +2370,7 @@
  Meta: empty_meta_symint
  MkldnnCPU: empty_mkldnn
  SparseCPU, SparseCUDA, SparseMeta: empty_sparse
- SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
  QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
  tags: core

@@ -2452,7 +2476,7 @@
  CompositeExplicitAutograd: empty_like
  QuantizedCPU, QuantizedCUDA: empty_like_quantized
  SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
- SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
  NestedTensorCPU, NestedTensorCUDA: empty_like_nested
  autogen: empty_like.out

@@ -2954,12 +2978,14 @@
  dispatch:
  CPU: _fft_r2c_mkl
  CUDA: _fft_r2c_cufft
+ MPS: _fft_r2c_mps

  - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
  CPU: _fft_r2c_mkl_out
  CUDA: _fft_r2c_cufft_out
+ MPS: _fft_r2c_mps_out

  # Complex to real inverse FFT
  - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2967,12 +2993,14 @@
  dispatch:
  CPU: _fft_c2r_mkl
  CUDA: _fft_c2r_cufft
+ MPS: _fft_c2r_mps

  - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
  CPU: _fft_c2r_mkl_out
  CUDA: _fft_c2r_cufft_out
+ MPS: _fft_c2r_mps_out

  # Standard complex to complex FFT (forward or backward)
  - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2980,12 +3008,14 @@
  dispatch:
  CPU: _fft_c2c_mkl
  CUDA: _fft_c2c_cufft
+ MPS: _fft_c2c_mps

  - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
  CPU: _fft_c2c_mkl_out
  CUDA: _fft_c2c_cufft_out
+ MPS: _fft_c2c_mps_out

  - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
  device_check: NoCheck
@@ -3302,11 +3332,15 @@
  dispatch:
  CUDA: _cslt_compress

- - func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> Tensor
+ - func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
  dispatch:
  CUDA: _cslt_sparse_mm

- - func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None) -> Tensor
+ - func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+ dispatch:
+ CUDA: _cslt_sparse_mm_search
+
+ - func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
  dispatch:
  CUDA: _sparse_semi_structured_linear

@@ -4058,12 +4092,18 @@

  - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
  dispatch:
+ CPU: _convert_weight_to_int4pack_cpu
  CUDA: _convert_weight_to_int4pack_cuda

  - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
  dispatch:
+ CPU: _weight_int4pack_mm_cpu
  CUDA: _weight_int4pack_mm_cuda

+ - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+ dispatch:
+ CPU: _weight_int8pack_mm_cpu
+
  - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
  python_module: sparse

@@ -4439,7 +4479,6 @@
  MPS: pixel_shuffle_mps
  CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
  autogen: pixel_shuffle.out
- tags: core

  - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
  dispatch:
@@ -4810,7 +4849,7 @@
  device_guard: False
  dispatch:
  CompositeImplicitAutograd: reshape_symint
- CompositeImplicitAutogradNestedTensor: reshape_nested
+ CompositeImplicitAutogradNestedTensor: reshape_nested_symint

  - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
  variants: function
@@ -4969,6 +5008,7 @@
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
+ QuantizedCPU: gelu_quantized_cpu_
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_

  - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -5356,6 +5396,21 @@
  CompositeExplicitAutograd: slice_backward
  autogen: slice_backward.out

+ # NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+ # slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+ # of PT2 graph input subclass instances that are views. This means:
+ # * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+ # * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+ # * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+ # input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+ # easier to implement for a subclass than as_strided()
+ - func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+ variants: function, method
+ device_check: NoCheck
+ device_guard: False
+ dispatch:
+ CompositeExplicitAutograd: slice_inverse_symint
+
  - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
  variants: function, method
  device_check: NoCheck
@@ -5363,7 +5418,7 @@
  dispatch:
  CompositeExplicitAutogradNonFunctional: slice_scatter
  autogen: slice_scatter.out
- tags: core
+ tags: [core, view_copy]

  - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
  variants: function, method
@@ -5562,6 +5617,14 @@
  SparseCPU: _sspaddmm_out_cpu
  SparseCUDA: _sspaddmm_out_cuda

+ - func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _chunk_cat
+
+ - func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: _chunk_cat_out
+
  - func: stack(Tensor[] tensors, int dim=0) -> Tensor
  dispatch:
  CompositeExplicitAutograd: stack
@@ -5753,6 +5816,7 @@
  variants: function
  dispatch:
  CPU, CUDA: std_mean
+ MPS: std_mean_mps
  autogen: std_mean.correction_out

  - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6008,7 +6072,6 @@
  CPU, MPS: roll
  CUDA: roll_cuda
  autogen: roll.out
- tags: core

  # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args

@@ -6091,6 +6154,52 @@
  CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
  autogen: _nested_view_from_buffer_copy.out

+ - func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ - func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+ variants: function
+ device_check: NoCheck
+ tags: view_copy
+ dispatch:
+ CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+ autogen: _nested_view_from_jagged_copy.out
+
+ - func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ - func: _nested_get_values_copy(Tensor self) -> Tensor
+ variants: function
+ device_check: NoCheck
+ tags: view_copy
+ dispatch:
+ CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+ autogen: _nested_get_values_copy.out
+
+ - func: _nested_get_offsets(Tensor self) -> Tensor
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ # returns undefined Tensor if no lengths present
+ - func: _nested_get_lengths(Tensor self) -> Tensor
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ - func: _nested_get_ragged_idx(Tensor self) -> int
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ - func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+ category_override: dummy
+ dispatch: {}
+
  - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
  dispatch:
  # calls unsqueeze
@@ -6275,6 +6384,7 @@
  variants: function
  dispatch:
  CPU, CUDA: var_mean
+ MPS: var_mean_mps
  autogen: var_mean.correction_out

  - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6295,15 +6405,13 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- CPU, CUDA: where
- MPS: where_mps
+ CPU, CUDA, MPS: where
  tags: [core, pointwise]

  - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: where_self_out
- MPS: where_self_out_mps
+ CPU, CUDA, MPS: where_self_out

  - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
  variants: function
@@ -6644,7 +6752,7 @@
  MPS: zero_mps_
  Meta: zero_meta_
  SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
- SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
  MkldnnCPU: mkldnn_zero_
  NestedTensorCPU, NestedTensorCUDA: zero_nested_
  autogen: zero, zero.out
@@ -6934,7 +7042,7 @@
  # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
  # the default would never make sense.

- - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+ - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
  dispatch:
  CompositeExplicitAutograd: sparse_compressed_tensor

@@ -6951,7 +7059,10 @@
  - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
  - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

- - func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ - func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ dispatch:
+ CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
  - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -7037,7 +7148,7 @@
  dispatch:
  CPU, CUDA: sparse_dim_strided
  SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
- SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
  device_check: NoCheck
  device_guard: False

@@ -7054,7 +7165,7 @@
  dispatch:
  CPU, CUDA: dense_dim_strided
  SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
- SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
  device_check: NoCheck
  device_guard: False

@@ -7070,7 +7181,7 @@
  variants: method
  dispatch:
  SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
- SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
  device_check: NoCheck
  device_guard: False

@@ -7133,7 +7244,7 @@
  variants: method
  dispatch:
  SparseCPU, SparseCUDA, SparseMeta: values_sparse
- SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
  NestedTensorCPU, NestedTensorCUDA: values_nested
  CompositeExplicitAutograd: values_default
  device_check: NoCheck
@@ -7142,7 +7253,7 @@
  - func: crow_indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
  CompositeExplicitAutograd: crow_indices_default
  device_check: NoCheck
  device_guard: False
@@ -7150,7 +7261,7 @@
  - func: col_indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
  CompositeExplicitAutograd: col_indices_default
  device_check: NoCheck
  device_guard: False
@@ -7158,7 +7269,7 @@
  - func: ccol_indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
  CompositeExplicitAutograd: ccol_indices_default
  device_check: NoCheck
  device_guard: False
@@ -7166,7 +7277,7 @@
  - func: row_indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
  CompositeExplicitAutograd: row_indices_default
  device_check: NoCheck
  device_guard: False
@@ -7675,6 +7786,7 @@
  dispatch:
  CPU, CUDA, Meta, MPS: set_
  autogen: set.source_Storage, set.source_Storage_out
+ tags: inplace_view

  - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
  variants: method
@@ -7687,6 +7799,7 @@
  MPS: set_storage_mps_
  QuantizedCPU, QuantizedCUDA: set_storage_quantized_
  autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+ tags: inplace_view

  - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
  variants: method
@@ -7694,6 +7807,7 @@
  device_guard: False
  dispatch:
  CompositeImplicitAutograd: set__symint
+ tags: inplace_view

  - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
  variants: method
@@ -7702,6 +7816,7 @@
  dispatch:
  CPU, CUDA, Meta, MPS: set_tensor_
  autogen: set.source_Tensor, set.source_Tensor_out
+ tags: inplace_view

  - func: set_(Tensor(a!) self) -> Tensor(a!)
  variants: method
@@ -7711,6 +7826,7 @@
  Meta: set_meta_
  MPS: set_mps_
  autogen: set, set.out
+ tags: inplace_view

  # Not making it CompositeImplicitAutograd because lift
  # should be a primitive w.r.t. functorch
@@ -10112,12 +10228,14 @@
  variants: function
  dispatch:
  CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+ CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
  autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

  - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
  variants: function
  dispatch:
  CUDA: _amp_update_scale_cuda_
+ CPU: _amp_update_scale_cpu_
  autogen: _amp_update_scale, _amp_update_scale.out

  #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
@@ -12341,6 +12459,7 @@
  dispatch:
  CPU: upsample_linear1d_out_cpu
  CUDA: upsample_linear1d_out_cuda
+ MPS: upsample_linear1d_out_mps

  - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
  python_module: nn
@@ -12352,6 +12471,7 @@
  dispatch:
  CPU: upsample_linear1d_backward_out_cpu
  CUDA: upsample_linear1d_backward_out_cuda
+ MPS: upsample_linear1d_backward_out_mps

  - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
  python_module: nn
@@ -12824,7 +12944,7 @@
  SparseMeta: isinf_sparse_meta
  SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
  autogen: isinf.out
- tags: core
+ tags: [core, pointwise]

  - func: record_stream(Tensor(a!) self, Stream s) -> ()
  variants: method
@@ -13750,11 +13870,18 @@
  dispatch:
  CPU, CUDA: linalg_eig_out

+ - func: _linalg_eigvals(Tensor self) -> Tensor
+ python_module: linalg
+ dispatch:
+ CPU, CUDA: _linalg_eigvals
+
  - func: linalg_eigvals(Tensor self) -> Tensor
  python_module: linalg

  - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg
+ dispatch:
+ CPU, CUDA: linalg_eigvals_out

  # This function is exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
  # `linalg.eigvalsh` as composite functions that call this one
@@ -14058,6 +14185,12 @@
  # It is undocumented and should not be used outside of tests.
  - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor

+ # Note: for testing COW materialization within `at::parallel_for` loop function
+ - func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _test_parallel_materialize
+
  # Note: this function is only for testing.
  - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
  python_module: nn
@@ -14392,6 +14525,7 @@
  variants: function
  dispatch:
  CompositeExplicitAutograd: split_with_sizes_copy_out
+ CUDA: split_with_sizes_copy_out_cuda

  - func: view_copy(Tensor self, SymInt[] size) -> Tensor
  variants: function
@@ -14468,19 +14602,28 @@

  - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
- CPU: _scaled_dot_product_flash_attention_cpu
  CUDA: _scaled_dot_product_flash_attention_cuda
  NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
  tags: nondeterministic_seeded

+ - func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+ dispatch:
+ CPU: _scaled_dot_product_flash_attention_cpu
+ tags: nondeterministic_seeded
+
  - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
  device_check: NoCheck
  variants: function
  dispatch:
- CPU: _scaled_dot_product_flash_attention_backward_cpu
  CUDA: _scaled_dot_product_flash_attention_backward_cuda
  NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested

+ - func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CPU: _scaled_dot_product_flash_attention_cpu_backward
+
  - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
  dispatch:
  CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14493,6 +14636,11 @@
  CUDA: _scaled_dot_product_efficient_attention_backward_cuda
  tags: nondeterministic_seeded

+ - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+ dispatch:
+ CUDA: _scaled_dot_product_cudnn_attention_cuda
+ tags: nondeterministic_seeded
+
  - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  variants: function
  dispatch:
@@ -14505,8 +14653,8 @@
  dispatch:
  CUDA: _flash_attention_backward

- # Returns ouput, logsumexp if compute_logsumexp
- - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
+ # Returns output, logsumexp if compute_logsumexp
+ - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
  variants: function
  dispatch:
  CUDA: _efficient_attention_forward
@@ -15345,6 +15493,22 @@
  CUDA: _fused_adamw_kernel_cuda_
  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

+ - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+ # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+ variants: function
+ dispatch:
+ CUDA: _fused_sgd_kernel_cuda_
+ autogen: _fused_sgd, _fused_sgd.out
+
+ - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+ # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+ # but still skip the device check as the Tensor LR can be on CPU
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CUDA: _fused_sgd_kernel_cuda_
+ autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
  # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
  - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
  variants: function
data/ext/torch/utils.h CHANGED
@@ -6,7 +6,7 @@
  #include <rice/stl.hpp>

  static_assert(
- TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR == 2,
+ TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR == 3,
  "Incompatible LibTorch version"
  );

@@ -0,0 +1,20 @@
+ module Torch
+ module NN
+ class ELU < Module
+ def initialize(alpha: 1, inplace: false)
+ super()
+ @alpha = alpha
+ @inplace = inplace
+ end
+
+ def forward(input)
+ F.elu(input, alpha: @alpha, inplace: @inplace)
+ end
+
+ def extra_inspect
+ inplace_str = @inplace ? ", inplace: true" : ""
+ format("alpha: %s", @alpha) + inplace_str
+ end
+ end
+ end
+ end
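A short sketch of the class above (from the new `lib/torch/nn/elu.rb`). It assumes `Torch.tensor` and `Module#call` (which dispatches to `forward`) behave as in prior torch-rb releases; the printed string follows `extra_inspect` exactly as written:

```ruby
# Hypothetical usage of the ELU layer added in 0.16.0.
elu = Torch::NN::ELU.new(alpha: 0.5, inplace: true)
x = Torch.tensor([[-1.0, 0.0, 2.0]])
y = elu.call(x)          # forwards to F.elu with alpha: 0.5, inplace: true
puts elu.extra_inspect   # => "alpha: 0.5, inplace: true"
```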
@@ -174,6 +174,18 @@ module Torch

  # activation layers

+ def elu(input, alpha: 1, inplace: false)
+ if inplace
+ NN.elu!(input, alpha)
+ else
+ NN.elu(input, alpha)
+ end
+ end
+
+ def gelu(input, approximate: 'none')
+ NN.gelu(input, approximate: approximate)
+ end
+
  def hardshrink(input, lambd = 0.5)
  Torch.hardshrink(input, lambd)
  end
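The functional forms above route to the native `elu`/`elu!`/`gelu` bindings. A minimal sketch, assuming these methods are exposed as class methods of `Torch::NN::Functional` (aliased as `F` inside `Torch::NN`), like the other functional activations in this file, and that `Torch.randn` behaves as in earlier releases; the `'none'`/`'tanh'` values for `approximate` come from LibTorch's `gelu`:

```ruby
f = Torch::NN::Functional

x = Torch.randn(2, 3)
a = f.elu(x, alpha: 1.0)              # out-of-place
f.elu(x, alpha: 1.0, inplace: true)   # mutates x via NN.elu!
g = f.gelu(x, approximate: "tanh")    # default is "none"
```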
@@ -0,0 +1,18 @@
+ module Torch
+ module NN
+ class GELU < Module
+ def initialize(approximate: 'none')
+ super()
+ @approximate = approximate
+ end
+
+ def forward(input)
+ F.gelu(input, approximate: @approximate)
+ end
+
+ def extra_inspect
+ "approximate: #{@approximate.inspect}"
+ end
+ end
+ end
+ end
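A companion sketch for the GELU layer above (from the new `lib/torch/nn/gelu.rb`), under the same assumptions as the ELU example:

```ruby
# Hypothetical usage of the GELU layer added in 0.16.0.
gelu = Torch::NN::GELU.new(approximate: "tanh")
x = Torch.randn(2, 4)
y = gelu.call(x)          # forwards to F.gelu(input, approximate: "tanh")
puts gelu.extra_inspect   # => 'approximate: "tanh"'
```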
@@ -13,7 +13,7 @@ module Torch

  def extra_inspect
  inplace_str = @inplace ? ", inplace: true" : ""
- format("negative_slope: %s%s", @negative_slope, inplace_str)
+ format("negative_slope: %s", @negative_slope) + inplace_str
  end
  end
  end
data/lib/torch/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Torch
- VERSION = "0.15.0"
+ VERSION = "0.16.0"
  end
data/lib/torch.rb CHANGED
@@ -123,6 +123,8 @@ require_relative "torch/nn/dropout3d"
  require_relative "torch/nn/feature_alpha_dropout"

  # nn activations
+ require_relative "torch/nn/elu"
+ require_relative "torch/nn/gelu"
  require_relative "torch/nn/hardshrink"
  require_relative "torch/nn/leaky_relu"
  require_relative "torch/nn/log_sigmoid"
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: torch-rb
  version: !ruby/object:Gem::Version
- version: 0.15.0
+ version: 0.16.0
  platform: ruby
  authors:
  - Andrew Kane
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-02-29 00:00:00.000000000 Z
+ date: 2024-06-13 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rice
@@ -43,24 +43,17 @@ files:
  - ext/torch/ext.cpp
  - ext/torch/extconf.rb
  - ext/torch/fft.cpp
- - ext/torch/fft_functions.h
  - ext/torch/generator.cpp
  - ext/torch/ivalue.cpp
  - ext/torch/linalg.cpp
- - ext/torch/linalg_functions.h
  - ext/torch/nn.cpp
- - ext/torch/nn_functions.h
  - ext/torch/random.cpp
  - ext/torch/ruby_arg_parser.cpp
  - ext/torch/ruby_arg_parser.h
- - ext/torch/sparse_functions.h
  - ext/torch/special.cpp
- - ext/torch/special_functions.h
  - ext/torch/templates.h
  - ext/torch/tensor.cpp
- - ext/torch/tensor_functions.h
  - ext/torch/torch.cpp
- - ext/torch/torch_functions.h
  - ext/torch/utils.h
  - ext/torch/wrap_outputs.h
  - lib/torch-rb.rb
@@ -103,12 +96,14 @@ files:
  - lib/torch/nn/dropout2d.rb
  - lib/torch/nn/dropout3d.rb
  - lib/torch/nn/dropoutnd.rb
+ - lib/torch/nn/elu.rb
  - lib/torch/nn/embedding.rb
  - lib/torch/nn/embedding_bag.rb
  - lib/torch/nn/feature_alpha_dropout.rb
  - lib/torch/nn/fold.rb
  - lib/torch/nn/functional.rb
  - lib/torch/nn/functional_attention.rb
+ - lib/torch/nn/gelu.rb
  - lib/torch/nn/group_norm.rb
  - lib/torch/nn/gru.rb
  - lib/torch/nn/hardshrink.rb
@@ -230,14 +225,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '3'
+ version: '3.1'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.5.3
+ rubygems_version: 3.5.11
  signing_key:
  specification_version: 4
  summary: Deep learning for Ruby, powered by LibTorch
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_fft_functions(Rice::Module& m);
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_linalg_functions(Rice::Module& m);
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_nn_functions(Rice::Module& m);
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_sparse_functions(Rice::Module& m);
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_special_functions(Rice::Module& m);
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_tensor_functions(Rice::Module& m);
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_torch_functions(Rice::Module& m);