torch-rb 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 5cb7d8bf760b3f2a52976b0fda929cd7227a89ed7475b19a87cd9ce53ab5fe4e
- data.tar.gz: 9977b9740c8490b4be11682dbd700bfe1175cf1f109e81412a694c4a4fd4a043
+ metadata.gz: 8442fc0f85d6f2465258a54e5aefbe03d23a7c0e58753e06855bfebd2f4de802
+ data.tar.gz: ac0efb89f9b6d413498bfb1c2e84336aa728047dd013d00fa736449e5be82617
  SHA512:
- metadata.gz: fb7283603fff1ad25fd234926abaeaafada2f0def3b5a52051242f3348445a5e4b615680f414d09c72a502c355fade701e79867444dd848c8dcd989f42359f19
- data.tar.gz: bb41a341f271fb5bce47ad6e1a6ef223b6d7c01165a86de13681d25720c96d5a17de81e549439feedcf482c4e539083e3e04e97a768999438b939bc235fb652d
+ metadata.gz: 6830efe74de98fc8a8d23e7308795a60ee60fff72b3f82fa7cb92815f4efe52fdf3637e0821490f5e3e8c2c8731043f52f5aff20cfb01db1340be0962fed18db
+ data.tar.gz: 3e50976e5add37b4158956c76e3c922167911492acda9e171af42ad39d5abe946c36427e545d9fc820a2800e3df0523b0068ce76b804d0c05a6f1e2ad495de01
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ ## 0.16.0 (2024-06-12)
+
+ - Updated LibTorch to 2.3.0
+ - Added `ELU` and `GELU` classes
+ - Dropped support for Ruby < 3.1
+
  ## 0.15.0 (2024-02-28)

  - Updated LibTorch to 2.2.0
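For readers skimming the changelog, here is a minimal sketch of the two new activation classes in use (illustrative only; the tensor values are arbitrary):

```ruby
require "torch"

x = Torch.randn(2, 3)

# New module classes in 0.16.0
Torch::NN::ELU.new(alpha: 1).call(x)
Torch::NN::GELU.new.call(x)
```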
data/README.md CHANGED
@@ -410,6 +410,7 @@ Here’s the list of compatible versions.

  Torch.rb | LibTorch
  --- | ---
+ 0.16.x | 2.3.x
  0.15.x | 2.2.x
  0.14.x | 2.1.x
  0.13.x | 2.0.x
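To stay on one row of this table in an application, a Gemfile pin along these lines works (a sketch; the matching LibTorch install must be provided separately):

```ruby
# Gemfile: torch-rb 0.16.x pairs with LibTorch 2.3.x per the table above
gem "torch-rb", "~> 0.16.0"
```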
data/codegen/native_functions.yaml CHANGED
@@ -134,7 +134,7 @@
  autogen: _new_zeros_with_same_feature_meta.out

  # This function compares the storage numel of self with that of other, where
- # storage numel is cumputed as: `other.storage().nbytes() / other.itemsize()`.
+ # storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
  # We create this function for composite compliance purposes. The batching rule
  # always returns true because vmapped as_strided does not support accessing
  # storage locations not indexable by the input tensor.
@@ -175,12 +175,24 @@
  CPU: _assert_async_msg_cpu
  CUDA: _assert_async_msg_cuda

+ - func: _assert_scalar(Scalar self, str assert_msg) -> ()
+ dispatch:
+ CompositeExplicitAutograd: _assert_scalar
+
+ - func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _functional_assert_scalar
+
  - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
  dispatch:
  CPU: _functional_assert_async_msg_cpu

  - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()

+ - func: _print(str s) -> ()
+ dispatch:
+ CompositeExplicitAutograd: _print
+
  - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
  dispatch:
  CompositeExplicitAutograd: sym_constrain_range
@@ -470,6 +482,7 @@
  - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU, CUDA: conj_physical_out
+ MPS: conj_physical_out_mps
  SparseCPU, SparseCUDA: conj_physical_out_sparse
  SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
  tags: pointwise
@@ -564,8 +577,8 @@
  dispatch:
  SparseCPU: add_out_sparse_cpu
  SparseCUDA: add_out_sparse_cuda
- SparseCsrCPU: add_out_sparse_csr_cpu
- SparseCsrCUDA: add_out_sparse_csr_cuda
+ SparseCsrCPU: add_out_sparse_compressed_cpu
+ SparseCsrCUDA: add_out_sparse_compressed_cuda
  MkldnnCPU: mkldnn_add_out
  MPS: add_out_mps
  tags: pointwise
@@ -763,7 +776,7 @@
  dispatch:
  CompositeExplicitAutograd: arange

- # This operator should be named `aragne.start_out` if following the naming convention. However that
+ # This operator should be named `arange.start_out` if following the naming convention. However that
  # name is already taken. Disabled because of CI job failures.
  # FIXME: enable this
  #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1220,6 +1233,13 @@
  CompositeExplicitAutograd: copysign_out
  tags: pointwise

+ - func: _lazy_clone(Tensor self) -> Tensor
+ # Like clone, but the copy takes place lazily, only if either the
+ # input or the output are written.
+ variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: _lazy_clone
+
  - func: logical_not(Tensor self) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
@@ -1621,6 +1641,7 @@
  - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU, CUDA: complex_out
+ MPS: complex_out_mps

  - func: polar(Tensor abs, Tensor angle) -> Tensor
  variants: function
@@ -1847,7 +1868,10 @@
  - func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
  dispatch:
  CUDA: cudnn_convolution
- autogen: cudnn_convolution.out
+
+ - func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CUDA: cudnn_convolution_out

  - func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
  dispatch:
@@ -2346,7 +2370,7 @@
  Meta: empty_meta_symint
  MkldnnCPU: empty_mkldnn
  SparseCPU, SparseCUDA, SparseMeta: empty_sparse
- SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
  QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
  tags: core

@@ -2452,7 +2476,7 @@
  CompositeExplicitAutograd: empty_like
  QuantizedCPU, QuantizedCUDA: empty_like_quantized
  SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
- SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
  NestedTensorCPU, NestedTensorCUDA: empty_like_nested
  autogen: empty_like.out

@@ -2954,12 +2978,14 @@
  dispatch:
  CPU: _fft_r2c_mkl
  CUDA: _fft_r2c_cufft
+ MPS: _fft_r2c_mps

  - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
  CPU: _fft_r2c_mkl_out
  CUDA: _fft_r2c_cufft_out
+ MPS: _fft_r2c_mps_out

  # Complex to real inverse FFT
  - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2967,12 +2993,14 @@
  dispatch:
  CPU: _fft_c2r_mkl
  CUDA: _fft_c2r_cufft
+ MPS: _fft_c2r_mps

  - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
  CPU: _fft_c2r_mkl_out
  CUDA: _fft_c2r_cufft_out
+ MPS: _fft_c2r_mps_out

  # Standard complex to complex FFT (forward or backward)
  - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2980,12 +3008,14 @@
  dispatch:
  CPU: _fft_c2c_mkl
  CUDA: _fft_c2c_cufft
+ MPS: _fft_c2c_mps

  - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
  CPU: _fft_c2c_mkl_out
  CUDA: _fft_c2c_cufft_out
+ MPS: _fft_c2c_mps_out

  - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
  device_check: NoCheck
@@ -3302,11 +3332,15 @@
  dispatch:
  CUDA: _cslt_compress

- - func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> Tensor
+ - func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
  dispatch:
  CUDA: _cslt_sparse_mm

- - func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None) -> Tensor
+ - func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+ dispatch:
+ CUDA: _cslt_sparse_mm_search
+
+ - func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
  dispatch:
  CUDA: _sparse_semi_structured_linear

@@ -4058,12 +4092,18 @@

  - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
  dispatch:
+ CPU: _convert_weight_to_int4pack_cpu
  CUDA: _convert_weight_to_int4pack_cuda

  - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
  dispatch:
+ CPU: _weight_int4pack_mm_cpu
  CUDA: _weight_int4pack_mm_cuda

+ - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+ dispatch:
+ CPU: _weight_int8pack_mm_cpu
+
  - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
  python_module: sparse

@@ -4439,7 +4479,6 @@
  MPS: pixel_shuffle_mps
  CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
  autogen: pixel_shuffle.out
- tags: core

  - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
  dispatch:
@@ -4810,7 +4849,7 @@
  device_guard: False
  dispatch:
  CompositeImplicitAutograd: reshape_symint
- CompositeImplicitAutogradNestedTensor: reshape_nested
+ CompositeImplicitAutogradNestedTensor: reshape_nested_symint

  - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
  variants: function
@@ -4969,6 +5008,7 @@
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
+ QuantizedCPU: gelu_quantized_cpu_
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_

  - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -5356,6 +5396,21 @@
  CompositeExplicitAutograd: slice_backward
  autogen: slice_backward.out

+ # NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+ # slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+ # of PT2 graph input subclass instances that are views. This means:
+ # * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+ # * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+ # * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+ # input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+ # easier to implement for a subclass than as_strided()
+ - func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+ variants: function, method
+ device_check: NoCheck
+ device_guard: False
+ dispatch:
+ CompositeExplicitAutograd: slice_inverse_symint
+
  - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
  variants: function, method
  device_check: NoCheck
@@ -5363,7 +5418,7 @@
  dispatch:
  CompositeExplicitAutogradNonFunctional: slice_scatter
  autogen: slice_scatter.out
- tags: core
+ tags: [core, view_copy]

  - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
  variants: function, method
@@ -5562,6 +5617,14 @@
  SparseCPU: _sspaddmm_out_cpu
  SparseCUDA: _sspaddmm_out_cuda

+ - func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _chunk_cat
+
+ - func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: _chunk_cat_out
+
  - func: stack(Tensor[] tensors, int dim=0) -> Tensor
  dispatch:
  CompositeExplicitAutograd: stack
@@ -5753,6 +5816,7 @@
  variants: function
  dispatch:
  CPU, CUDA: std_mean
+ MPS: std_mean_mps
  autogen: std_mean.correction_out

  - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6008,7 +6072,6 @@
  CPU, MPS: roll
  CUDA: roll_cuda
  autogen: roll.out
- tags: core

  # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args

@@ -6091,6 +6154,52 @@
  CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
  autogen: _nested_view_from_buffer_copy.out

+ - func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ - func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+ variants: function
+ device_check: NoCheck
+ tags: view_copy
+ dispatch:
+ CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+ autogen: _nested_view_from_jagged_copy.out
+
+ - func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ - func: _nested_get_values_copy(Tensor self) -> Tensor
+ variants: function
+ device_check: NoCheck
+ tags: view_copy
+ dispatch:
+ CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+ autogen: _nested_get_values_copy.out
+
+ - func: _nested_get_offsets(Tensor self) -> Tensor
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ # returns undefined Tensor if no lengths present
+ - func: _nested_get_lengths(Tensor self) -> Tensor
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ - func: _nested_get_ragged_idx(Tensor self) -> int
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+ - func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+ category_override: dummy
+ dispatch: {}
+
  - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
  dispatch:
  # calls unsqueeze
@@ -6275,6 +6384,7 @@
  variants: function
  dispatch:
  CPU, CUDA: var_mean
+ MPS: var_mean_mps
  autogen: var_mean.correction_out

  - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6295,15 +6405,13 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- CPU, CUDA: where
- MPS: where_mps
+ CPU, CUDA, MPS: where
  tags: [core, pointwise]

  - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: where_self_out
- MPS: where_self_out_mps
+ CPU, CUDA, MPS: where_self_out

  - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
  variants: function
@@ -6644,7 +6752,7 @@
  MPS: zero_mps_
  Meta: zero_meta_
  SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
- SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
  MkldnnCPU: mkldnn_zero_
  NestedTensorCPU, NestedTensorCUDA: zero_nested_
  autogen: zero, zero.out
@@ -6934,7 +7042,7 @@
  # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
  # the default would never make sense.

- - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+ - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
  dispatch:
  CompositeExplicitAutograd: sparse_compressed_tensor

@@ -6951,7 +7059,10 @@
  - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
  - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

- - func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ - func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ dispatch:
+ CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
  - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -7037,7 +7148,7 @@
  dispatch:
  CPU, CUDA: sparse_dim_strided
  SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
- SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
  device_check: NoCheck
  device_guard: False

@@ -7054,7 +7165,7 @@
  dispatch:
  CPU, CUDA: dense_dim_strided
  SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
- SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
  device_check: NoCheck
  device_guard: False

@@ -7070,7 +7181,7 @@
  variants: method
  dispatch:
  SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
- SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
  device_check: NoCheck
  device_guard: False

@@ -7133,7 +7244,7 @@
  variants: method
  dispatch:
  SparseCPU, SparseCUDA, SparseMeta: values_sparse
- SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
  NestedTensorCPU, NestedTensorCUDA: values_nested
  CompositeExplicitAutograd: values_default
  device_check: NoCheck
@@ -7142,7 +7253,7 @@
  - func: crow_indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
  CompositeExplicitAutograd: crow_indices_default
  device_check: NoCheck
  device_guard: False
@@ -7150,7 +7261,7 @@
  - func: col_indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
  CompositeExplicitAutograd: col_indices_default
  device_check: NoCheck
  device_guard: False
@@ -7158,7 +7269,7 @@
  - func: ccol_indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
  CompositeExplicitAutograd: ccol_indices_default
  device_check: NoCheck
  device_guard: False
@@ -7166,7 +7277,7 @@
  - func: row_indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
  CompositeExplicitAutograd: row_indices_default
  device_check: NoCheck
  device_guard: False
@@ -7675,6 +7786,7 @@
  dispatch:
  CPU, CUDA, Meta, MPS: set_
  autogen: set.source_Storage, set.source_Storage_out
+ tags: inplace_view

  - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
  variants: method
@@ -7687,6 +7799,7 @@
  MPS: set_storage_mps_
  QuantizedCPU, QuantizedCUDA: set_storage_quantized_
  autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+ tags: inplace_view

  - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
  variants: method
@@ -7694,6 +7807,7 @@
  device_guard: False
  dispatch:
  CompositeImplicitAutograd: set__symint
+ tags: inplace_view

  - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
  variants: method
@@ -7702,6 +7816,7 @@
  dispatch:
  CPU, CUDA, Meta, MPS: set_tensor_
  autogen: set.source_Tensor, set.source_Tensor_out
+ tags: inplace_view

  - func: set_(Tensor(a!) self) -> Tensor(a!)
  variants: method
@@ -7711,6 +7826,7 @@
  Meta: set_meta_
  MPS: set_mps_
  autogen: set, set.out
+ tags: inplace_view

  # Not making it CompositeImplicitAutograd because lift
  # should be a primitive w.r.t. functorch
@@ -10112,12 +10228,14 @@
  variants: function
  dispatch:
  CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+ CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
  autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

  - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
  variants: function
  dispatch:
  CUDA: _amp_update_scale_cuda_
+ CPU: _amp_update_scale_cpu_
  autogen: _amp_update_scale, _amp_update_scale.out

  #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
@@ -12341,6 +12459,7 @@
  dispatch:
  CPU: upsample_linear1d_out_cpu
  CUDA: upsample_linear1d_out_cuda
+ MPS: upsample_linear1d_out_mps

  - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
  python_module: nn
@@ -12352,6 +12471,7 @@
  dispatch:
  CPU: upsample_linear1d_backward_out_cpu
  CUDA: upsample_linear1d_backward_out_cuda
+ MPS: upsample_linear1d_backward_out_mps

  - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
  python_module: nn
@@ -12824,7 +12944,7 @@
  SparseMeta: isinf_sparse_meta
  SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
  autogen: isinf.out
- tags: core
+ tags: [core, pointwise]

  - func: record_stream(Tensor(a!) self, Stream s) -> ()
  variants: method
@@ -13750,11 +13870,18 @@
  dispatch:
  CPU, CUDA: linalg_eig_out

+ - func: _linalg_eigvals(Tensor self) -> Tensor
+ python_module: linalg
+ dispatch:
+ CPU, CUDA: _linalg_eigvals
+
  - func: linalg_eigvals(Tensor self) -> Tensor
  python_module: linalg

  - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg
+ dispatch:
+ CPU, CUDA: linalg_eigvals_out

  # This function is exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
  # `linalg.eigvalsh` as composite functions that call this one
@@ -14058,6 +14185,12 @@
  # It is undocumented and should not be used outside of tests.
  - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor

+ # Note: for testing COW materialization within `at::parallel_for` loop function
+ - func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _test_parallel_materialize
+
  # Note: this function is only for testing.
  - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
  python_module: nn
@@ -14392,6 +14525,7 @@
  variants: function
  dispatch:
  CompositeExplicitAutograd: split_with_sizes_copy_out
+ CUDA: split_with_sizes_copy_out_cuda

  - func: view_copy(Tensor self, SymInt[] size) -> Tensor
  variants: function
@@ -14468,19 +14602,28 @@

  - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
- CPU: _scaled_dot_product_flash_attention_cpu
  CUDA: _scaled_dot_product_flash_attention_cuda
  NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
  tags: nondeterministic_seeded

+ - func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+ dispatch:
+ CPU: _scaled_dot_product_flash_attention_cpu
+ tags: nondeterministic_seeded
+
  - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
  device_check: NoCheck
  variants: function
  dispatch:
- CPU: _scaled_dot_product_flash_attention_backward_cpu
  CUDA: _scaled_dot_product_flash_attention_backward_cuda
  NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested

+ - func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CPU: _scaled_dot_product_flash_attention_cpu_backward
+
  - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
  dispatch:
  CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14493,6 +14636,11 @@
  CUDA: _scaled_dot_product_efficient_attention_backward_cuda
  tags: nondeterministic_seeded

+ - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+ dispatch:
+ CUDA: _scaled_dot_product_cudnn_attention_cuda
+ tags: nondeterministic_seeded
+
  - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  variants: function
  dispatch:
@@ -14505,8 +14653,8 @@
  dispatch:
  CUDA: _flash_attention_backward

- # Returns ouput, logsumexp if compute_logsumexp
- - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
+ # Returns output, logsumexp if compute_logsumexp
+ - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
  variants: function
  dispatch:
  CUDA: _efficient_attention_forward
@@ -15345,6 +15493,22 @@
  CUDA: _fused_adamw_kernel_cuda_
  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

+ - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+ # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+ variants: function
+ dispatch:
+ CUDA: _fused_sgd_kernel_cuda_
+ autogen: _fused_sgd, _fused_sgd.out
+
+ - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+ # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+ # but still skip the device check as the Tensor LR can be on CPU
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CUDA: _fused_sgd_kernel_cuda_
+ autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
  # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
  - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
  variants: function
data/ext/torch/utils.h CHANGED
@@ -6,7 +6,7 @@
  #include <rice/stl.hpp>

  static_assert(
- TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR == 2,
+ TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR == 3,
  "Incompatible LibTorch version"
  );

data/lib/torch/nn/elu.rb ADDED
@@ -0,0 +1,20 @@
+ module Torch
+   module NN
+     class ELU < Module
+       def initialize(alpha: 1, inplace: false)
+         super()
+         @alpha = alpha
+         @inplace = inplace
+       end
+
+       def forward(input)
+         F.elu(input, alpha: @alpha, inplace: @inplace)
+       end
+
+       def extra_inspect
+         inplace_str = @inplace ? ", inplace: true" : ""
+         format("alpha: %s", @alpha) + inplace_str
+       end
+     end
+   end
+ end
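A short usage sketch for the class added above (output values are illustrative):

```ruby
require "torch"

# ELU(x) = x for x > 0, alpha * (exp(x) - 1) otherwise
m = Torch::NN::ELU.new(alpha: 1)
m.call(Torch.tensor([-2.0, 0.0, 2.0]))
# negative inputs saturate toward -alpha; positive inputs pass through
```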
data/lib/torch/nn/functional.rb CHANGED
@@ -174,6 +174,18 @@ module Torch

  # activation layers

+ def elu(input, alpha: 1, inplace: false)
+   if inplace
+     NN.elu!(input, alpha)
+   else
+     NN.elu(input, alpha)
+   end
+ end
+
+ def gelu(input, approximate: 'none')
+   NN.gelu(input, approximate: approximate)
+ end
+
  def hardshrink(input, lambd = 0.5)
    Torch.hardshrink(input, lambd)
  end
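The functional forms mirror the new classes; note that `inplace: true` routes to the bang variant (`NN.elu!`), which mutates its argument. A sketch, assuming these helpers are invoked on `Torch::NN::Functional` like the surrounding methods in this file:

```ruby
require "torch"

x = Torch.randn(4)
Torch::NN::Functional.elu(x, alpha: 1)             # returns a new tensor
Torch::NN::Functional.gelu(x, approximate: "tanh") # tanh approximation
Torch::NN::Functional.elu(x, inplace: true)        # mutates x in place
```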
data/lib/torch/nn/gelu.rb ADDED
@@ -0,0 +1,18 @@
+ module Torch
+   module NN
+     class GELU < Module
+       def initialize(approximate: 'none')
+         super()
+         @approximate = approximate
+       end
+
+       def forward(input)
+         F.gelu(input, approximate: @approximate)
+       end
+
+       def extra_inspect
+         "approximate: #{@approximate.inspect}"
+       end
+     end
+   end
+ end
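For the class above, `approximate:` selects the GELU variant (a sketch):

```ruby
require "torch"

exact = Torch::NN::GELU.new                      # approximate: 'none', exact CDF form
fast = Torch::NN::GELU.new(approximate: "tanh")  # faster tanh-based approximation
exact.call(Torch.randn(3))
```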
data/lib/torch/nn/leaky_relu.rb CHANGED
@@ -13,7 +13,7 @@ module Torch

  def extra_inspect
    inplace_str = @inplace ? ", inplace: true" : ""
-   format("negative_slope: %s%s", @negative_slope, inplace_str)
+   format("negative_slope: %s", @negative_slope) + inplace_str
  end
  end
  end
data/lib/torch/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Torch
-   VERSION = "0.15.0"
+   VERSION = "0.16.0"
  end
data/lib/torch.rb CHANGED
@@ -123,6 +123,8 @@ require_relative "torch/nn/dropout3d"
  require_relative "torch/nn/feature_alpha_dropout"

  # nn activations
+ require_relative "torch/nn/elu"
+ require_relative "torch/nn/gelu"
  require_relative "torch/nn/hardshrink"
  require_relative "torch/nn/leaky_relu"
  require_relative "torch/nn/log_sigmoid"
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: torch-rb
  version: !ruby/object:Gem::Version
- version: 0.15.0
+ version: 0.16.0
  platform: ruby
  authors:
  - Andrew Kane
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-02-29 00:00:00.000000000 Z
+ date: 2024-06-13 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rice
@@ -43,24 +43,17 @@ files:
  - ext/torch/ext.cpp
  - ext/torch/extconf.rb
  - ext/torch/fft.cpp
- - ext/torch/fft_functions.h
  - ext/torch/generator.cpp
  - ext/torch/ivalue.cpp
  - ext/torch/linalg.cpp
- - ext/torch/linalg_functions.h
  - ext/torch/nn.cpp
- - ext/torch/nn_functions.h
  - ext/torch/random.cpp
  - ext/torch/ruby_arg_parser.cpp
  - ext/torch/ruby_arg_parser.h
- - ext/torch/sparse_functions.h
  - ext/torch/special.cpp
- - ext/torch/special_functions.h
  - ext/torch/templates.h
  - ext/torch/tensor.cpp
- - ext/torch/tensor_functions.h
  - ext/torch/torch.cpp
- - ext/torch/torch_functions.h
  - ext/torch/utils.h
  - ext/torch/wrap_outputs.h
  - lib/torch-rb.rb
@@ -103,12 +96,14 @@ files:
  - lib/torch/nn/dropout2d.rb
  - lib/torch/nn/dropout3d.rb
  - lib/torch/nn/dropoutnd.rb
+ - lib/torch/nn/elu.rb
  - lib/torch/nn/embedding.rb
  - lib/torch/nn/embedding_bag.rb
  - lib/torch/nn/feature_alpha_dropout.rb
  - lib/torch/nn/fold.rb
  - lib/torch/nn/functional.rb
  - lib/torch/nn/functional_attention.rb
+ - lib/torch/nn/gelu.rb
  - lib/torch/nn/group_norm.rb
  - lib/torch/nn/gru.rb
  - lib/torch/nn/hardshrink.rb
@@ -230,14 +225,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '3'
+ version: '3.1'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.5.3
+ rubygems_version: 3.5.11
  signing_key:
  specification_version: 4
  summary: Deep learning for Ruby, powered by LibTorch
data/ext/torch/fft_functions.h DELETED
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_fft_functions(Rice::Module& m);
data/ext/torch/linalg_functions.h DELETED
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_linalg_functions(Rice::Module& m);
data/ext/torch/nn_functions.h DELETED
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_nn_functions(Rice::Module& m);
data/ext/torch/sparse_functions.h DELETED
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_sparse_functions(Rice::Module& m);
data/ext/torch/special_functions.h DELETED
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_special_functions(Rice::Module& m);
data/ext/torch/tensor_functions.h DELETED
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_tensor_functions(Rice::Module& m);
data/ext/torch/torch_functions.h DELETED
@@ -1,6 +0,0 @@
- // generated by rake generate:functions
- // do not edit by hand
-
- #pragma once
-
- void add_torch_functions(Rice::Module& m);