torch-rb 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 03164cc479d8f8a32f0669d597e8fe5310d91955e6954cfdc0fffdc8983c5768
- data.tar.gz: 87fc733016b6f4489b38a419a3879cacbdb1e190cfaa5c02397aceb57c012d16
+ metadata.gz: 39fe7c12b647086c46318bb77d1aac80bac1874cf3b71f1a0db4a1671322799e
+ data.tar.gz: 81a1c4d3ad4474b06dac82df2c38c498c7b4f5296e700d3bc98f596df14d10a4
  SHA512:
- metadata.gz: 6ba0480138a10ba43dff625dc1bcf99e2287f238dc4607ea6813e82914f1e335133f55408fa59579343b161c54850316f744c594cf6687b7f2de64a0d71746d1
- data.tar.gz: 859015641dd14bf919a7982c6673acb296f858518552b8c924fc7e59b9c1b2a9491aa598c01b019b392c8c2bba7b9f65ff0923f838e6cbde7ddaede9c4b69191
+ metadata.gz: d3677acb4621a6e47194acb81612a369716dcecb86518b1f0840e4d0e3e9fe48e8eaa3c7888cb1adc0c8e4ca78aa920a23c5c452d7a33fde92461fc6fb27b9bc
+ data.tar.gz: a02603711a10d16c6521c55c4389ade96503e60d21748d566552679acc2ab10805ada2c8083813b74ccfbba0742b68adc3cff5461b9dd000761326ee6e1ccffb
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+ ## 0.20.0 (2025-04-26)
+
+ - Updated LibTorch to 2.7.0
+ - Added `Normal` distribution
+ - Fixed `SystemStackError` with certain tensor comparisons
+
+ ## 0.19.1 (2025-02-10)
+
+ - Fixed error with Rice 4.5
+
  ## 0.19.0 (2025-01-29)

  - Updated LibTorch to 2.6.0
data/README.md CHANGED
@@ -17,7 +17,7 @@ Check out:
  First, [download LibTorch](https://pytorch.org/get-started/locally/). For Mac arm64, use:

  ```sh
- curl -L https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.6.0.zip > libtorch.zip
+ curl -L https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.7.0.zip > libtorch.zip
  unzip -q libtorch.zip
  ```

@@ -413,6 +413,7 @@ Here’s the list of compatible versions.

  Torch.rb | LibTorch
  --- | ---
+ 0.20.x | 2.7.x
  0.19.x | 2.6.x
  0.18.x | 2.5.x
  0.17.x | 2.4.x
@@ -403,6 +403,7 @@
  variants: function, method
  dispatch:
  CPU, CUDA: angle
+ MPS: angle_mps
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
  tags: pointwise

@@ -410,6 +411,7 @@
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA: angle_out
+ MPS: angle_out_mps
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
  tags: pointwise

@@ -1456,8 +1458,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: ceil_out
- MPS: ceil_out_mps
+ CPU, CUDA, MPS: ceil_out
  SparseCPU, SparseCUDA: ceil_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
  tags: pointwise
@@ -2576,8 +2577,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: exp_out
- MPS: exp_out_mps
+ CPU, CUDA, MPS: exp_out
  tags: pointwise

  - func: exp2(Tensor self) -> Tensor
@@ -2740,8 +2740,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: floor_out
- MPS: floor_out_mps
+ CPU, CUDA, MPS: floor_out
  SparseCPU, SparseCUDA: floor_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
  tags: pointwise
@@ -3000,6 +2999,7 @@
  CPU: _fft_r2c_mkl
  CUDA: _fft_r2c_cufft
  MPS: _fft_r2c_mps
+ tags: core

  - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
@@ -3864,6 +3864,7 @@
  device_guard: False
  dispatch:
  CompositeImplicitAutograd: value_selecting_reduction_backward_symint
+ NestedTensorCPU, NestedTensorCUDA: value_selecting_reduction_backward_nested_symint

  - func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
  variants: function, method
@@ -4177,6 +4178,14 @@
  dispatch:
  CPU: _weight_int4pack_mm_cpu

+ - func: _dyn_quant_pack_4bit_weight(Tensor weights, Tensor scales_zeros, Tensor? bias, int block_size, int in_features, int out_features) -> Tensor
+ dispatch:
+ CPU: _dyn_quant_pack_4bit_weight_cpu
+
+ - func: _dyn_quant_matmul_4bit(Tensor inp, Tensor packed_weights, int block_size, int in_features, int out_features) -> Tensor
+ dispatch:
+ CPU: _dyn_quant_matmul_4bit_cpu
+
  - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
  dispatch:
  CPU: _weight_int8pack_mm_cpu
@@ -4989,9 +4998,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU: round_out
- CUDA: round_out
- MPS: round_out_mps
+ CPU, CUDA, MPS: round_out
  SparseCPU, SparseCUDA: round_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
  tags: pointwise
@@ -5013,8 +5020,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU: round_decimals_out
- CUDA: round_decimals_out
+ CPU, CUDA, MPS: round_decimals_out
  tags: pointwise

  - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
@@ -5376,7 +5382,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: sinc_out
+ CPU, CUDA, MPS: sinc_out
  tags: pointwise

  - func: sinh(Tensor self) -> Tensor
@@ -5747,11 +5753,11 @@
  - func: dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)

  # Overload without center & pad mode, needed for forward-compatibility
- - func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
+ - func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor
  variants: function, method
  cpp_no_default_args: ['hop_length', 'win_length', 'window', 'normalized']

- - func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
+ - func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor
  variants: function, method

  - func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor
@@ -5856,8 +5862,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: sqrt_out
- MPS: sqrt_out_mps
+ CPU, CUDA, MPS: sqrt_out
  SparseCPU, SparseCUDA: sqrt_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
  tags: pointwise
@@ -6048,8 +6053,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: tanh_out
- MPS: tanh_out_mps
+ CPU, CUDA, MPS: tanh_out
  SparseCPU, SparseCUDA: tanh_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
  tags: pointwise
@@ -6340,8 +6344,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: trunc_out
- MPS: trunc_out_mps
+ CPU, CUDA, MPS: trunc_out
  SparseCPU, SparseCUDA: trunc_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
  tags: pointwise
@@ -7070,6 +7073,12 @@
  dispatch:
  CUDA: _scaled_mm_out_cuda

+
+ - func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
+ variants: function
+ dispatch:
+ CUDA: _scaled_grouped_mm_cuda
+
  # NOTE [ Sparse: autograd and API ]
  #
  #
@@ -9274,12 +9283,12 @@
  MPS: nonzero_mps
  tags: [dynamic_output_shape, core]

- - func: nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
+ - func: nonzero_static.out(Tensor self, *, SymInt size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: nonzero_static_out_cpu
  CUDA: nonzero_static_out_cuda

- - func: nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor
+ - func: nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor
  variants: method, function
  dispatch:
  CPU: nonzero_static_cpu
@@ -9428,11 +9437,13 @@
  - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU, CUDA: cholesky_out
+ MPS: cholesky_mps_out

  - func: cholesky(Tensor self, bool upper=False) -> Tensor
  variants: method, function
  dispatch:
  CPU, CUDA: cholesky
+ MPS: cholesky_mps

  - func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -9506,6 +9517,7 @@
  structured: True
  dispatch:
  CPU, CUDA: lu_unpack_out
+ MPS: lu_unpack_out_mps

  # TODO: remove dispatch section when porting TH CUDA to ATen
  - func: multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
@@ -9602,8 +9614,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: erfinv_out
- MPS: erfinv_out_mps
+ CPU, CUDA, MPS: erfinv_out
  SparseCPU, SparseCUDA: erfinv_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
  tags: pointwise
@@ -12711,6 +12722,7 @@
  dispatch:
  CPU: _upsample_bilinear2d_aa_out_cpu
  CUDA: _upsample_bilinear2d_aa_out_cuda
+ MPS: _upsample_bilinear2d_aa_out_mps

  - func: _upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
@@ -13058,7 +13070,6 @@
  autogen: _slow_conv2d_backward.output_mask_out

  - func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: nn
  dispatch:
  CUDA: conv_depthwise2d_cuda_out
@@ -13225,7 +13236,7 @@
  python_module: special
  variants: function
  dispatch:
- CPU, CUDA: special_entr_out
+ CPU, CUDA, MPS: special_entr_out
  tags: pointwise

  - func: special_ndtri(Tensor self) -> Tensor
@@ -13372,7 +13383,7 @@
  python_module: special
  variants: function
  dispatch:
- CPU, CUDA: special_xlog1py_out
+ CPU, CUDA, MPS: special_xlog1py_out
  tags: pointwise

  - func: special_xlog1py.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -13451,7 +13462,7 @@
  python_module: special
  variants: function
  dispatch:
- CPU, CUDA: special_zeta_out
+ CPU, CUDA, MPS: special_zeta_out
  tags: pointwise

  - func: special_zeta.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -13744,7 +13755,6 @@
  CompositeImplicitAutograd: fft_hfft2_symint

  - func: fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: fft
  variants: function
  dispatch:
@@ -13758,7 +13768,6 @@
  CompositeImplicitAutograd: fft_ihfft2_symint

  - func: fft_ihfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: fft
  variants: function
  dispatch:
@@ -13820,7 +13829,6 @@
  CompositeImplicitAutograd: fft_hfftn_symint

  - func: fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: fft
  variants: function
  dispatch:
@@ -13834,7 +13842,6 @@
  CompositeImplicitAutograd: fft_ihfftn_symint

  - func: fft_ihfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: fft
  variants: function
  dispatch:
@@ -13891,6 +13898,7 @@
  structured: True
  dispatch:
  CPU, CUDA: linalg_cholesky_ex_out
+ MPS: linalg_cholesky_ex_out_mps

  - func: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
  python_module: linalg
@@ -13937,6 +13945,7 @@
  structured: True
  dispatch:
  CPU, CUDA: linalg_lu_factor_ex_out
+ MPS: linalg_lu_factor_ex_out_mps

  # linalg.lu
  - func: linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)
@@ -13971,7 +13980,7 @@
  - func: _linalg_det.result(Tensor A, *, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots)
  structured: True
  dispatch:
- CPU, CUDA: _linalg_det_out
+ CPU, CUDA, MPS: _linalg_det_out

  - func: linalg_det(Tensor A) -> Tensor
  python_module: linalg
@@ -14058,7 +14067,7 @@
  - func: _linalg_slogdet.sign(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots) -> (Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots)
  structured: True
  dispatch:
- CPU, CUDA: _linalg_slogdet_out
+ CPU, CUDA, MPS: _linalg_slogdet_out

  - func: linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet)
  python_module: linalg
@@ -14300,6 +14309,7 @@
  structured: True
  dispatch:
  CPU, CUDA: _linalg_solve_ex_out
+ MPS: _linalg_solve_ex_out_mps

  - func: linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor info)
  python_module: linalg
@@ -14837,6 +14847,7 @@
  Meta: _fused_sdp_choice_meta
  CPU, NestedTensorCPU: _fused_sdp_choice_cpp
  CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
+ XPU: _fused_sdp_choice_xpu
  tags: nondeterministic_seeded

  - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
@@ -14848,7 +14859,7 @@
  MPS: _scaled_dot_product_attention_math_mps
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_flash_attention_cuda
  NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
@@ -14862,6 +14873,7 @@
  - func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
  CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
+ XPU: _scaled_dot_product_fused_attention_overrideable_xpu
  tags: nondeterministic_seeded

  - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
@@ -14898,6 +14910,7 @@
  - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_cudnn_attention_cuda
+ NestedTensorCUDA: _scaled_dot_product_cudnn_attention_nestedtensor_cuda
  tags: nondeterministic_seeded

  - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
@@ -14905,13 +14918,13 @@
  CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
  tags: nondeterministic_seeded

- - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
  variants: function
  dispatch:
  CUDA: _flash_attention_forward
  tags: nondeterministic_seeded

- - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
+ - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor rng_state, Tensor unused, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
  device_check: NoCheck
  variants: function
  dispatch:
@@ -14930,6 +14943,11 @@
  dispatch:
  CUDA: _efficient_attention_backward

+ - func: _cudnn_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ dispatch:
+ CUDA: _cudnn_attention_forward
+ tags: nondeterministic_seeded
+
  - func: _triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor
  variants: function
  dispatch:
@@ -15710,7 +15728,7 @@

  - func: special_spherical_bessel_j0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_spherical_bessel_j0_out
+ CPU, CUDA, MPS: special_spherical_bessel_j0_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -46,10 +46,7 @@ namespace Rice::detail
  template<typename T>
  struct Type<c10::complex<T>>
  {
- static bool verify()
- {
- return true;
- }
+ static bool verify() { return true; }
  };

  template<typename T>
@@ -66,6 +63,8 @@ namespace Rice::detail
  class From_Ruby<c10::complex<T>>
  {
  public:
+ Convertible is_convertible(VALUE value) { return Convertible::Cast; }
+
  c10::complex<T> convert(VALUE x)
  {
  VALUE real = rb_funcall(x, rb_intern("real"), 0);
@@ -80,16 +79,15 @@ namespace Rice::detail
  template<>
  struct Type<FanModeType>
  {
- static bool verify()
- {
- return true;
- }
+ static bool verify() { return true; }
  };

  template<>
  class From_Ruby<FanModeType>
  {
  public:
+ Convertible is_convertible(VALUE value) { return Convertible::Cast; }
+
  FanModeType convert(VALUE x)
  {
  auto s = String(x).str();
@@ -106,16 +104,15 @@ namespace Rice::detail
  template<>
  struct Type<NonlinearityType>
  {
- static bool verify()
- {
- return true;
- }
+ static bool verify() { return true; }
  };

  template<>
  class From_Ruby<NonlinearityType>
  {
  public:
+ Convertible is_convertible(VALUE value) { return Convertible::Cast; }
+
  NonlinearityType convert(VALUE x)
  {
  auto s = String(x).str();
@@ -150,16 +147,15 @@ namespace Rice::detail
  template<>
  struct Type<Scalar>
  {
- static bool verify()
- {
- return true;
- }
+ static bool verify() { return true; }
  };

  template<>
  class From_Ruby<Scalar>
  {
  public:
+ Convertible is_convertible(VALUE value) { return Convertible::Cast; }
+
  Scalar convert(VALUE x)
  {
  if (FIXNUM_P(x)) {
data/ext/torch/torch.cpp CHANGED
@@ -21,7 +21,13 @@ torch::Tensor make_tensor(Rice::Array a, const std::vector<int64_t> &size, const
  }

  void init_torch(Rice::Module& m) {
- register_handler<torch::Error>(handle_global_error);
+ Rice::detail::Registries::instance.handlers.set([]() {
+ try {
+ throw;
+ } catch (const torch::Error& ex) {
+ handle_global_error(ex);
+ }
+ });
  add_torch_functions(m);
  m.define_singleton_function(
  "grad_enabled?",
data/ext/torch/utils.h CHANGED
@@ -6,7 +6,7 @@
  #include <rice/stl.hpp>

  static_assert(
- TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR == 6,
+ TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR == 7,
  "Incompatible LibTorch version"
  );

data/lib/torch/distributions/distribution.rb ADDED
@@ -0,0 +1,26 @@
+ module Torch
+ module Distributions
+ class Distribution
+ def initialize(batch_shape: [], event_shape: [], validate_args: nil)
+ @batch_shape = batch_shape
+ @event_shape = event_shape
+ if !validate_args.nil?
+ @validate_args = validate_args
+ end
+ if @validate_args
+ raise NotImplementedYet
+ end
+ super()
+ end
+
+ private
+
+ def _extended_shape(sample_shape: [])
+ if !sample_shape.is_a?(Array)
+ sample_shape = sample_shape.to_a
+ end
+ sample_shape + @batch_shape + @event_shape
+ end
+ end
+ end
+ end
data/lib/torch/distributions/exponential_family.rb ADDED
@@ -0,0 +1,6 @@
+ module Torch
+ module Distributions
+ class ExponentialFamily < Distribution
+ end
+ end
+ end
data/lib/torch/distributions/normal.rb ADDED
@@ -0,0 +1,22 @@
+ module Torch
+ module Distributions
+ class Normal < ExponentialFamily
+ def initialize(loc, scale, validate_args: nil)
+ @loc, @scale = Utils.broadcast_all(loc, scale)
+ if loc.is_a?(Numeric) && scale.is_a?(Numeric)
+ batch_shape = []
+ else
+ batch_shape = @loc.size
+ end
+ super(batch_shape:, validate_args:)
+ end
+
+ def sample(sample_shape: [])
+ shape = _extended_shape(sample_shape:)
+ Torch.no_grad do
+ Torch.normal(@loc.expand(shape), @scale.expand(shape))
+ end
+ end
+ end
+ end
+ end
data/lib/torch/distributions/utils.rb ADDED
@@ -0,0 +1,10 @@
+ module Torch
+ module Distributions
+ module Utils
+ def self.broadcast_all(*values)
+ # TODO improve
+ values
+ end
+ end
+ end
+ end
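Taken together, the files above provide a first `Torch::Distributions::Normal`. Since `Utils.broadcast_all` is still a passthrough (see the TODO), a usage sketch is safest when `loc` and `scale` are already tensors of the same shape; the values below are illustrative, not from the gem's docs:

```ruby
require "torch"

# loc and scale as same-shaped tensors, since broadcast_all does not broadcast yet
loc = Torch.tensor([0.0, 1.0])
scale = Torch.tensor([1.0, 0.5])

dist = Torch::Distributions::Normal.new(loc, scale)
samples = dist.sample(sample_shape: [4])
samples.shape # => [4, 2] (sample_shape + batch_shape)
```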
data/lib/torch/tensor.rb CHANGED
@@ -159,6 +159,7 @@ module Torch

  # TODO better compare?
  def <=>(other)
+ other = other.item if other.is_a?(Tensor)
  item <=> other
  end
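A minimal illustration of the change above, presumably the fix behind the `SystemStackError` entry in the changelog (assuming scalar tensors; the values are illustrative):

```ruby
a = Torch.tensor(1)
b = Torch.tensor(2)

# Comparing two tensors previously raised SystemStackError (per the changelog);
# with the added coercion, <=> now compares the underlying items.
a <=> b     # => -1
[b, a].sort # => tensors ordered by value, smallest first
```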
data/lib/torch/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Torch
- VERSION = "0.19.0"
+ VERSION = "0.20.0"
  end
data/lib/torch.rb CHANGED
@@ -13,6 +13,12 @@ require_relative "torch/inspector"
  require_relative "torch/tensor"
  require_relative "torch/version"

+ # distributions
+ require_relative "torch/distributions/distribution"
+ require_relative "torch/distributions/exponential_family"
+ require_relative "torch/distributions/normal"
+ require_relative "torch/distributions/utils"
+
  # optim
  require_relative "torch/optim/optimizer"
  require_relative "torch/optim/adadelta"
metadata CHANGED
@@ -1,13 +1,13 @@
  --- !ruby/object:Gem::Specification
  name: torch-rb
  version: !ruby/object:Gem::Version
- version: 0.19.0
+ version: 0.20.0
  platform: ruby
  authors:
  - Andrew Kane
  bindir: bin
  cert_chain: []
- date: 2025-01-30 00:00:00.000000000 Z
+ date: 1980-01-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rice
@@ -15,14 +15,14 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 4.3.3
+ version: '4.5'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 4.3.3
+ version: '4.5'
  email: andrew@ankane.org
  executables: []
  extensions:
@@ -64,6 +64,10 @@ files:
  - lib/torch-rb.rb
  - lib/torch.rb
  - lib/torch/device.rb
+ - lib/torch/distributions/distribution.rb
+ - lib/torch/distributions/exponential_family.rb
+ - lib/torch/distributions/normal.rb
+ - lib/torch/distributions/utils.rb
  - lib/torch/hub.rb
  - lib/torch/inspector.rb
  - lib/torch/nn/adaptive_avg_pool1d.rb
@@ -237,7 +241,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.6.2
+ rubygems_version: 3.6.7
  specification_version: 4
  summary: Deep learning for Ruby, powered by LibTorch
  test_files: []