torch-rb 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +2 -1
- data/codegen/native_functions.yaml +56 -38
- data/ext/torch/templates.h +12 -16
- data/ext/torch/torch.cpp +7 -1
- data/ext/torch/utils.h +1 -1
- data/lib/torch/distributions/distribution.rb +26 -0
- data/lib/torch/distributions/exponential_family.rb +6 -0
- data/lib/torch/distributions/normal.rb +22 -0
- data/lib/torch/distributions/utils.rb +10 -0
- data/lib/torch/tensor.rb +1 -0
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +6 -0
- metadata +9 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39fe7c12b647086c46318bb77d1aac80bac1874cf3b71f1a0db4a1671322799e
|
4
|
+
data.tar.gz: 81a1c4d3ad4474b06dac82df2c38c498c7b4f5296e700d3bc98f596df14d10a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3677acb4621a6e47194acb81612a369716dcecb86518b1f0840e4d0e3e9fe48e8eaa3c7888cb1adc0c8e4ca78aa920a23c5c452d7a33fde92461fc6fb27b9bc
|
7
|
+
data.tar.gz: a02603711a10d16c6521c55c4389ade96503e60d21748d566552679acc2ab10805ada2c8083813b74ccfbba0742b68adc3cff5461b9dd000761326ee6e1ccffb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 0.20.0 (2025-04-26)
|
2
|
+
|
3
|
+
- Updated LibTorch to 2.7.0
|
4
|
+
- Added `Normal` distribution
|
5
|
+
- Fixed `SystemStackError` with certain tensor comparisons
|
6
|
+
|
7
|
+
## 0.19.1 (2025-02-10)
|
8
|
+
|
9
|
+
- Fixed error with Rice 4.5
|
10
|
+
|
1
11
|
## 0.19.0 (2025-01-29)
|
2
12
|
|
3
13
|
- Updated LibTorch to 2.6.0
|
data/README.md
CHANGED
@@ -17,7 +17,7 @@ Check out:
|
|
17
17
|
First, [download LibTorch](https://pytorch.org/get-started/locally/). For Mac arm64, use:
|
18
18
|
|
19
19
|
```sh
|
20
|
-
curl -L https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.6.0.zip > libtorch.zip
|
20
|
+
curl -L https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.7.0.zip > libtorch.zip
|
21
21
|
unzip -q libtorch.zip
|
22
22
|
```
|
23
23
|
|
@@ -413,6 +413,7 @@ Here’s the list of compatible versions.
|
|
413
413
|
|
414
414
|
Torch.rb | LibTorch
|
415
415
|
--- | ---
|
416
|
+
0.20.x | 2.7.x
|
416
417
|
0.19.x | 2.6.x
|
417
418
|
0.18.x | 2.5.x
|
418
419
|
0.17.x | 2.4.x
|
data/codegen/native_functions.yaml
CHANGED
@@ -403,6 +403,7 @@
|
|
403
403
|
variants: function, method
|
404
404
|
dispatch:
|
405
405
|
CPU, CUDA: angle
|
406
|
+
MPS: angle_mps
|
406
407
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
|
407
408
|
tags: pointwise
|
408
409
|
|
@@ -410,6 +411,7 @@
|
|
410
411
|
device_check: NoCheck # TensorIterator
|
411
412
|
dispatch:
|
412
413
|
CPU, CUDA: angle_out
|
414
|
+
MPS: angle_out_mps
|
413
415
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
|
414
416
|
tags: pointwise
|
415
417
|
|
@@ -1456,8 +1458,7 @@
|
|
1456
1458
|
structured: True
|
1457
1459
|
structured_inherits: TensorIteratorBase
|
1458
1460
|
dispatch:
|
1459
|
-
CPU, CUDA: ceil_out
|
1460
|
-
MPS: ceil_out_mps
|
1461
|
+
CPU, CUDA, MPS: ceil_out
|
1461
1462
|
SparseCPU, SparseCUDA: ceil_sparse_out
|
1462
1463
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
|
1463
1464
|
tags: pointwise
|
@@ -2576,8 +2577,7 @@
|
|
2576
2577
|
structured: True
|
2577
2578
|
structured_inherits: TensorIteratorBase
|
2578
2579
|
dispatch:
|
2579
|
-
CPU, CUDA: exp_out
|
2580
|
-
MPS: exp_out_mps
|
2580
|
+
CPU, CUDA, MPS: exp_out
|
2581
2581
|
tags: pointwise
|
2582
2582
|
|
2583
2583
|
- func: exp2(Tensor self) -> Tensor
|
@@ -2740,8 +2740,7 @@
|
|
2740
2740
|
structured: True
|
2741
2741
|
structured_inherits: TensorIteratorBase
|
2742
2742
|
dispatch:
|
2743
|
-
CPU, CUDA: floor_out
|
2744
|
-
MPS: floor_out_mps
|
2743
|
+
CPU, CUDA, MPS: floor_out
|
2745
2744
|
SparseCPU, SparseCUDA: floor_sparse_out
|
2746
2745
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
|
2747
2746
|
tags: pointwise
|
@@ -3000,6 +2999,7 @@
|
|
3000
2999
|
CPU: _fft_r2c_mkl
|
3001
3000
|
CUDA: _fft_r2c_cufft
|
3002
3001
|
MPS: _fft_r2c_mps
|
3002
|
+
tags: core
|
3003
3003
|
|
3004
3004
|
- func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
|
3005
3005
|
variants: function
|
@@ -3864,6 +3864,7 @@
|
|
3864
3864
|
device_guard: False
|
3865
3865
|
dispatch:
|
3866
3866
|
CompositeImplicitAutograd: value_selecting_reduction_backward_symint
|
3867
|
+
NestedTensorCPU, NestedTensorCUDA: value_selecting_reduction_backward_nested_symint
|
3867
3868
|
|
3868
3869
|
- func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
|
3869
3870
|
variants: function, method
|
@@ -4177,6 +4178,14 @@
|
|
4177
4178
|
dispatch:
|
4178
4179
|
CPU: _weight_int4pack_mm_cpu
|
4179
4180
|
|
4181
|
+
- func: _dyn_quant_pack_4bit_weight(Tensor weights, Tensor scales_zeros, Tensor? bias, int block_size, int in_features, int out_features) -> Tensor
|
4182
|
+
dispatch:
|
4183
|
+
CPU: _dyn_quant_pack_4bit_weight_cpu
|
4184
|
+
|
4185
|
+
- func: _dyn_quant_matmul_4bit(Tensor inp, Tensor packed_weights, int block_size, int in_features, int out_features) -> Tensor
|
4186
|
+
dispatch:
|
4187
|
+
CPU: _dyn_quant_matmul_4bit_cpu
|
4188
|
+
|
4180
4189
|
- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
|
4181
4190
|
dispatch:
|
4182
4191
|
CPU: _weight_int8pack_mm_cpu
|
@@ -4989,9 +4998,7 @@
|
|
4989
4998
|
structured: True
|
4990
4999
|
structured_inherits: TensorIteratorBase
|
4991
5000
|
dispatch:
|
4992
|
-
CPU: round_out
|
4993
|
-
CUDA: round_out
|
4994
|
-
MPS: round_out_mps
|
5001
|
+
CPU, CUDA, MPS: round_out
|
4995
5002
|
SparseCPU, SparseCUDA: round_sparse_out
|
4996
5003
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
|
4997
5004
|
tags: pointwise
|
@@ -5013,8 +5020,7 @@
|
|
5013
5020
|
structured: True
|
5014
5021
|
structured_inherits: TensorIteratorBase
|
5015
5022
|
dispatch:
|
5016
|
-
CPU: round_decimals_out
|
5017
|
-
CUDA: round_decimals_out
|
5023
|
+
CPU, CUDA, MPS: round_decimals_out
|
5018
5024
|
tags: pointwise
|
5019
5025
|
|
5020
5026
|
- func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
|
@@ -5376,7 +5382,7 @@
|
|
5376
5382
|
structured: True
|
5377
5383
|
structured_inherits: TensorIteratorBase
|
5378
5384
|
dispatch:
|
5379
|
-
CPU, CUDA: sinc_out
|
5385
|
+
CPU, CUDA, MPS: sinc_out
|
5380
5386
|
tags: pointwise
|
5381
5387
|
|
5382
5388
|
- func: sinh(Tensor self) -> Tensor
|
@@ -5747,11 +5753,11 @@
|
|
5747
5753
|
- func: dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)
|
5748
5754
|
|
5749
5755
|
# Overload without center & pad mode, needed for forward-compatibility
|
5750
|
-
- func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
|
5756
|
+
- func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor
|
5751
5757
|
variants: function, method
|
5752
5758
|
cpp_no_default_args: ['hop_length', 'win_length', 'window', 'normalized']
|
5753
5759
|
|
5754
|
-
- func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
|
5760
|
+
- func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor
|
5755
5761
|
variants: function, method
|
5756
5762
|
|
5757
5763
|
- func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor
|
@@ -5856,8 +5862,7 @@
|
|
5856
5862
|
structured: True
|
5857
5863
|
structured_inherits: TensorIteratorBase
|
5858
5864
|
dispatch:
|
5859
|
-
CPU, CUDA: sqrt_out
|
5860
|
-
MPS: sqrt_out_mps
|
5865
|
+
CPU, CUDA, MPS: sqrt_out
|
5861
5866
|
SparseCPU, SparseCUDA: sqrt_sparse_out
|
5862
5867
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
|
5863
5868
|
tags: pointwise
|
@@ -6048,8 +6053,7 @@
|
|
6048
6053
|
structured: True
|
6049
6054
|
structured_inherits: TensorIteratorBase
|
6050
6055
|
dispatch:
|
6051
|
-
CPU, CUDA: tanh_out
|
6052
|
-
MPS: tanh_out_mps
|
6056
|
+
CPU, CUDA, MPS: tanh_out
|
6053
6057
|
SparseCPU, SparseCUDA: tanh_sparse_out
|
6054
6058
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
|
6055
6059
|
tags: pointwise
|
@@ -6340,8 +6344,7 @@
|
|
6340
6344
|
structured_inherits: TensorIteratorBase
|
6341
6345
|
device_check: NoCheck # TensorIterator
|
6342
6346
|
dispatch:
|
6343
|
-
CPU, CUDA: trunc_out
|
6344
|
-
MPS: trunc_out_mps
|
6347
|
+
CPU, CUDA, MPS: trunc_out
|
6345
6348
|
SparseCPU, SparseCUDA: trunc_sparse_out
|
6346
6349
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
|
6347
6350
|
tags: pointwise
|
@@ -7070,6 +7073,12 @@
|
|
7070
7073
|
dispatch:
|
7071
7074
|
CUDA: _scaled_mm_out_cuda
|
7072
7075
|
|
7076
|
+
|
7077
|
+
- func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
|
7078
|
+
variants: function
|
7079
|
+
dispatch:
|
7080
|
+
CUDA: _scaled_grouped_mm_cuda
|
7081
|
+
|
7073
7082
|
# NOTE [ Sparse: autograd and API ]
|
7074
7083
|
#
|
7075
7084
|
#
|
@@ -9274,12 +9283,12 @@
|
|
9274
9283
|
MPS: nonzero_mps
|
9275
9284
|
tags: [dynamic_output_shape, core]
|
9276
9285
|
|
9277
|
-
- func: nonzero_static.out(Tensor self, *,
|
9286
|
+
- func: nonzero_static.out(Tensor self, *, SymInt size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
|
9278
9287
|
dispatch:
|
9279
9288
|
CPU: nonzero_static_out_cpu
|
9280
9289
|
CUDA: nonzero_static_out_cuda
|
9281
9290
|
|
9282
|
-
- func: nonzero_static(Tensor self, *,
|
9291
|
+
- func: nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor
|
9283
9292
|
variants: method, function
|
9284
9293
|
dispatch:
|
9285
9294
|
CPU: nonzero_static_cpu
|
@@ -9428,11 +9437,13 @@
|
|
9428
9437
|
- func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
|
9429
9438
|
dispatch:
|
9430
9439
|
CPU, CUDA: cholesky_out
|
9440
|
+
MPS: cholesky_mps_out
|
9431
9441
|
|
9432
9442
|
- func: cholesky(Tensor self, bool upper=False) -> Tensor
|
9433
9443
|
variants: method, function
|
9434
9444
|
dispatch:
|
9435
9445
|
CPU, CUDA: cholesky
|
9446
|
+
MPS: cholesky_mps
|
9436
9447
|
|
9437
9448
|
- func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
|
9438
9449
|
dispatch:
|
@@ -9506,6 +9517,7 @@
|
|
9506
9517
|
structured: True
|
9507
9518
|
dispatch:
|
9508
9519
|
CPU, CUDA: lu_unpack_out
|
9520
|
+
MPS: lu_unpack_out_mps
|
9509
9521
|
|
9510
9522
|
# TODO: remove dispatch section when porting TH CUDA to ATen
|
9511
9523
|
- func: multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
|
@@ -9602,8 +9614,7 @@
|
|
9602
9614
|
structured: True
|
9603
9615
|
structured_inherits: TensorIteratorBase
|
9604
9616
|
dispatch:
|
9605
|
-
CPU, CUDA: erfinv_out
|
9606
|
-
MPS: erfinv_out_mps
|
9617
|
+
CPU, CUDA, MPS: erfinv_out
|
9607
9618
|
SparseCPU, SparseCUDA: erfinv_sparse_out
|
9608
9619
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
|
9609
9620
|
tags: pointwise
|
@@ -12711,6 +12722,7 @@
|
|
12711
12722
|
dispatch:
|
12712
12723
|
CPU: _upsample_bilinear2d_aa_out_cpu
|
12713
12724
|
CUDA: _upsample_bilinear2d_aa_out_cuda
|
12725
|
+
MPS: _upsample_bilinear2d_aa_out_mps
|
12714
12726
|
|
12715
12727
|
- func: _upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
|
12716
12728
|
python_module: nn
|
@@ -13058,7 +13070,6 @@
|
|
13058
13070
|
autogen: _slow_conv2d_backward.output_mask_out
|
13059
13071
|
|
13060
13072
|
- func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
|
13061
|
-
use_const_ref_for_mutable_tensors: True
|
13062
13073
|
python_module: nn
|
13063
13074
|
dispatch:
|
13064
13075
|
CUDA: conv_depthwise2d_cuda_out
|
@@ -13225,7 +13236,7 @@
|
|
13225
13236
|
python_module: special
|
13226
13237
|
variants: function
|
13227
13238
|
dispatch:
|
13228
|
-
CPU, CUDA: special_entr_out
|
13239
|
+
CPU, CUDA, MPS: special_entr_out
|
13229
13240
|
tags: pointwise
|
13230
13241
|
|
13231
13242
|
- func: special_ndtri(Tensor self) -> Tensor
|
@@ -13372,7 +13383,7 @@
|
|
13372
13383
|
python_module: special
|
13373
13384
|
variants: function
|
13374
13385
|
dispatch:
|
13375
|
-
CPU, CUDA: special_xlog1py_out
|
13386
|
+
CPU, CUDA, MPS: special_xlog1py_out
|
13376
13387
|
tags: pointwise
|
13377
13388
|
|
13378
13389
|
- func: special_xlog1py.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -13451,7 +13462,7 @@
|
|
13451
13462
|
python_module: special
|
13452
13463
|
variants: function
|
13453
13464
|
dispatch:
|
13454
|
-
CPU, CUDA: special_zeta_out
|
13465
|
+
CPU, CUDA, MPS: special_zeta_out
|
13455
13466
|
tags: pointwise
|
13456
13467
|
|
13457
13468
|
- func: special_zeta.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -13744,7 +13755,6 @@
|
|
13744
13755
|
CompositeImplicitAutograd: fft_hfft2_symint
|
13745
13756
|
|
13746
13757
|
- func: fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
13747
|
-
use_const_ref_for_mutable_tensors: True
|
13748
13758
|
python_module: fft
|
13749
13759
|
variants: function
|
13750
13760
|
dispatch:
|
@@ -13758,7 +13768,6 @@
|
|
13758
13768
|
CompositeImplicitAutograd: fft_ihfft2_symint
|
13759
13769
|
|
13760
13770
|
- func: fft_ihfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
13761
|
-
use_const_ref_for_mutable_tensors: True
|
13762
13771
|
python_module: fft
|
13763
13772
|
variants: function
|
13764
13773
|
dispatch:
|
@@ -13820,7 +13829,6 @@
|
|
13820
13829
|
CompositeImplicitAutograd: fft_hfftn_symint
|
13821
13830
|
|
13822
13831
|
- func: fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
13823
|
-
use_const_ref_for_mutable_tensors: True
|
13824
13832
|
python_module: fft
|
13825
13833
|
variants: function
|
13826
13834
|
dispatch:
|
@@ -13834,7 +13842,6 @@
|
|
13834
13842
|
CompositeImplicitAutograd: fft_ihfftn_symint
|
13835
13843
|
|
13836
13844
|
- func: fft_ihfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
13837
|
-
use_const_ref_for_mutable_tensors: True
|
13838
13845
|
python_module: fft
|
13839
13846
|
variants: function
|
13840
13847
|
dispatch:
|
@@ -13891,6 +13898,7 @@
|
|
13891
13898
|
structured: True
|
13892
13899
|
dispatch:
|
13893
13900
|
CPU, CUDA: linalg_cholesky_ex_out
|
13901
|
+
MPS: linalg_cholesky_ex_out_mps
|
13894
13902
|
|
13895
13903
|
- func: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
|
13896
13904
|
python_module: linalg
|
@@ -13937,6 +13945,7 @@
|
|
13937
13945
|
structured: True
|
13938
13946
|
dispatch:
|
13939
13947
|
CPU, CUDA: linalg_lu_factor_ex_out
|
13948
|
+
MPS: linalg_lu_factor_ex_out_mps
|
13940
13949
|
|
13941
13950
|
# linalg.lu
|
13942
13951
|
- func: linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)
|
@@ -13971,7 +13980,7 @@
|
|
13971
13980
|
- func: _linalg_det.result(Tensor A, *, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots)
|
13972
13981
|
structured: True
|
13973
13982
|
dispatch:
|
13974
|
-
CPU, CUDA: _linalg_det_out
|
13983
|
+
CPU, CUDA, MPS: _linalg_det_out
|
13975
13984
|
|
13976
13985
|
- func: linalg_det(Tensor A) -> Tensor
|
13977
13986
|
python_module: linalg
|
@@ -14058,7 +14067,7 @@
|
|
14058
14067
|
- func: _linalg_slogdet.sign(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots) -> (Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots)
|
14059
14068
|
structured: True
|
14060
14069
|
dispatch:
|
14061
|
-
CPU, CUDA: _linalg_slogdet_out
|
14070
|
+
CPU, CUDA, MPS: _linalg_slogdet_out
|
14062
14071
|
|
14063
14072
|
- func: linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet)
|
14064
14073
|
python_module: linalg
|
@@ -14300,6 +14309,7 @@
|
|
14300
14309
|
structured: True
|
14301
14310
|
dispatch:
|
14302
14311
|
CPU, CUDA: _linalg_solve_ex_out
|
14312
|
+
MPS: _linalg_solve_ex_out_mps
|
14303
14313
|
|
14304
14314
|
- func: linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor info)
|
14305
14315
|
python_module: linalg
|
@@ -14837,6 +14847,7 @@
|
|
14837
14847
|
Meta: _fused_sdp_choice_meta
|
14838
14848
|
CPU, NestedTensorCPU: _fused_sdp_choice_cpp
|
14839
14849
|
CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
|
14850
|
+
XPU: _fused_sdp_choice_xpu
|
14840
14851
|
tags: nondeterministic_seeded
|
14841
14852
|
|
14842
14853
|
- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
|
@@ -14848,7 +14859,7 @@
|
|
14848
14859
|
MPS: _scaled_dot_product_attention_math_mps
|
14849
14860
|
tags: nondeterministic_seeded
|
14850
14861
|
|
14851
|
-
- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor
|
14862
|
+
- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
|
14852
14863
|
dispatch:
|
14853
14864
|
CUDA: _scaled_dot_product_flash_attention_cuda
|
14854
14865
|
NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
|
@@ -14862,6 +14873,7 @@
|
|
14862
14873
|
- func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
|
14863
14874
|
dispatch:
|
14864
14875
|
CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
|
14876
|
+
XPU: _scaled_dot_product_fused_attention_overrideable_xpu
|
14865
14877
|
tags: nondeterministic_seeded
|
14866
14878
|
|
14867
14879
|
- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
|
@@ -14898,6 +14910,7 @@
|
|
14898
14910
|
- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
|
14899
14911
|
dispatch:
|
14900
14912
|
CUDA: _scaled_dot_product_cudnn_attention_cuda
|
14913
|
+
NestedTensorCUDA: _scaled_dot_product_cudnn_attention_nestedtensor_cuda
|
14901
14914
|
tags: nondeterministic_seeded
|
14902
14915
|
|
14903
14916
|
- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
|
@@ -14905,13 +14918,13 @@
|
|
14905
14918
|
CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
|
14906
14919
|
tags: nondeterministic_seeded
|
14907
14920
|
|
14908
|
-
- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor
|
14921
|
+
- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
|
14909
14922
|
variants: function
|
14910
14923
|
dispatch:
|
14911
14924
|
CUDA: _flash_attention_forward
|
14912
14925
|
tags: nondeterministic_seeded
|
14913
14926
|
|
14914
|
-
- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor
|
14927
|
+
- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor rng_state, Tensor unused, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
|
14915
14928
|
device_check: NoCheck
|
14916
14929
|
variants: function
|
14917
14930
|
dispatch:
|
@@ -14930,6 +14943,11 @@
|
|
14930
14943
|
dispatch:
|
14931
14944
|
CUDA: _efficient_attention_backward
|
14932
14945
|
|
14946
|
+
- func: _cudnn_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
|
14947
|
+
dispatch:
|
14948
|
+
CUDA: _cudnn_attention_forward
|
14949
|
+
tags: nondeterministic_seeded
|
14950
|
+
|
14933
14951
|
- func: _triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor
|
14934
14952
|
variants: function
|
14935
14953
|
dispatch:
|
@@ -15710,7 +15728,7 @@
|
|
15710
15728
|
|
15711
15729
|
- func: special_spherical_bessel_j0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
|
15712
15730
|
dispatch:
|
15713
|
-
CPU, CUDA: special_spherical_bessel_j0_out
|
15731
|
+
CPU, CUDA, MPS: special_spherical_bessel_j0_out
|
15714
15732
|
python_module: special
|
15715
15733
|
structured_inherits: TensorIteratorBase
|
15716
15734
|
structured: True
|
data/ext/torch/templates.h
CHANGED
@@ -46,10 +46,7 @@ namespace Rice::detail
|
|
46
46
|
template<typename T>
|
47
47
|
struct Type<c10::complex<T>>
|
48
48
|
{
|
49
|
-
static bool verify()
|
50
|
-
{
|
51
|
-
return true;
|
52
|
-
}
|
49
|
+
static bool verify() { return true; }
|
53
50
|
};
|
54
51
|
|
55
52
|
template<typename T>
|
@@ -66,6 +63,8 @@ namespace Rice::detail
|
|
66
63
|
class From_Ruby<c10::complex<T>>
|
67
64
|
{
|
68
65
|
public:
|
66
|
+
Convertible is_convertible(VALUE value) { return Convertible::Cast; }
|
67
|
+
|
69
68
|
c10::complex<T> convert(VALUE x)
|
70
69
|
{
|
71
70
|
VALUE real = rb_funcall(x, rb_intern("real"), 0);
|
@@ -80,16 +79,15 @@ namespace Rice::detail
|
|
80
79
|
template<>
|
81
80
|
struct Type<FanModeType>
|
82
81
|
{
|
83
|
-
static bool verify()
|
84
|
-
{
|
85
|
-
return true;
|
86
|
-
}
|
82
|
+
static bool verify() { return true; }
|
87
83
|
};
|
88
84
|
|
89
85
|
template<>
|
90
86
|
class From_Ruby<FanModeType>
|
91
87
|
{
|
92
88
|
public:
|
89
|
+
Convertible is_convertible(VALUE value) { return Convertible::Cast; }
|
90
|
+
|
93
91
|
FanModeType convert(VALUE x)
|
94
92
|
{
|
95
93
|
auto s = String(x).str();
|
@@ -106,16 +104,15 @@ namespace Rice::detail
|
|
106
104
|
template<>
|
107
105
|
struct Type<NonlinearityType>
|
108
106
|
{
|
109
|
-
static bool verify()
|
110
|
-
{
|
111
|
-
return true;
|
112
|
-
}
|
107
|
+
static bool verify() { return true; }
|
113
108
|
};
|
114
109
|
|
115
110
|
template<>
|
116
111
|
class From_Ruby<NonlinearityType>
|
117
112
|
{
|
118
113
|
public:
|
114
|
+
Convertible is_convertible(VALUE value) { return Convertible::Cast; }
|
115
|
+
|
119
116
|
NonlinearityType convert(VALUE x)
|
120
117
|
{
|
121
118
|
auto s = String(x).str();
|
@@ -150,16 +147,15 @@ namespace Rice::detail
|
|
150
147
|
template<>
|
151
148
|
struct Type<Scalar>
|
152
149
|
{
|
153
|
-
static bool verify()
|
154
|
-
{
|
155
|
-
return true;
|
156
|
-
}
|
150
|
+
static bool verify() { return true; }
|
157
151
|
};
|
158
152
|
|
159
153
|
template<>
|
160
154
|
class From_Ruby<Scalar>
|
161
155
|
{
|
162
156
|
public:
|
157
|
+
Convertible is_convertible(VALUE value) { return Convertible::Cast; }
|
158
|
+
|
163
159
|
Scalar convert(VALUE x)
|
164
160
|
{
|
165
161
|
if (FIXNUM_P(x)) {
|
data/ext/torch/torch.cpp
CHANGED
@@ -21,7 +21,13 @@ torch::Tensor make_tensor(Rice::Array a, const std::vector<int64_t> &size, const
|
|
21
21
|
}
|
22
22
|
|
23
23
|
void init_torch(Rice::Module& m) {
|
24
|
-
|
24
|
+
Rice::detail::Registries::instance.handlers.set([]() {
|
25
|
+
try {
|
26
|
+
throw;
|
27
|
+
} catch (const torch::Error& ex) {
|
28
|
+
handle_global_error(ex);
|
29
|
+
}
|
30
|
+
});
|
25
31
|
add_torch_functions(m);
|
26
32
|
m.define_singleton_function(
|
27
33
|
"grad_enabled?",
|
data/ext/torch/utils.h
CHANGED
data/lib/torch/distributions/distribution.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module Torch
|
2
|
+
module Distributions
|
3
|
+
class Distribution
|
4
|
+
def initialize(batch_shape: [], event_shape: [], validate_args: nil)
|
5
|
+
@batch_shape = batch_shape
|
6
|
+
@event_shape = event_shape
|
7
|
+
if !validate_args.nil?
|
8
|
+
@validate_args = validate_args
|
9
|
+
end
|
10
|
+
if @validate_args
|
11
|
+
raise NotImplementedYet
|
12
|
+
end
|
13
|
+
super()
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def _extended_shape(sample_shape: [])
|
19
|
+
if !sample_shape.is_a?(Array)
|
20
|
+
sample_shape = sample_shape.to_a
|
21
|
+
end
|
22
|
+
sample_shape + @batch_shape + @event_shape
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/torch/distributions/normal.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Torch
|
2
|
+
module Distributions
|
3
|
+
class Normal < ExponentialFamily
|
4
|
+
def initialize(loc, scale, validate_args: nil)
|
5
|
+
@loc, @scale = Utils.broadcast_all(loc, scale)
|
6
|
+
if loc.is_a?(Numeric) && scale.is_a?(Numeric)
|
7
|
+
batch_shape = []
|
8
|
+
else
|
9
|
+
batch_shape = @loc.size
|
10
|
+
end
|
11
|
+
super(batch_shape:, validate_args:)
|
12
|
+
end
|
13
|
+
|
14
|
+
def sample(sample_shape: [])
|
15
|
+
shape = _extended_shape(sample_shape:)
|
16
|
+
Torch.no_grad do
|
17
|
+
Torch.normal(@loc.expand(shape), @scale.expand(shape))
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/torch/tensor.rb
CHANGED
data/lib/torch/version.rb
CHANGED
data/lib/torch.rb
CHANGED
@@ -13,6 +13,12 @@ require_relative "torch/inspector"
|
|
13
13
|
require_relative "torch/tensor"
|
14
14
|
require_relative "torch/version"
|
15
15
|
|
16
|
+
# distributions
|
17
|
+
require_relative "torch/distributions/distribution"
|
18
|
+
require_relative "torch/distributions/exponential_family"
|
19
|
+
require_relative "torch/distributions/normal"
|
20
|
+
require_relative "torch/distributions/utils"
|
21
|
+
|
16
22
|
# optim
|
17
23
|
require_relative "torch/optim/optimizer"
|
18
24
|
require_relative "torch/optim/adadelta"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: torch-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.19.0
|
4
|
+
version: 0.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: rice
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - ">="
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: 4.
|
18
|
+
version: '4.5'
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - ">="
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version: 4.
|
25
|
+
version: '4.5'
|
26
26
|
email: andrew@ankane.org
|
27
27
|
executables: []
|
28
28
|
extensions:
|
@@ -64,6 +64,10 @@ files:
|
|
64
64
|
- lib/torch-rb.rb
|
65
65
|
- lib/torch.rb
|
66
66
|
- lib/torch/device.rb
|
67
|
+
- lib/torch/distributions/distribution.rb
|
68
|
+
- lib/torch/distributions/exponential_family.rb
|
69
|
+
- lib/torch/distributions/normal.rb
|
70
|
+
- lib/torch/distributions/utils.rb
|
67
71
|
- lib/torch/hub.rb
|
68
72
|
- lib/torch/inspector.rb
|
69
73
|
- lib/torch/nn/adaptive_avg_pool1d.rb
|
@@ -237,7 +241,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
237
241
|
- !ruby/object:Gem::Version
|
238
242
|
version: '0'
|
239
243
|
requirements: []
|
240
|
-
rubygems_version: 3.6.
|
244
|
+
rubygems_version: 3.6.7
|
241
245
|
specification_version: 4
|
242
246
|
summary: Deep learning for Ruby, powered by LibTorch
|
243
247
|
test_files: []
|