torch-rb 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 03164cc479d8f8a32f0669d597e8fe5310d91955e6954cfdc0fffdc8983c5768
- data.tar.gz: 87fc733016b6f4489b38a419a3879cacbdb1e190cfaa5c02397aceb57c012d16
+ metadata.gz: 39fe7c12b647086c46318bb77d1aac80bac1874cf3b71f1a0db4a1671322799e
+ data.tar.gz: 81a1c4d3ad4474b06dac82df2c38c498c7b4f5296e700d3bc98f596df14d10a4
  SHA512:
- metadata.gz: 6ba0480138a10ba43dff625dc1bcf99e2287f238dc4607ea6813e82914f1e335133f55408fa59579343b161c54850316f744c594cf6687b7f2de64a0d71746d1
- data.tar.gz: 859015641dd14bf919a7982c6673acb296f858518552b8c924fc7e59b9c1b2a9491aa598c01b019b392c8c2bba7b9f65ff0923f838e6cbde7ddaede9c4b69191
+ metadata.gz: d3677acb4621a6e47194acb81612a369716dcecb86518b1f0840e4d0e3e9fe48e8eaa3c7888cb1adc0c8e4ca78aa920a23c5c452d7a33fde92461fc6fb27b9bc
+ data.tar.gz: a02603711a10d16c6521c55c4389ade96503e60d21748d566552679acc2ab10805ada2c8083813b74ccfbba0742b68adc3cff5461b9dd000761326ee6e1ccffb
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+ ## 0.20.0 (2025-04-26)
+
+ - Updated LibTorch to 2.7.0
+ - Added `Normal` distribution
+ - Fixed `SystemStackError` with certain tensor comparisons
+
+ ## 0.19.1 (2025-02-10)
+
+ - Fixed error with Rice 4.5
+
  ## 0.19.0 (2025-01-29)

  - Updated LibTorch to 2.6.0
data/README.md CHANGED
@@ -17,7 +17,7 @@ Check out:
  First, [download LibTorch](https://pytorch.org/get-started/locally/). For Mac arm64, use:

  ```sh
- curl -L https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.6.0.zip > libtorch.zip
+ curl -L https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.7.0.zip > libtorch.zip
  unzip -q libtorch.zip
  ```

@@ -413,6 +413,7 @@ Here’s the list of compatible versions.

  Torch.rb | LibTorch
  --- | ---
+ 0.20.x | 2.7.x
  0.19.x | 2.6.x
  0.18.x | 2.5.x
  0.17.x | 2.4.x
@@ -403,6 +403,7 @@
  variants: function, method
  dispatch:
  CPU, CUDA: angle
+ MPS: angle_mps
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
  tags: pointwise

@@ -410,6 +411,7 @@
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA: angle_out
+ MPS: angle_out_mps
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
  tags: pointwise

@@ -1456,8 +1458,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: ceil_out
- MPS: ceil_out_mps
+ CPU, CUDA, MPS: ceil_out
  SparseCPU, SparseCUDA: ceil_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
  tags: pointwise
@@ -2576,8 +2577,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: exp_out
- MPS: exp_out_mps
+ CPU, CUDA, MPS: exp_out
  tags: pointwise

  - func: exp2(Tensor self) -> Tensor
@@ -2740,8 +2740,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: floor_out
- MPS: floor_out_mps
+ CPU, CUDA, MPS: floor_out
  SparseCPU, SparseCUDA: floor_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
  tags: pointwise
@@ -3000,6 +2999,7 @@
  CPU: _fft_r2c_mkl
  CUDA: _fft_r2c_cufft
  MPS: _fft_r2c_mps
+ tags: core

  - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
@@ -3864,6 +3864,7 @@
  device_guard: False
  dispatch:
  CompositeImplicitAutograd: value_selecting_reduction_backward_symint
+ NestedTensorCPU, NestedTensorCUDA: value_selecting_reduction_backward_nested_symint

  - func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
  variants: function, method
@@ -4177,6 +4178,14 @@
  dispatch:
  CPU: _weight_int4pack_mm_cpu

+ - func: _dyn_quant_pack_4bit_weight(Tensor weights, Tensor scales_zeros, Tensor? bias, int block_size, int in_features, int out_features) -> Tensor
+ dispatch:
+ CPU: _dyn_quant_pack_4bit_weight_cpu
+
+ - func: _dyn_quant_matmul_4bit(Tensor inp, Tensor packed_weights, int block_size, int in_features, int out_features) -> Tensor
+ dispatch:
+ CPU: _dyn_quant_matmul_4bit_cpu
+
  - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
  dispatch:
  CPU: _weight_int8pack_mm_cpu
@@ -4989,9 +4998,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU: round_out
- CUDA: round_out
- MPS: round_out_mps
+ CPU, CUDA, MPS: round_out
  SparseCPU, SparseCUDA: round_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
  tags: pointwise
@@ -5013,8 +5020,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU: round_decimals_out
- CUDA: round_decimals_out
+ CPU, CUDA, MPS: round_decimals_out
  tags: pointwise

  - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
@@ -5376,7 +5382,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: sinc_out
+ CPU, CUDA, MPS: sinc_out
  tags: pointwise

  - func: sinh(Tensor self) -> Tensor
@@ -5747,11 +5753,11 @@
  - func: dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)

  # Overload without center & pad mode, needed for forward-compatibility
- - func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
+ - func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor
  variants: function, method
  cpp_no_default_args: ['hop_length', 'win_length', 'window', 'normalized']

- - func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
+ - func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor
  variants: function, method

  - func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor
@@ -5856,8 +5862,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: sqrt_out
- MPS: sqrt_out_mps
+ CPU, CUDA, MPS: sqrt_out
  SparseCPU, SparseCUDA: sqrt_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
  tags: pointwise
@@ -6048,8 +6053,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: tanh_out
- MPS: tanh_out_mps
+ CPU, CUDA, MPS: tanh_out
  SparseCPU, SparseCUDA: tanh_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
  tags: pointwise
@@ -6340,8 +6344,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: trunc_out
- MPS: trunc_out_mps
+ CPU, CUDA, MPS: trunc_out
  SparseCPU, SparseCUDA: trunc_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
  tags: pointwise
@@ -7070,6 +7073,12 @@
  dispatch:
  CUDA: _scaled_mm_out_cuda

+
+ - func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
+ variants: function
+ dispatch:
+ CUDA: _scaled_grouped_mm_cuda
+
  # NOTE [ Sparse: autograd and API ]
  #
  #
@@ -9274,12 +9283,12 @@
  MPS: nonzero_mps
  tags: [dynamic_output_shape, core]

- - func: nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
+ - func: nonzero_static.out(Tensor self, *, SymInt size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: nonzero_static_out_cpu
  CUDA: nonzero_static_out_cuda

- - func: nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor
+ - func: nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor
  variants: method, function
  dispatch:
  CPU: nonzero_static_cpu
@@ -9428,11 +9437,13 @@
  - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU, CUDA: cholesky_out
+ MPS: cholesky_mps_out

  - func: cholesky(Tensor self, bool upper=False) -> Tensor
  variants: method, function
  dispatch:
  CPU, CUDA: cholesky
+ MPS: cholesky_mps

  - func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -9506,6 +9517,7 @@
  structured: True
  dispatch:
  CPU, CUDA: lu_unpack_out
+ MPS: lu_unpack_out_mps

  # TODO: remove dispatch section when porting TH CUDA to ATen
  - func: multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
@@ -9602,8 +9614,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: erfinv_out
- MPS: erfinv_out_mps
+ CPU, CUDA, MPS: erfinv_out
  SparseCPU, SparseCUDA: erfinv_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
  tags: pointwise
@@ -12711,6 +12722,7 @@
  dispatch:
  CPU: _upsample_bilinear2d_aa_out_cpu
  CUDA: _upsample_bilinear2d_aa_out_cuda
+ MPS: _upsample_bilinear2d_aa_out_mps

  - func: _upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
@@ -13058,7 +13070,6 @@
  autogen: _slow_conv2d_backward.output_mask_out

  - func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: nn
  dispatch:
  CUDA: conv_depthwise2d_cuda_out
@@ -13225,7 +13236,7 @@
  python_module: special
  variants: function
  dispatch:
- CPU, CUDA: special_entr_out
+ CPU, CUDA, MPS: special_entr_out
  tags: pointwise

  - func: special_ndtri(Tensor self) -> Tensor
@@ -13372,7 +13383,7 @@
  python_module: special
  variants: function
  dispatch:
- CPU, CUDA: special_xlog1py_out
+ CPU, CUDA, MPS: special_xlog1py_out
  tags: pointwise

  - func: special_xlog1py.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -13451,7 +13462,7 @@
  python_module: special
  variants: function
  dispatch:
- CPU, CUDA: special_zeta_out
+ CPU, CUDA, MPS: special_zeta_out
  tags: pointwise

  - func: special_zeta.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -13744,7 +13755,6 @@
  CompositeImplicitAutograd: fft_hfft2_symint

  - func: fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: fft
  variants: function
  dispatch:
@@ -13758,7 +13768,6 @@
  CompositeImplicitAutograd: fft_ihfft2_symint

  - func: fft_ihfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: fft
  variants: function
  dispatch:
@@ -13820,7 +13829,6 @@
  CompositeImplicitAutograd: fft_hfftn_symint

  - func: fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: fft
  variants: function
  dispatch:
@@ -13834,7 +13842,6 @@
  CompositeImplicitAutograd: fft_ihfftn_symint

  - func: fft_ihfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
- use_const_ref_for_mutable_tensors: True
  python_module: fft
  variants: function
  dispatch:
@@ -13891,6 +13898,7 @@
  structured: True
  dispatch:
  CPU, CUDA: linalg_cholesky_ex_out
+ MPS: linalg_cholesky_ex_out_mps

  - func: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
  python_module: linalg
@@ -13937,6 +13945,7 @@
  structured: True
  dispatch:
  CPU, CUDA: linalg_lu_factor_ex_out
+ MPS: linalg_lu_factor_ex_out_mps

  # linalg.lu
  - func: linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)
@@ -13971,7 +13980,7 @@
  - func: _linalg_det.result(Tensor A, *, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots)
  structured: True
  dispatch:
- CPU, CUDA: _linalg_det_out
+ CPU, CUDA, MPS: _linalg_det_out

  - func: linalg_det(Tensor A) -> Tensor
  python_module: linalg
@@ -14058,7 +14067,7 @@
  - func: _linalg_slogdet.sign(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots) -> (Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots)
  structured: True
  dispatch:
- CPU, CUDA: _linalg_slogdet_out
+ CPU, CUDA, MPS: _linalg_slogdet_out

  - func: linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet)
  python_module: linalg
@@ -14300,6 +14309,7 @@
  structured: True
  dispatch:
  CPU, CUDA: _linalg_solve_ex_out
+ MPS: _linalg_solve_ex_out_mps

  - func: linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor info)
  python_module: linalg
@@ -14837,6 +14847,7 @@
  Meta: _fused_sdp_choice_meta
  CPU, NestedTensorCPU: _fused_sdp_choice_cpp
  CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
+ XPU: _fused_sdp_choice_xpu
  tags: nondeterministic_seeded

  - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
@@ -14848,7 +14859,7 @@
  MPS: _scaled_dot_product_attention_math_mps
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_flash_attention_cuda
  NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
@@ -14862,6 +14873,7 @@
  - func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
  CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
+ XPU: _scaled_dot_product_fused_attention_overrideable_xpu
  tags: nondeterministic_seeded

  - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
@@ -14898,6 +14910,7 @@
  - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_cudnn_attention_cuda
+ NestedTensorCUDA: _scaled_dot_product_cudnn_attention_nestedtensor_cuda
  tags: nondeterministic_seeded

  - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
@@ -14905,13 +14918,13 @@
  CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
  tags: nondeterministic_seeded

- - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
  variants: function
  dispatch:
  CUDA: _flash_attention_forward
  tags: nondeterministic_seeded

- - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
+ - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor rng_state, Tensor unused, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
  device_check: NoCheck
  variants: function
  dispatch:
@@ -14930,6 +14943,11 @@
  dispatch:
  CUDA: _efficient_attention_backward

+ - func: _cudnn_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ dispatch:
+ CUDA: _cudnn_attention_forward
+ tags: nondeterministic_seeded
+
  - func: _triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor
  variants: function
  dispatch:
@@ -15710,7 +15728,7 @@

  - func: special_spherical_bessel_j0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_spherical_bessel_j0_out
+ CPU, CUDA, MPS: special_spherical_bessel_j0_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -46,10 +46,7 @@ namespace Rice::detail
  template<typename T>
  struct Type<c10::complex<T>>
  {
- static bool verify()
- {
- return true;
- }
+ static bool verify() { return true; }
  };

  template<typename T>
@@ -66,6 +63,8 @@ namespace Rice::detail
  class From_Ruby<c10::complex<T>>
  {
  public:
+ Convertible is_convertible(VALUE value) { return Convertible::Cast; }
+
  c10::complex<T> convert(VALUE x)
  {
  VALUE real = rb_funcall(x, rb_intern("real"), 0);
@@ -80,16 +79,15 @@ namespace Rice::detail
  template<>
  struct Type<FanModeType>
  {
- static bool verify()
- {
- return true;
- }
+ static bool verify() { return true; }
  };

  template<>
  class From_Ruby<FanModeType>
  {
  public:
+ Convertible is_convertible(VALUE value) { return Convertible::Cast; }
+
  FanModeType convert(VALUE x)
  {
  auto s = String(x).str();
@@ -106,16 +104,15 @@ namespace Rice::detail
  template<>
  struct Type<NonlinearityType>
  {
- static bool verify()
- {
- return true;
- }
+ static bool verify() { return true; }
  };

  template<>
  class From_Ruby<NonlinearityType>
  {
  public:
+ Convertible is_convertible(VALUE value) { return Convertible::Cast; }
+
  NonlinearityType convert(VALUE x)
  {
  auto s = String(x).str();
@@ -150,16 +147,15 @@ namespace Rice::detail
  template<>
  struct Type<Scalar>
  {
- static bool verify()
- {
- return true;
- }
+ static bool verify() { return true; }
  };

  template<>
  class From_Ruby<Scalar>
  {
  public:
+ Convertible is_convertible(VALUE value) { return Convertible::Cast; }
+
  Scalar convert(VALUE x)
  {
  if (FIXNUM_P(x)) {
data/ext/torch/torch.cpp CHANGED
@@ -21,7 +21,13 @@ torch::Tensor make_tensor(Rice::Array a, const std::vector<int64_t> &size, const
  }

  void init_torch(Rice::Module& m) {
- register_handler<torch::Error>(handle_global_error);
+ Rice::detail::Registries::instance.handlers.set([]() {
+ try {
+ throw;
+ } catch (const torch::Error& ex) {
+ handle_global_error(ex);
+ }
+ });
  add_torch_functions(m);
  m.define_singleton_function(
  "grad_enabled?",
data/ext/torch/utils.h CHANGED
@@ -6,7 +6,7 @@
  #include <rice/stl.hpp>

  static_assert(
- TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR == 6,
+ TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR == 7,
  "Incompatible LibTorch version"
  );

data/lib/torch/distributions/distribution.rb ADDED
@@ -0,0 +1,26 @@
+ module Torch
+ module Distributions
+ class Distribution
+ def initialize(batch_shape: [], event_shape: [], validate_args: nil)
+ @batch_shape = batch_shape
+ @event_shape = event_shape
+ if !validate_args.nil?
+ @validate_args = validate_args
+ end
+ if @validate_args
+ raise NotImplementedYet
+ end
+ super()
+ end
+
+ private
+
+ def _extended_shape(sample_shape: [])
+ if !sample_shape.is_a?(Array)
+ sample_shape = sample_shape.to_a
+ end
+ sample_shape + @batch_shape + @event_shape
+ end
+ end
+ end
+ end
data/lib/torch/distributions/exponential_family.rb ADDED
@@ -0,0 +1,6 @@
+ module Torch
+ module Distributions
+ class ExponentialFamily < Distribution
+ end
+ end
+ end
data/lib/torch/distributions/normal.rb ADDED
@@ -0,0 +1,22 @@
+ module Torch
+ module Distributions
+ class Normal < ExponentialFamily
+ def initialize(loc, scale, validate_args: nil)
+ @loc, @scale = Utils.broadcast_all(loc, scale)
+ if loc.is_a?(Numeric) && scale.is_a?(Numeric)
+ batch_shape = []
+ else
+ batch_shape = @loc.size
+ end
+ super(batch_shape:, validate_args:)
+ end
+
+ def sample(sample_shape: [])
+ shape = _extended_shape(sample_shape:)
+ Torch.no_grad do
+ Torch.normal(@loc.expand(shape), @scale.expand(shape))
+ end
+ end
+ end
+ end
+ end
data/lib/torch/distributions/utils.rb ADDED
@@ -0,0 +1,10 @@
+ module Torch
+ module Distributions
+ module Utils
+ def self.broadcast_all(*values)
+ # TODO improve
+ values
+ end
+ end
+ end
+ end
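Taken together, the files above provide a first `Torch::Distributions::Normal`. Since `Utils.broadcast_all` is still a passthrough (see the TODO), a usage sketch is safest when `loc` and `scale` are already tensors of the same shape; the values below are illustrative, not from the gem's docs:

```ruby
require "torch"

# loc and scale as same-shaped tensors, since broadcast_all does not broadcast yet
loc = Torch.tensor([0.0, 1.0])
scale = Torch.tensor([1.0, 0.5])

dist = Torch::Distributions::Normal.new(loc, scale)
samples = dist.sample(sample_shape: [4])
samples.shape # => [4, 2] (sample_shape + batch_shape)
```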
data/lib/torch/tensor.rb CHANGED
@@ -159,6 +159,7 @@ module Torch

  # TODO better compare?
  def <=>(other)
+ other = other.item if other.is_a?(Tensor)
  item <=> other
  end
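A minimal illustration of the change above, presumably the fix behind the `SystemStackError` entry in the changelog (assuming scalar tensors; the values are illustrative):

```ruby
a = Torch.tensor(1)
b = Torch.tensor(2)

# Comparing two tensors previously raised SystemStackError (per the changelog);
# with the added coercion, <=> now compares the underlying items.
a <=> b     # => -1
[b, a].sort # => tensors ordered by value, smallest first
```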
data/lib/torch/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Torch
- VERSION = "0.19.0"
+ VERSION = "0.20.0"
  end
data/lib/torch.rb CHANGED
@@ -13,6 +13,12 @@ require_relative "torch/inspector"
  require_relative "torch/tensor"
  require_relative "torch/version"

+ # distributions
+ require_relative "torch/distributions/distribution"
+ require_relative "torch/distributions/exponential_family"
+ require_relative "torch/distributions/normal"
+ require_relative "torch/distributions/utils"
+
  # optim
  require_relative "torch/optim/optimizer"
  require_relative "torch/optim/adadelta"
metadata CHANGED
@@ -1,13 +1,13 @@
  --- !ruby/object:Gem::Specification
  name: torch-rb
  version: !ruby/object:Gem::Version
- version: 0.19.0
+ version: 0.20.0
  platform: ruby
  authors:
  - Andrew Kane
  bindir: bin
  cert_chain: []
- date: 2025-01-30 00:00:00.000000000 Z
+ date: 1980-01-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rice
@@ -15,14 +15,14 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 4.3.3
+ version: '4.5'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 4.3.3
+ version: '4.5'
  email: andrew@ankane.org
  executables: []
  extensions:
@@ -64,6 +64,10 @@ files:
  - lib/torch-rb.rb
  - lib/torch.rb
  - lib/torch/device.rb
+ - lib/torch/distributions/distribution.rb
+ - lib/torch/distributions/exponential_family.rb
+ - lib/torch/distributions/normal.rb
+ - lib/torch/distributions/utils.rb
  - lib/torch/hub.rb
  - lib/torch/inspector.rb
  - lib/torch/nn/adaptive_avg_pool1d.rb
@@ -237,7 +241,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.6.2
+ rubygems_version: 3.6.7
  specification_version: 4
  summary: Deep learning for Ruby, powered by LibTorch
  test_files: []