torch-rb 0.18.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +2 -1
- data/codegen/generate_functions.rb +3 -1
- data/codegen/native_functions.yaml +121 -27
- data/ext/torch/device.cpp +6 -1
- data/ext/torch/ext.cpp +1 -1
- data/ext/torch/tensor.cpp +2 -4
- data/ext/torch/torch.cpp +7 -12
- data/ext/torch/utils.h +1 -1
- data/lib/torch/device.rb +25 -0
- data/lib/torch/tensor.rb +5 -0
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +6 -1
- metadata +4 -7
checksums.yaml
CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 03164cc479d8f8a32f0669d597e8fe5310d91955e6954cfdc0fffdc8983c5768
+  data.tar.gz: 87fc733016b6f4489b38a419a3879cacbdb1e190cfaa5c02397aceb57c012d16
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6ba0480138a10ba43dff625dc1bcf99e2287f238dc4607ea6813e82914f1e335133f55408fa59579343b161c54850316f744c594cf6687b7f2de64a0d71746d1
+  data.tar.gz: 859015641dd14bf919a7982c6673acb296f858518552b8c924fc7e59b9c1b2a9491aa598c01b019b392c8c2bba7b9f65ff0923f838e6cbde7ddaede9c4b69191
```
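The checksums above cover the gem's inner `metadata.gz` and `data.tar.gz` archives, not the `.gem` file itself. A minimal Ruby sketch of verifying them locally, assuming the gem file has already been fetched (e.g. via `gem fetch torch-rb -v 0.19.0`):

```ruby
require "digest"
require "rubygems/package"

# A .gem is a tar archive whose entries include metadata.gz and data.tar.gz;
# hash those entries and compare against the published SHA256 values above.
File.open("torch-rb-0.19.0.gem", "rb") do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless ["metadata.gz", "data.tar.gz"].include?(entry.full_name)
    puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
  end
end
```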
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED

````diff
@@ -17,7 +17,7 @@ Check out:
 First, [download LibTorch](https://pytorch.org/get-started/locally/). For Mac arm64, use:
 
 ```sh
-curl -L https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.
+curl -L https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.6.0.zip > libtorch.zip
 unzip -q libtorch.zip
 ```
 
@@ -413,6 +413,7 @@ Here’s the list of compatible versions.
 
 Torch.rb | LibTorch
 --- | ---
+0.19.x | 2.6.x
 0.18.x | 2.5.x
 0.17.x | 2.4.x
 0.16.x | 2.3.x
````
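A quick sanity check after upgrading, using the gem's own version constant (the compatibility pairing with LibTorch 2.6.x comes from the table above):

```ruby
require "torch"

puts Torch::VERSION # => "0.19.0"
```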
data/codegen/generate_functions.rb
CHANGED

```diff
@@ -53,7 +53,9 @@ def skip_functions(functions)
       f.base_name == "sym_size" ||
       f.base_name == "sym_numel" ||
       f.base_name == "sym_storage_offset" ||
-      f.base_name == "sym_stride"
+      f.base_name == "sym_stride" ||
+      # TODO fix LibTorch 2.6 changes
+      f.base_name == "rrelu_with_noise"
   end
 end
```
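The hunk above grows the codegen's skip list: `rrelu_with_noise` gets no generated binding until its LibTorch 2.6 schema change (the `Tensor(b!) noise` mutation, visible later in this diff) is handled. A simplified, hypothetical sketch of the predicate's shape, assuming each parsed declaration responds to `base_name` as in the surrounding code:

```ruby
# Declarations whose base name appears here are excluded from codegen.
SKIPPED_BASE_NAMES = [
  "sym_size", "sym_numel", "sym_storage_offset", "sym_stride",
  "rrelu_with_noise" # skipped until the LibTorch 2.6 schema change is handled
].freeze

def skip_function?(f)
  SKIPPED_BASE_NAMES.include?(f.base_name)
end
```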
data/codegen/native_functions.yaml
CHANGED

```diff
@@ -187,7 +187,10 @@
   dispatch:
     CPU: _functional_assert_async_msg_cpu
 
-- func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()
+- func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None, *, Device? device=None, Layout? layout=None) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_tensor_metadata
+    Meta: _assert_tensor_metadata_meta_symint
 
 - func: _print(str s) -> ()
   dispatch:
@@ -309,25 +312,25 @@
 - func: _shape_as_tensor(Tensor self) -> Tensor
 
 - func: dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]
 
 - func: dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
 
 - func: feature_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]
 
 - func: feature_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
 
 - func: alpha_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]
 
 - func: alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
 
 - func: feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]
 
 - func: feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
@@ -477,7 +480,7 @@
 
 - func: conj_physical(Tensor self) -> Tensor
   variants: function, method
-  tags: pointwise
+  tags: [pointwise, maybe_aliasing_or_mutating]
 
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -641,6 +644,7 @@
     CPU: addmv_out_cpu
     CUDA: addmv_out_cuda
     MPS: addmv_out_mps
+    XPU: addmv_out_xpu
     SparseCsrCPU: addmv_out_sparse_compressed
     SparseCsrCUDA: addmv_out_sparse_compressed_cuda
 
@@ -1031,17 +1035,20 @@
 
 - func: atleast_1d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating
 
 - func: atleast_1d.Sequence(Tensor[] tensors) -> Tensor[]
 
 - func: atleast_2d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating
 
 - func: atleast_2d.Sequence(Tensor[] tensors) -> Tensor[]
   variants: function
 
 - func: atleast_3d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating
 
 - func: atleast_3d.Sequence(Tensor[] tensors) -> Tensor[]
   variants: function
@@ -1061,6 +1068,7 @@
     CPU: baddbmm_out_cpu
     CUDA: baddbmm_out_cuda
     MPS: baddbmm_out_mps
+    XPU: baddbmm_out_xpu
     SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
 
 - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -1074,6 +1082,7 @@
   autogen: bartlett_window.periodic_out
 
 - func: batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
+  tags: maybe_aliasing_or_mutating
 
 - func: quantized_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point) -> Tensor
   dispatch:
@@ -1081,6 +1090,7 @@
   autogen: quantized_batch_norm.out
 
 - func: _batch_norm_impl_index(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor, Tensor, Tensor, Tensor, int)
+  tags: maybe_aliasing_or_mutating
 
 - func: _batch_norm_impl_index_backward(int impl_index, Tensor input, Tensor grad_output, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var_transform, bool train, float eps, bool[3] output_mask, Tensor reservedSpace) -> (Tensor, Tensor, Tensor)
 
@@ -1358,6 +1368,7 @@
     CPU: bmm_out_cpu
     CUDA: bmm_out_cuda
     MPS: bmm_out_mps
+    XPU: bmm_out_xpu
     SparseCPU: bmm_out_sparse_cpu
     SparseCUDA: bmm_out_sparse_cuda
     SparseCsrCUDA: bmm_out_sparse_csr_cuda
@@ -1462,6 +1473,7 @@
   variants: function, method
   device_check: NoCheck
   device_guard: False
+  tags: maybe_aliasing_or_mutating
 
 - func: chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]
   variants: function, method
@@ -1788,7 +1800,7 @@
   variants: function, method
   structured_delegate: cos.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_cos
   tags: [core, pointwise]
 
 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -2821,6 +2833,7 @@
     # non-differentiable so NonFunctional doesn't apply
     CompositeExplicitAutograd: full_like
   autogen: full_like.out
+  tags: core
 
 - func: from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
@@ -3179,6 +3192,7 @@
   device_guard: False
   dispatch:
     CPU, CUDA, MPS: isnan
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isnan
     SparseCPU, SparseCUDA: isnan_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
   autogen: isnan.out
@@ -3289,7 +3303,9 @@
   autogen: native_layer_norm_backward.out
   tags: core
 
-- func: rms_norm(Tensor input,
+- func: rms_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: rms_norm_symint
 
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
```
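The `rms_norm` schema above gains a full SymInt-based signature with an explicit dispatch entry (the old declaration line is truncated in this view). Assuming the Ruby codegen emits a `Torch.rms_norm` binding from this schema, a usage sketch:

```ruby
require "torch"

x = Torch.randn([2, 4])
# normalized_shape covers the trailing dimensions; weight and eps are optional.
out = Torch.rms_norm(x, [4])
```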
```diff
@@ -3355,9 +3371,10 @@
   dispatch:
     CUDA: _cslt_compress
 
-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, bool split_k_one_kernel=True) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm
+  tags: needs_fixed_stride_order
 
 - func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
   dispatch:
@@ -4126,6 +4143,7 @@
     CPU: mm_out_cpu
     CUDA: mm_out_cuda
     MPS: mm_out_mps
+    XPU: mm_out_xpu
     SparseCPU, SparseCUDA: _sparse_mm_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out
 
@@ -4141,16 +4159,24 @@
 
 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
   dispatch:
-    CPU: _convert_weight_to_int4pack_cpu
     CUDA: _convert_weight_to_int4pack_cuda
     MPS: _convert_weight_to_int4pack_mps
 
 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
-    CPU: _weight_int4pack_mm_cpu
     MPS: _weight_int4pack_mm_mps
     CUDA: _weight_int4pack_mm_cuda
 
+# Split int4 pack weight between cpu and other devices due to
+# https://github.com/pytorch/ao/issues/1117#issuecomment-2451252756.
+- func: _convert_weight_to_int4pack_for_cpu(Tensor self, int innerKTiles) -> Tensor
+  dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
+
+- func: _weight_int4pack_mm_for_cpu(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+  dispatch:
+    CPU: _weight_int4pack_mm_cpu
+
 - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
   dispatch:
     CPU: _weight_int8pack_mm_cpu
@@ -4585,6 +4611,7 @@
     CompositeExplicitAutograd: rad2deg
     SparseCPU, SparseCUDA: rad2deg_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr
+  tags: pointwise
 
 - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
@@ -4592,12 +4619,14 @@
     CompositeExplicitAutograd: rad2deg_
     SparseCPU, SparseCUDA: rad2deg_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_
+  tags: pointwise
 
 - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: rad2deg_out
     SparseCPU, SparseCUDA: rad2deg_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out
+  tags: pointwise
 
 - func: deg2rad(Tensor self) -> Tensor
   variants: function, method
@@ -4990,7 +5019,7 @@
 
 - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
   device_check: NoCheck   # TensorIterator
-  tags: nondeterministic_seeded
+  tags: [pointwise, nondeterministic_seeded]
 
 - func: rrelu_(Tensor(a!) self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
   tags: nondeterministic_seeded
@@ -5027,6 +5056,7 @@
 
 - func: relu6(Tensor self) -> Tensor
   python_module: nn
+  tags: pointwise
 
 - func: relu6_(Tensor(a!) self) -> Tensor(a!)
   python_module: nn
@@ -5111,6 +5141,7 @@
   structured_delegate: hardshrink.out
   device_check: NoCheck   # TensorIterator
   variants: function, method
+  tags: pointwise
 
 - func: hardshrink_backward.grad_input(Tensor grad_out, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -5175,6 +5206,7 @@
 
 - func: selu(Tensor self) -> Tensor
   device_check: NoCheck   # TensorIterator
+  tags: pointwise
 
 - func: selu_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -5183,6 +5215,7 @@
   device_check: NoCheck   # TensorIterator
   dispatch:
     CompositeExplicitAutograd: celu
+  tags: pointwise
 
 - func: celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -5233,6 +5266,7 @@
 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
   python_module: nn
+  tags: pointwise
 
 - func: mish_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: mish.out
@@ -5305,7 +5339,7 @@
   dispatch:
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
-    NestedTensorCPU, NestedTensorCUDA:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sin
   tags: [core, pointwise]
 
 - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5803,6 +5837,7 @@
   structured_delegate: sqrt.out
   variants: function, method
   dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sqrt
     SparseCPU, SparseCUDA: sqrt_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
   tags: [core, pointwise]
@@ -6032,6 +6067,7 @@
   structured_delegate: threshold.out
   dispatch:
     QuantizedCPU: threshold_quantized_cpu
+  tags: pointwise
 
 - func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -6486,6 +6522,7 @@
   device_check: NoCheck   # TensorIterator
   dispatch:
     CPU, CUDA, MPS: where_self_out
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where_out
 
 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6988,6 +7025,7 @@
     CPU: addmm_out_cpu
     CUDA: addmm_out_cuda
     MPS: addmm_out_mps
+    XPU: addmm_out_xpu
     SparseCPU: addmm_out_sparse_dense_cpu
     SparseCUDA: addmm_out_sparse_dense_cuda
     SparseCsrCPU: addmm_out_sparse_compressed_cpu
@@ -7016,6 +7054,7 @@
   dispatch:
     CPU: addmm_activation_out_cpu
     CUDA: addmm_activation_out_cuda
+    XPU: addmm_activation_out_xpu
 
 - func: _addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor
   structured_delegate: _addmm_activation.out
@@ -7732,6 +7771,7 @@
 
 - func: cartesian_prod(Tensor[] tensors) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating
 
 - func: combinations(Tensor self, int r=2, bool with_replacement=False) -> Tensor
   variants: function
@@ -8013,6 +8053,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: masked_scatter
+  tags: core
 
 - func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
   dispatch:
@@ -8247,7 +8288,7 @@
   structured: True
   variants: function
   dispatch:
-    CPU, CUDA: scatter_reduce_two
+    CPU, CUDA, MPS: scatter_reduce_two
 
 - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   structured_delegate: eq.Scalar_out
@@ -8649,18 +8690,18 @@
 - func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
   variants: method
   dispatch:
-    CPU, CUDA: addbmm_
+    CPU, CUDA, XPU: addbmm_
     MPS: addbmm_mps_
 
 - func: addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: addbmm_out
+    CPU, CUDA, XPU: addbmm_out
     MPS: addbmm_out_mps
 
 - func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   variants: method, function
   dispatch:
-    CPU, CUDA: addbmm
+    CPU, CUDA, XPU: addbmm
     MPS: addbmm_mps
 
 - func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
@@ -8774,12 +8815,14 @@
   dispatch:
     CPU: tril_indices_cpu
     CUDA: tril_indices_cuda
+    MPS: tril_indices_mps
   autogen: tril_indices.out
 
 - func: triu_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
     CPU: triu_indices_cpu
     CUDA: triu_indices_cuda
+    MPS: triu_indices_mps
   autogen: triu_indices.out
 
 - func: trace(Tensor self) -> Tensor
@@ -9234,11 +9277,13 @@
 - func: nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU: nonzero_static_out_cpu
+    CUDA: nonzero_static_out_cuda
 
 - func: nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor
   variants: method, function
   dispatch:
     CPU: nonzero_static_cpu
+    CUDA: nonzero_static_cuda
 
 - func: nonzero_numpy(Tensor self) -> Tensor[]
   variants: method, function
@@ -9577,7 +9622,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: i0_out
+    CPU, CUDA, MPS: i0_out
   tags: pointwise
 
 - func: sign(Tensor self) -> Tensor
@@ -10153,7 +10198,7 @@
 - func: unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor
   variants: function
   dispatch:
-    CPU, CUDA: unfold_backward
+    CPU, CUDA, MPS: unfold_backward
   autogen: unfold_backward.out
 
 - func: equal(Tensor self, Tensor other) -> bool
@@ -11083,6 +11128,22 @@
     CUDA: foreach_tensor_lerp_list_cuda_
   autogen: _foreach_lerp.Scalar_out
 
+- func: _foreach_lerp.ScalarList(Tensor[] self, Tensor[] tensors1, Scalar[] weight) -> Tensor[]
+  device_check: NoCheck   # foreach kernels fall back to slow path when tensors are on different devices
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: foreach_tensor_lerp_scalarlist_kernel_slow
+    CUDA: foreach_tensor_lerp_scalarlist_cuda
+  autogen: _foreach_lerp.ScalarList_out
+
+- func: _foreach_lerp_.ScalarList(Tensor(a!)[] self, Tensor[] tensors1, Scalar[] weight) -> ()
+  device_check: NoCheck   # foreach kernels fall back to slow path when tensors are on different devices
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: foreach_tensor_lerp_scalarlist_kernel_slow_
+    CUDA: foreach_tensor_lerp_scalarlist_cuda_
+  autogen: _foreach_lerp.ScalarList_out
+
 - func: _foreach_lgamma(Tensor[] self) -> Tensor[]
   device_check: NoCheck   # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
```
```diff
@@ -11271,6 +11332,21 @@
     CUDA: foreach_tensor_round_cuda_
   autogen: _foreach_round.out
 
+- func: _foreach_rsqrt(Tensor[] self) -> Tensor[]
+  device_check: NoCheck   # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: foreach_tensor_rsqrt_slow
+    CUDA: foreach_tensor_rsqrt_cuda
+
+- func: _foreach_rsqrt_(Tensor(a!)[] self) -> ()
+  device_check: NoCheck   # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: foreach_tensor_rsqrt_slow_
+    CUDA: foreach_tensor_rsqrt_cuda_
+  autogen: _foreach_rsqrt.out
+
 - func: _foreach_sigmoid(Tensor[] self) -> Tensor[]
   device_check: NoCheck   # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
@@ -11714,6 +11790,7 @@
   structured_delegate: elu.out
   device_check: NoCheck   # TensorIterator
   python_module: nn
+  tags: pointwise
 
 - func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -11787,6 +11864,7 @@
   python_module: nn
   dispatch:
     QuantizedCPU: hardsigmoid_quantized_cpu
+  tags: pointwise
 
 - func: hardsigmoid_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: hardsigmoid.out
@@ -11818,7 +11896,7 @@
   dispatch:
     CPU, CUDA, MPS: hardtanh
     QuantizedCPU: hardtanh_quantized_cpu
-  tags: core
+  tags: [pointwise, core]
 
 - func: hardtanh_backward.grad_input(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -11942,19 +12020,20 @@
     CUDA: log_sigmoid_backward_cuda
     MPS: log_sigmoid_backward_mps
 
-- func: rrelu_with_noise.out(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: rrelu_with_noise.out(Tensor self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   tags: nondeterministic_seeded
   dispatch:
     CPU: rrelu_with_noise_out_cpu
     CUDA: rrelu_with_noise_out_cuda
 
-- func: rrelu_with_noise(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
+- func: rrelu_with_noise(Tensor self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
   python_module: nn
   dispatch:
     CPU: rrelu_with_noise_cpu
     CUDA: rrelu_with_noise_cuda
   tags: nondeterministic_seeded
+  autogen: rrelu_with_noise_functional
 
 - func: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor
   python_module: nn
@@ -11962,7 +12041,7 @@
     CompositeExplicitAutograd: rrelu_with_noise_backward
   autogen: rrelu_with_noise_backward.out
 
-- func: rrelu_with_noise_(Tensor(a!) self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
+- func: rrelu_with_noise_(Tensor(a!) self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
   python_module: nn
   tags: nondeterministic_seeded
   dispatch:
@@ -11982,6 +12061,7 @@
   structured_delegate: softplus.out
   device_check: NoCheck   # TensorIterator
   python_module: nn
+  tags: pointwise
 
 - func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -12008,6 +12088,7 @@
   structured_delegate: softshrink.out
   device_check: NoCheck   # TensorIterator
   python_module: nn
+  tags: pointwise
 
 - func: softshrink_backward.grad_input(Tensor grad_output, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -12652,6 +12733,7 @@
   dispatch:
     CPU: upsample_bicubic2d_out_cpu
     CUDA: upsample_bicubic2d_out_cuda
+    MPS: upsample_bicubic2d_out_mps
 
 - func: upsample_bicubic2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -12663,6 +12745,7 @@
   dispatch:
     CPU: upsample_bicubic2d_backward_out_cpu
     CUDA: upsample_bicubic2d_backward_out_cuda
+    MPS: upsample_bicubic2d_backward_out_mps
 
 - func: upsample_bicubic2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -13043,17 +13126,20 @@
   dispatch:
     CPU: im2col_out_cpu
     CUDA: im2col_out_cuda
+    MPS: im2col_out_mps
 
 - func: im2col(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
   python_module: nn
   dispatch:
     CPU: im2col_cpu
     CUDA: im2col_cuda
+    MPS: im2col_mps
 
 - func: isfinite(Tensor self) -> Tensor
   variants: function, method
   device_check: NoCheck
   device_guard: False
+  tags: pointwise
 
 - func: isinf(Tensor self) -> Tensor
   variants: function, method
@@ -13061,6 +13147,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: isinf
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isinf
     SparseCPU, SparseCUDA: isinf_sparse
     SparseMeta: isinf_sparse_meta
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
@@ -13076,6 +13163,7 @@
   variants: function, method
   structured_delegate: isposinf.out
   dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isposinf
     SparseCPU, SparseCUDA: isposinf_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
   tags: pointwise
@@ -13084,7 +13172,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: isposinf_out
+    CPU, CUDA, MPS: isposinf_out
     SparseCPU, SparseCUDA: isposinf_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
   tags: pointwise
@@ -13093,6 +13181,7 @@
   variants: function, method
   structured_delegate: isneginf.out
   dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isneginf
     SparseCPU, SparseCUDA: isneginf_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
   tags: pointwise
@@ -13101,7 +13190,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: isneginf_out
+    CPU, CUDA, MPS: isneginf_out
     SparseCPU, SparseCUDA: isneginf_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
   tags: pointwise
@@ -13114,7 +13203,7 @@
   variants: function
 
 # See NOTE [_add_batch_dim and _remove_batch_dim]
-- func: _remove_batch_dim(Tensor self, int level,
+- func: _remove_batch_dim(Tensor self, int level, SymInt batch_size, int out_dim) -> Tensor
   variants: function
 
 ## Functions related to the `torch.special` namespace
@@ -13414,7 +13503,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: special_i1_out
+    CPU, CUDA, MPS: special_i1_out
   tags: pointwise
 
 - func: special_i1e(Tensor self) -> Tensor
@@ -14706,6 +14795,11 @@
     CUDA: _fbgemm_dense_to_jagged_forward_symint
     CPU: _padded_dense_to_jagged_forward_cpu
 
+- func: _nested_from_padded_tensor(Tensor padded, Tensor offsets, Tensor dummy, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None, SymInt? sum_S=None) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
 - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
   dispatch:
     NestedTensorCPU: NestedTensor_softmax_dropout
```
data/ext/torch/device.cpp
CHANGED

```diff
@@ -8,7 +8,7 @@ void init_device(Rice::Module& m) {
   Rice::define_class_under<torch::Device>(m, "Device")
     .define_constructor(Rice::Constructor<torch::Device, const std::string&>())
     .define_method(
-      "
+      "_index",
       [](torch::Device& self) {
         return self.index();
       })
@@ -23,5 +23,10 @@ void init_device(Rice::Module& m) {
         std::stringstream s;
         s << self.type();
         return s.str();
+      })
+    .define_method(
+      "_str",
+      [](torch::Device& self) {
+        return self.str();
       });
 }
```
data/ext/torch/ext.cpp
CHANGED

```diff
@@ -31,6 +31,7 @@ void Init_ext()
 
   // keep this order
   init_torch(m);
+  init_device(m);
   init_tensor(m, rb_cTensor, rb_cTensorOptions);
   init_nn(m);
   init_fft(m);
@@ -39,7 +40,6 @@ void Init_ext()
 
   init_backends(m);
   init_cuda(m);
-  init_device(m);
   init_generator(m, rb_cGenerator);
   init_ivalue(m, rb_cIValue);
   init_random(m);
```
data/ext/torch/tensor.cpp
CHANGED

```diff
@@ -212,11 +212,9 @@ void init_tensor(Rice::Module& m, Rice::Class& c, Rice::Class& rb_cTensorOptions
         return s.str();
       })
     .define_method(
-      "
+      "_device",
       [](Tensor& self) {
-
-        s << self.device();
-        return s.str();
+        return self.device();
       })
     .define_method(
       "_data_str",
```
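With `_device` now returning the `torch::Device` object directly instead of formatting it into a string, the Ruby side can expose a structured device (see the new `lib/torch/device.rb` below; the `lib/torch/tensor.rb` hunk is collapsed in this view). A behavior sketch, assuming `Tensor#device` wraps the `_device` binding:

```ruby
t = Torch.tensor([1, 2, 3])
t.device.type  # => "cpu"
t.device.index # => nil (no index for a bare CPU device)
```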
data/ext/torch/torch.cpp
CHANGED

```diff
@@ -9,19 +9,14 @@
 #include "utils.h"
 
 template<typename T>
-torch::Tensor make_tensor(Rice::Array a, std::vector<int64_t> size, const torch::TensorOptions &options) {
+torch::Tensor make_tensor(Rice::Array a, const std::vector<int64_t> &size, const torch::TensorOptions &options) {
   std::vector<T> vec;
+  vec.reserve(a.size());
   for (long i = 0; i < a.size(); i++) {
     vec.push_back(Rice::detail::From_Ruby<T>().convert(a[i].value()));
   }
 
-
-  auto requires_grad = options.requires_grad();
-  torch::Tensor t = torch::tensor(vec, options.requires_grad(c10::nullopt));
-  if (requires_grad) {
-    t.set_requires_grad(true);
-  }
-
+  torch::Tensor t = torch::tensor(vec, options);
   return t.reshape(size);
 }
 
@@ -46,12 +41,12 @@ void init_torch(Rice::Module& m) {
     // config
     .define_singleton_function(
       "show_config",
-      [] {
+      []() {
         return torch::show_config();
       })
     .define_singleton_function(
       "parallel_info",
-      [] {
+      []() {
         return torch::get_parallel_info();
       })
     // begin operations
@@ -74,13 +69,13 @@ void init_torch(Rice::Module& m) {
       })
     .define_singleton_function(
       "_from_blob",
-      [](Rice::String s, std::vector<int64_t> size, const torch::TensorOptions &options) {
+      [](Rice::String s, const std::vector<int64_t> &size, const torch::TensorOptions &options) {
        void *data = const_cast<char *>(s.c_str());
        return torch::from_blob(data, size, options);
      })
     .define_singleton_function(
       "_tensor",
-      [](Rice::Array a, std::vector<int64_t> size, const torch::TensorOptions &options) {
+      [](Rice::Array a, const std::vector<int64_t> &size, const torch::TensorOptions &options) {
        auto dtype = options.dtype();
        if (dtype == torch::kByte) {
          return make_tensor<uint8_t>(a, size, options);
```
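The `make_tensor` cleanup drops the old requires_grad round-trip (stashing the flag, constructing without it, then calling `set_requires_grad`) and passes the options straight through to `torch::tensor`. Behavior should be unchanged from the caller's perspective; a quick check:

```ruby
# requires_grad survives tensor creation without the old workaround.
t = Torch.tensor([1.0, 2.0], requires_grad: true)
t.requires_grad # => true, same as before the refactor
```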
data/ext/torch/utils.h
CHANGED
data/lib/torch/device.rb
ADDED

```ruby
module Torch
  class Device
    def index
      index? ? _index : nil
    end

    def inspect
      extra = ", index: #{index.inspect}" if index?
      "device(type: #{type.inspect}#{extra})"
    end
    alias_method :to_s, :inspect

    def ==(other)
      eql?(other)
    end

    def eql?(other)
      other.is_a?(Device) && other.type == type && other.index == index
    end

    def hash
      [type, index].hash
    end
  end
end
```
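The new wrapper makes devices first-class values: `#eql?` and `#hash` give structural equality, so two devices with the same type and index compare equal and work as Hash keys. A usage sketch:

```ruby
cpu = Torch.device("cpu")
cpu.inspect                # => "device(type: \"cpu\")"
cpu == Torch.device("cpu") # => true (structural equality via #eql?)

cache = { Torch.device("cpu") => :ok }
cache[Torch.device("cpu")] # => :ok, because #hash is consistent with #eql?
```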
data/lib/torch/tensor.rb
CHANGED
data/lib/torch/version.rb
CHANGED
data/lib/torch.rb
CHANGED

```diff
@@ -8,6 +8,7 @@ require "set"
 require "tmpdir"
 
 # modules
+require_relative "torch/device"
 require_relative "torch/inspector"
 require_relative "torch/tensor"
 require_relative "torch/version"
@@ -382,7 +383,11 @@ module Torch
     alias_method :set_grad_enabled, :grad_enabled
 
     def device(str)
-
+      if str.is_a?(Device)
+        str
+      else
+        Device.new(str)
+      end
     end
 
     def save(obj, f)
```
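`Torch.device` is now idempotent: a `Device` argument passes through untouched, while a string is parsed into a new `Device` as before.

```ruby
d = Torch.device("cpu")
Torch.device(d).equal?(d) # => true, the very same object comes back
Torch.device("cpu")       # => a freshly constructed Device
```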
metadata
CHANGED

```diff
@@ -1,14 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: torch-rb
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.19.0
 platform: ruby
 authors:
 - Andrew Kane
-autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2025-01-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -24,7 +23,6 @@ dependencies:
   - - ">="
   - !ruby/object:Gem::Version
     version: 4.3.3
-description:
 email: andrew@ankane.org
 executables: []
 extensions:
@@ -65,6 +63,7 @@ files:
 - ext/torch/wrap_outputs.h
 - lib/torch-rb.rb
 - lib/torch.rb
+- lib/torch/device.rb
 - lib/torch/hub.rb
 - lib/torch/inspector.rb
 - lib/torch/nn/adaptive_avg_pool1d.rb
@@ -224,7 +223,6 @@ homepage: https://github.com/ankane/torch.rb
 licenses:
 - BSD-3-Clause
 metadata: {}
-post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -239,8 +237,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.6.2
 specification_version: 4
 summary: Deep learning for Ruby, powered by LibTorch
 test_files: []
```