RubyGems - torchcodec - Versions diffs - 0.1.0 → 0.1.1 - Mend

torchcodec 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/README.md +37 -1
data/ext/torchcodec/custom_ops.cpp +13 -2
data/lib/torchcodec/core/metadata.rb +29 -2
data/lib/torchcodec/decoders/decoder_utils.rb +5 -0
data/lib/torchcodec/decoders/video_decoder.rb +208 -0
data/lib/torchcodec/encoders/audio_encoder.rb +62 -0
data/lib/torchcodec/encoders/video_encoder.rb +65 -0
data/lib/torchcodec/transforms/decoder_transforms.rb +14 -0
data/lib/torchcodec/version.rb +1 -1
data/lib/torchcodec.rb +8 -0
metadata +5 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 292f24721c347356202f9f0c691dcb00bf4835208f7a9e8730964014731a3202
-  data.tar.gz: 9ae50e37cf15a1691aec3dddd07f46dc7a44a32f92ea0a90a99958341b4b5131
+  metadata.gz: 14bab54a5a5fd6d29ea5d09b11a85d53babcadd9a691201a0b07c759ed780216
+  data.tar.gz: dff1c8af2705ad7907d2c3b099248245d89382851cadb3bacad572e7052d15ae
 SHA512:
-  metadata.gz: a0d72c1d15a3f67dde0ee86807ad2c72e6bb6da541ec1ad9dfb2ddd1fca4ceb35d428bf279ef704b923a5f6faaccdb2aa6ec2b5b62542aec8d1a186e64788eba
-  data.tar.gz: 34d6865ce8da7c5a0d451633c9b8a4b4b42aafe55ad0c200e870b2e3809c83b3c872b40ba829d68fcacddaeac0de88fd206322248621fb0e077dac8aacc833e9
+  metadata.gz: deffa4d9dbc71fb9ea9551efdf0092d304f757dc7fd5fae57b7c88671f78fbf9fafeca4454cb0b369a12d6ea43ff9c2b46cfc5ba27d2637325252bbab890d20e
+  data.tar.gz: 074d93f9852592f64dd09ac449f90f9db194fb72c7088f5fefab3190e0bcfef61f86732472733b970645cc7eddaf002924520fc933ee816952dbde1b3d66d2b9

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,8 @@
+## 0.1.1 (2026-02-27)
+- Added `AudioEncoder` class
+- Added `VideoEncoder` and `VideoDecoder` classes
 ## 0.1.0 (2026-02-26)
 - First release

data/README.md CHANGED Viewed

@@ -20,12 +20,48 @@ gem "torchcodec"
 ## Getting Started
-This library follows the [Python API](https://meta-pytorch.org/torchcodec/). Most functionality is missing at the moment. PRs welcome!
+This library follows the [Python API](https://meta-pytorch.org/torchcodec/). Some functionality is missing at the moment. PRs welcome!
+### Audio
+Encoding
+```ruby
+encoder = TorchCodec::Encoders::AudioEncoder.new(samples, sample_rate: 8000)
+encoder.to_file("file.mp3")
+tensor = encoder.to_tensor("mp3")
+```
+Decoding
 ```ruby
 decoder = TorchCodec::Decoders::AudioDecoder.new("file.mp3")
 decoder.metadata
 decoder.get_all_samples
+decoder.get_samples_played_in_range(start_seconds: 0, stop_seconds: 1)
+```
+### Video
+Encoding
+```ruby
+encoder = TorchCodec::Encoders::VideoEncoder.new(frames, frame_rate: 24)
+encoder.to_file("file.mp4")
+tensor = encoder.to_tensor("mp4")
+```
+Decoding
+```ruby
+decoder = TorchCodec::Decoders::VideoDecoder.new("file.mp4")
+decoder.metadata
+decoder.get_frame_at(0)
+decoder.get_frames_at(Torch.tensor([0, 1, 2]))
+decoder.get_frames_in_range(0, 10, step: 3)
+decoder.get_frame_played_at(0)
+decoder.get_frames_played_at(Torch.tensor([0, 1, 2], dtype: :float64))
+decoder.get_frames_played_in_range(0, 10)
 ```
 ## FFmpeg Installation

data/ext/torchcodec/custom_ops.cpp CHANGED Viewed

@@ -906,8 +906,19 @@ void scan_all_streams_to_update_metadata(at::Tensor& decoder) {
 void init_core(Rice::Module m) {
   m
     .define_singleton_function("create_from_file", &facebook::torchcodec::create_from_file)
+    .define_singleton_function("encode_audio_to_file", &facebook::torchcodec::encode_audio_to_file)
+    .define_singleton_function("encode_audio_to_tensor", &facebook::torchcodec::encode_audio_to_tensor)
+    .define_singleton_function("encode_video_to_file", &facebook::torchcodec::encode_video_to_file)
+    .define_singleton_function("encode_video_to_tensor", &facebook::torchcodec::encode_video_to_tensor)
+    .define_singleton_function("add_video_stream", &facebook::torchcodec::add_video_stream)
+    .define_singleton_function("add_audio_stream", &facebook::torchcodec::add_audio_stream)
     .define_singleton_function("_get_container_json_metadata", &facebook::torchcodec::get_container_json_metadata)
     .define_singleton_function("_get_stream_json_metadata", &facebook::torchcodec::get_stream_json_metadata)
-    .define_singleton_function("add_audio_stream", &facebook::torchcodec::add_audio_stream)
-    .define_singleton_function("get_frames_by_pts_in_range_audio", &facebook::torchcodec::get_frames_by_pts_in_range_audio);
+    .define_singleton_function("get_frame_at_pts", &facebook::torchcodec::get_frame_at_pts)
+    .define_singleton_function("get_frame_at_index", &facebook::torchcodec::get_frame_at_index)
+    .define_singleton_function("get_frames_at_indices", &facebook::torchcodec::get_frames_at_indices)
+    .define_singleton_function("get_frames_in_range", &facebook::torchcodec::get_frames_in_range)
+    .define_singleton_function("get_frames_by_pts_in_range", &facebook::torchcodec::get_frames_by_pts_in_range)
+    .define_singleton_function("get_frames_by_pts_in_range_audio", &facebook::torchcodec::get_frames_by_pts_in_range_audio)
+    .define_singleton_function("get_frames_by_pts", &facebook::torchcodec::get_frames_by_pts);
 }

data/lib/torchcodec/core/metadata.rb CHANGED Viewed

@@ -1,5 +1,16 @@
 module TorchCodec
   module Core
+    def self._get_optional_par_fraction(stream_dict)
+      begin
+        Rational(
+          stream_dict.fetch("sampleAspectRatioNum"),
+          stream_dict.fetch("sampleAspectRatioDen")
+        )
+      rescue KeyError
+        nil
+      end
+    end
     def self.get_container_metadata(decoder)
       container_dict = JSON.parse(_get_container_json_metadata(decoder))
       streams_metadata = []
@@ -16,7 +27,20 @@ module TorchCodec
           stream_index: stream_index
         }
         if stream_dict["mediaType"] == "video"
-          raise Todo
+          streams_metadata << {
+            begin_stream_seconds_from_content: stream_dict["beginStreamSecondsFromContent"],
+            end_stream_seconds_from_content: stream_dict["endStreamSecondsFromContent"],
+            end_stream_seconds: stream_dict["endStreamSeconds"],
+            num_frames: stream_dict["numFrames"],
+            average_fps: stream_dict["averageFps"],
+            width: stream_dict["width"],
+            height: stream_dict["height"],
+            num_frames_from_header: stream_dict["numFramesFromHeader"],
+            num_frames_from_content: stream_dict["numFramesFromContent"],
+            average_fps_from_header: stream_dict["averageFpsFromHeader"],
+            pixel_aspect_ratio: _get_optional_par_fraction(stream_dict),
+            **common_meta
+          }
         elsif stream_dict["mediaType"] == "audio"
           streams_metadata << {
             sample_rate: stream_dict["sampleRate"],
@@ -25,7 +49,10 @@ module TorchCodec
             **common_meta
           }
         else
-          raise Todo
+          # This is neither a video nor an audio stream. Could be e.g. subtitles.
+          # We still need to add a dummy entry so that streams_metadata.length
+          # is consistent with the number of streams.
+          streams_metadata << common_meta
         end
       end

data/lib/torchcodec/decoders/decoder_utils.rb CHANGED Viewed

@@ -7,5 +7,10 @@ module TorchCodec
         raise TypeError, "Unknown source type: #{source.class.name}"
       end
     end
+    def self._get_cuda_backend
+      # TODO improve
+      "ffmpeg"
+    end
   end
 end

data/lib/torchcodec/decoders/video_decoder.rb ADDED Viewed

@@ -0,0 +1,208 @@
+module TorchCodec
+  module Decoders
+    class VideoDecoder
+      attr_reader :metadata
+      def initialize(
+        source,
+        stream_index: nil,
+        dimension_order: "NCHW",
+        num_ffmpeg_threads: 1,
+        device: nil,
+        seek_mode: "exact",
+        transforms: nil,
+        custom_frame_mappings: nil
+      )
+        allowed_seek_modes = ["exact", "approximate"]
+        if !allowed_seek_modes.include?(seek_mode)
+          raise ArgumentError, "Invalid seek mode (#{seek_mode})."
+        end
+        # Validate seek_mode and custom_frame_mappings are not mismatched
+        if !custom_frame_mappings.nil? && seek_mode == "approximate"
+          raise ArgumentError,
+            "custom_frame_mappings is incompatible with seek_mode: 'approximate'. " +
+            "Use seek_mode: 'custom_frame_mappings' or leave it unspecified to automatically use custom frame mappings."
+        end
+        # Auto-select custom_frame_mappings seek_mode and process data when mappings are provided
+        custom_frame_mappings_data = nil
+        if !custom_frame_mappings.nil?
+          raise Todo
+        end
+        @decoder = Decoders.create_decoder(source, seek_mode)
+        (
+          @metadata,
+          @stream_index,
+          @begin_stream_seconds,
+          @end_stream_seconds,
+          @num_frames
+        ) = _get_and_validate_stream_metadata(
+          decoder: @decoder, stream_index: stream_index
+        )
+        allowed_dimension_orders = ["NCHW", "NHWC"]
+        if !allowed_dimension_orders.include?(dimension_order)
+          raise ArgumentError, "Invalid dimension order (#{dimension_order})."
+        end
+        if num_ffmpeg_threads.nil?
+          raise ArgumentError, "#{num_ffmpeg_threads} should be an int."
+        end
+        if device.nil?
+          device = "cpu" # TODO Torch.get_default_device.to_s
+        elsif device.is_a?(Torch::Device)
+          device = device.to_s
+        end
+        device_variant = Decoders._get_cuda_backend
+        transform_specs = Transforms._make_transform_specs(
+          transforms,
+          [@metadata[:height], @metadata[:width]]
+        )
+        Core.add_video_stream(
+          @decoder,
+          num_ffmpeg_threads,
+          dimension_order,
+          @stream_index,
+          device,
+          device_variant,
+          transform_specs,
+          custom_frame_mappings_data
+        )
+      end
+      def get_frame_at(index)
+        data, pts_seconds, duration_seconds = Core.get_frame_at_index(@decoder, index)
+        {
+          data: data,
+          pts_seconds: pts_seconds.item,
+          duration_seconds: duration_seconds.item
+        }
+      end
+      def get_frames_at(indices)
+        data, pts_seconds, duration_seconds = Core.get_frames_at_indices(@decoder, indices)
+        {
+          data: data,
+          pts_seconds: pts_seconds,
+          duration_seconds: duration_seconds
+        }
+      end
+      def get_frames_in_range(start, stop, step: 1)
+        frames = Core.get_frames_in_range(
+          @decoder,
+          start,
+          stop,
+          step
+        )
+        {
+          data: frames[0],
+          pts_seconds: frames[1],
+          duration_seconds: frames[2]
+        }
+      end
+      def get_frame_played_at(seconds)
+        if !(@begin_stream_seconds <= seconds && seconds < @end_stream_seconds)
+          raise IndexError, "Invalid pts in seconds: #{seconds}."
+        end
+        data, pts_seconds, duration_seconds = Core.get_frame_at_pts(
+          @decoder, seconds
+        )
+        {
+          data: data,
+          pts_seconds: pts_seconds.item,
+          duration_seconds: duration_seconds.item
+        }
+      end
+      def get_frames_played_at(seconds)
+        data, pts_seconds, duration_seconds = Core.get_frames_by_pts(
+          @decoder, seconds
+        )
+        {
+          data: data,
+          pts_seconds: pts_seconds,
+          duration_seconds: duration_seconds
+        }
+      end
+      def get_frames_played_in_range(start_seconds, stop_seconds)
+        if !(start_seconds <= stop_seconds)
+          raise ArgumentError, "Invalid start seconds: #{start_seconds}. It must be less than or equal to stop seconds (#{stop_seconds})."
+        end
+        if !(@begin_stream_seconds <= start_seconds && start_seconds < @end_stream_seconds)
+          raise ArgumentError, "Invalid start seconds: #{start_seconds}."
+        end
+        if !(stop_seconds <= @end_stream_seconds)
+          raise ArgumentError, "Invalid stop seconds: #{stop_seconds}."
+        end
+        frames = Core.get_frames_by_pts_in_range(
+          @decoder,
+          start_seconds,
+          stop_seconds
+        )
+        {
+          data: frames[0],
+          pts_seconds: frames[1],
+          duration_seconds: frames[2]
+        }
+      end
+      private
+      def _get_and_validate_stream_metadata(
+        decoder:,
+        stream_index: nil
+      )
+        container_metadata = Core.get_container_metadata(decoder)
+        if stream_index.nil?
+          if (stream_index = container_metadata[:best_video_stream_index]).nil?
+            raise ArgumentError, "The best video stream is unknown and there is no specified stream."
+          end
+        end
+        if stream_index >= container_metadata[:streams].length
+          raise ArgumentError, "The stream index #{stream_index} is not a valid stream."
+        end
+        metadata = container_metadata[:streams][stream_index]
+        if !metadata.key?(:begin_stream_seconds_from_content)
+          raise ArgumentError, "The stream at index #{stream_index} is not a video stream."
+        end
+        if metadata[:begin_stream_seconds].nil?
+          raise ArgumentError, "The minimum pts value in seconds is unknown."
+        end
+        begin_stream_seconds = metadata[:begin_stream_seconds]
+        if metadata[:end_stream_seconds].nil?
+          raise ArgumentError, "The maximum pts value in seconds is unknown."
+        end
+        end_stream_seconds = metadata[:end_stream_seconds]
+        if metadata[:num_frames].nil?
+          raise ArgumentError, "The number of frames is unknown."
+        end
+        num_frames = metadata[:num_frames]
+        [
+          metadata,
+          stream_index,
+          begin_stream_seconds,
+          end_stream_seconds,
+          num_frames
+        ]
+      end
+    end
+  end
+end

data/lib/torchcodec/encoders/audio_encoder.rb ADDED Viewed

@@ -0,0 +1,62 @@
+module TorchCodec
+  module Encoders
+    class AudioEncoder
+      def initialize(samples, sample_rate:)
+        # Some of these checks are also done in C++: it's OK, they're cheap, and
+        # doing them here allows to surface them when the AudioEncoder is
+        # instantiated, rather than later when the encoding methods are called.
+        if !samples.is_a?(Torch::Tensor)
+          raise ArgumentError, "Expected samples to be a Tensor, got #{samples.class.name}."
+        end
+        if samples.ndim == 1
+          # make it 2D and assume 1 channel
+          samples = Torch.unsqueeze(samples, 0)
+        end
+        if samples.ndim != 2
+          raise ArgumentError, "Expected 1D or 2D samples, got #{samples.shape}."
+        end
+        if samples.dtype != Torch.float32
+          raise ArgumentError, "Expected float32 samples, got #{samples.dtype}."
+        end
+        if sample_rate <= 0
+          raise ArgumentError, "#{sample_rate} must be > 0."
+        end
+        @samples = samples
+        @sample_rate = sample_rate
+      end
+      def to_file(
+        dest,
+        bit_rate: nil,
+        num_channels: nil,
+        sample_rate: nil
+      )
+        Core.encode_audio_to_file(
+          @samples,
+          @sample_rate,
+          dest.to_s,
+          bit_rate,
+          num_channels,
+          sample_rate
+        )
+      end
+      def to_tensor(
+        format,
+        bit_rate: nil,
+        num_channels: nil,
+        sample_rate: nil
+      )
+        Core.encode_audio_to_tensor(
+          @samples,
+          @sample_rate,
+          format,
+          bit_rate,
+          num_channels,
+          sample_rate
+        )
+      end
+    end
+  end
+end

data/lib/torchcodec/encoders/video_encoder.rb ADDED Viewed

@@ -0,0 +1,65 @@
+module TorchCodec
+  module Encoders
+    class VideoEncoder
+      def initialize(frames, frame_rate:)
+        if !frames.is_a?(Torch::Tensor)
+          raise ArgumentError, "Expected frames to be a Tensor, got #{frames.class.name}."
+        end
+        if frames.ndim != 4
+          raise ArgumentError, "Expected 4D frames, got #{frames.shape}."
+        end
+        if frames.dtype != Torch.uint8
+          raise ArgumentError, "Expected uint8 frames, got #{frames.dtype}."
+        end
+        if frame_rate <= 0
+          raise ArgumentError, "#{frame_rate} must be > 0."
+        end
+        @frames = frames
+        @frame_rate = frame_rate
+      end
+      def to_file(
+        dest,
+        codec: nil,
+        pixel_format: nil,
+        crf: nil,
+        preset: nil,
+        extra_options: nil
+      )
+        preset = preset.is_a?(Integer) ? preset.to_s : preset
+        Core.encode_video_to_file(
+          @frames,
+          @frame_rate,
+          dest.to_s,
+          codec,
+          pixel_format,
+          crf,
+          preset,
+          extra_options
+        )
+      end
+      def to_tensor(
+        format,
+        codec: nil,
+        pixel_format: nil,
+        crf: nil,
+        preset: nil,
+        extra_options: nil
+      )
+        preset_value = preset.is_a?(Integer) ? preset.to_s : preset
+        Core.encode_video_to_tensor(
+          @frames,
+          @frame_rate,
+          format,
+          codec,
+          pixel_format,
+          crf,
+          preset_value,
+          extra_options
+        )
+      end
+    end
+  end
+end

data/lib/torchcodec/transforms/decoder_transforms.rb ADDED Viewed

@@ -0,0 +1,14 @@
+module TorchCodec
+  module Transforms
+    def self._make_transform_specs(
+      transforms,
+      input_dims
+    )
+      if transforms.nil?
+        return ""
+      end
+      raise Todo
+    end
+  end
+end

data/lib/torchcodec/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module TorchCodec
-  VERSION = "0.1.0"
+  VERSION = "0.1.1"
 end

data/lib/torchcodec.rb CHANGED Viewed

@@ -16,6 +16,14 @@ require_relative "torchcodec/core/metadata"
 # decoders
 require_relative "torchcodec/decoders/audio_decoder"
 require_relative "torchcodec/decoders/decoder_utils"
+require_relative "torchcodec/decoders/video_decoder"
+# encoders
+require_relative "torchcodec/encoders/audio_encoder"
+require_relative "torchcodec/encoders/video_encoder"
+# transforms
+require_relative "torchcodec/transforms/decoder_transforms"
 module TorchCodec
   class Error < StandardError; end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: torchcodec
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Andrew Kane
@@ -65,6 +65,10 @@ files:
 - lib/torchcodec/core/metadata.rb
 - lib/torchcodec/decoders/audio_decoder.rb
 - lib/torchcodec/decoders/decoder_utils.rb
+- lib/torchcodec/decoders/video_decoder.rb
+- lib/torchcodec/encoders/audio_encoder.rb
+- lib/torchcodec/encoders/video_encoder.rb
+- lib/torchcodec/transforms/decoder_transforms.rb
 - lib/torchcodec/version.rb
 homepage: https://github.com/ankane/torchcodec-ruby
 licenses: