classifier 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +66 -199
- data/ext/classifier/classifier_ext.c +1 -0
- data/ext/classifier/incremental_svd.c +393 -0
- data/ext/classifier/linalg.h +8 -0
- data/lib/classifier/bayes.rb +177 -53
- data/lib/classifier/errors.rb +3 -0
- data/lib/classifier/knn.rb +351 -0
- data/lib/classifier/logistic_regression.rb +571 -0
- data/lib/classifier/lsi/incremental_svd.rb +166 -0
- data/lib/classifier/lsi/summary.rb +25 -5
- data/lib/classifier/lsi.rb +365 -17
- data/lib/classifier/streaming/line_reader.rb +99 -0
- data/lib/classifier/streaming/progress.rb +96 -0
- data/lib/classifier/streaming.rb +122 -0
- data/lib/classifier/tfidf.rb +408 -0
- data/lib/classifier.rb +4 -0
- data/sig/vendor/matrix.rbs +25 -14
- data/sig/vendor/streaming.rbs +14 -0
- metadata +17 -4
data/lib/classifier/lsi.rb
CHANGED
@@ -58,6 +58,7 @@ require 'mutex_m'
 require 'classifier/lsi/word_list'
 require 'classifier/lsi/content_node'
 require 'classifier/lsi/summary'
+require 'classifier/lsi/incremental_svd'

 module Classifier
   # This class implements a Latent Semantic Indexer, which can search, classify and cluster
@@ -65,6 +66,7 @@ module Classifier
   # please consult Wikipedia[http://en.wikipedia.org/wiki/Latent_Semantic_Indexing].
   class LSI
     include Mutex_m
+    include Streaming

     # @rbs @auto_rebuild: bool
     # @rbs @word_list: WordList
@@ -74,14 +76,24 @@ module Classifier
     # @rbs @singular_values: Array[Float]?
     # @rbs @dirty: bool
     # @rbs @storage: Storage::Base?
+    # @rbs @incremental_mode: bool
+    # @rbs @u_matrix: Matrix?
+    # @rbs @max_rank: Integer
+    # @rbs @initial_vocab_size: Integer?

     attr_reader :word_list, :singular_values
     attr_accessor :auto_rebuild, :storage

+    # Default maximum rank for incremental SVD
+    DEFAULT_MAX_RANK = 100
+
     # Create a fresh index.
     # If you want to call #build_index manually, use
     #   Classifier::LSI.new auto_rebuild: false
     #
+    # For incremental SVD mode (adds documents without full rebuild):
+    #   Classifier::LSI.new incremental: true, max_rank: 100
+    #
     # @rbs (?Hash[Symbol, untyped]) -> void
     def initialize(options = {})
       super()
@@ -92,6 +104,12 @@ module Classifier
       @built_at_version = -1
       @dirty = false
       @storage = nil
+
+      # Incremental SVD settings
+      @incremental_mode = options[:incremental] == true
+      @max_rank = options[:max_rank] || DEFAULT_MAX_RANK
+      @u_matrix = nil
+      @initial_vocab_size = nil
     end

     # Returns true if the index needs to be rebuilt. The index needs
@@ -122,12 +140,73 @@ module Classifier
       end
     end

+    # Returns true if incremental mode is enabled and active.
+    # Incremental mode becomes active after the first build_index call.
+    #
+    # @rbs () -> bool
+    def incremental_enabled?
+      @incremental_mode && !@u_matrix.nil?
+    end
+
+    # Returns the current rank of the incremental SVD (number of singular values kept).
+    # Returns nil if incremental mode is not active.
+    #
+    # @rbs () -> Integer?
+    def current_rank
+      @singular_values&.count(&:positive?)
+    end
+
+    # Disables incremental mode. Subsequent adds will trigger full rebuilds.
+    #
+    # @rbs () -> void
+    def disable_incremental_mode!
+      @incremental_mode = false
+      @u_matrix = nil
+      @initial_vocab_size = nil
+    end
+
+    # Enables incremental mode with optional max_rank setting.
+    # The next build_index call will store the U matrix for incremental updates.
+    #
+    # @rbs (?max_rank: Integer) -> void
+    def enable_incremental_mode!(max_rank: DEFAULT_MAX_RANK)
+      @incremental_mode = true
+      @max_rank = max_rank
+    end
+
+    # Adds items to the index using hash-style syntax.
+    # The hash keys are categories, and values are items (or arrays of items).
+    #
+    # For example:
+    #   lsi = Classifier::LSI.new
+    #   lsi.add("Dog" => "Dogs are loyal pets")
+    #   lsi.add("Cat" => "Cats are independent")
+    #   lsi.add(Bird: "Birds can fly")  # Symbol keys work too
+    #
+    # Multiple items with the same category:
+    #   lsi.add("Dog" => ["Dogs are loyal", "Puppies are cute"])
+    #
+    # Batch operations with multiple categories:
+    #   lsi.add(
+    #     "Dog" => ["Dogs are loyal", "Puppies are cute"],
+    #     "Cat" => ["Cats are independent", "Kittens are playful"]
+    #   )
+    #
+    # @rbs (**untyped items) -> void
+    def add(**items)
+      items.each do |category, value|
+        Array(value).each { |doc| add_item(doc, category.to_s) }
+      end
+    end
+
     # Adds an item to the index. item is assumed to be a string, but
     # any item may be indexed so long as it responds to #to_s or if
     # you provide an optional block explaining how the indexer can
     # fetch fresh string data. This optional block is passed the item,
     # so the item may only be a reference to a URL or file name.
     #
+    # @deprecated Use {#add} instead for clearer hash-style syntax.
+    #
     # For example:
     #   lsi = Classifier::LSI.new
     #   lsi.add_item "This is just plain text"
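Taken together, the constructor options from the earlier hunks and the mode helpers and hash-style add above suggest the following usage. This is a minimal sketch rather than an example shipped with the gem, and it assumes the gem is installed and loaded with require 'classifier':

    require 'classifier'

    # Build the first index explicitly; the U matrix stored by that build is
    # what makes the incremental path available for later adds.
    lsi = Classifier::LSI.new(incremental: true, max_rank: 50, auto_rebuild: false)
    lsi.add('Dog' => ['Dogs are loyal pets', 'Puppies are cute'],
            'Cat' => 'Cats are independent')
    lsi.build_index

    lsi.incremental_enabled?   # => true, now that a U matrix exists
    lsi.current_rank           # => number of singular values currently kept

    # This add goes through the incremental SVD update instead of a full rebuild.
    lsi.add('Bird' => 'Birds can fly')

    # Fall back to full rebuilds at any point.
    lsi.disable_incremental_mode!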
@@ -138,11 +217,18 @@ module Classifier
     # @rbs (String, *String | Symbol) ?{ (String) -> String } -> void
     def add_item(item, *categories, &block)
       clean_word_hash = block ? block.call(item).clean_word_hash : item.to_s.clean_word_hash
+      node = nil
+
       synchronize do
-        @items[item] = ContentNode.new(clean_word_hash, *categories)
+        node = ContentNode.new(clean_word_hash, *categories)
+        @items[item] = node
         @version += 1
         @dirty = true
       end
+
+      # Use incremental update if enabled and we have a U matrix
+      return perform_incremental_update(node, clean_word_hash) if @incremental_mode && @u_matrix
+
       build_index if @auto_rebuild
     end

@@ -203,12 +289,12 @@ module Classifier
     # A value of 1 for cutoff means that no semantic analysis will take place,
     # turning the LSI class into a simple vector search engine.
     #
-    # @rbs (?Float) -> void
-    def build_index(cutoff = 0.75)
+    # @rbs (?Float, ?force: bool) -> void
+    def build_index(cutoff = 0.75, force: false)
       validate_cutoff!(cutoff)

       synchronize do
-        return unless needs_rebuild_unlocked?
+        return unless force || needs_rebuild_unlocked?

         make_word_list

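The force: keyword added here lets the decomposition be re-run even when no items changed, for example to try a different cutoff. A minimal sketch (assumes the gem is installed; the documents are placeholders):

    require 'classifier'

    lsi = Classifier::LSI.new(auto_rebuild: false)
    lsi.add('Dog' => 'Dogs are loyal pets', 'Cat' => 'Cats are independent')

    # cutoff is roughly the fraction of singular values kept: 0.75 keeps the
    # largest ~75%, while 1.0 keeps them all (plain vector search, no LSA).
    lsi.build_index(0.75)

    # Rebuild with a different cutoff even though the item set is unchanged;
    # without force: true the early return would skip the work.
    lsi.build_index(1.0, force: true)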
@@ -219,14 +305,20 @@ module Classifier
           # Convert vectors to arrays for matrix construction
           tda_arrays = tda.map { |v| v.respond_to?(:to_a) ? v.to_a : v }
           tdm = self.class.matrix_class.alloc(*tda_arrays).trans
-          ntdm = build_reduced_matrix(tdm, cutoff)
+          ntdm, u_mat = build_reduced_matrix_with_u(tdm, cutoff)
           assign_native_ext_lsi_vectors(ntdm, doc_list)
         else
           tdm = Matrix.rows(tda).trans
-          ntdm = build_reduced_matrix(tdm, cutoff)
+          ntdm, u_mat = build_reduced_matrix_with_u(tdm, cutoff)
           assign_ruby_lsi_vectors(ntdm, doc_list)
         end

+        # Store U matrix for incremental mode
+        if @incremental_mode
+          @u_matrix = u_mat
+          @initial_vocab_size = @word_list.size
+        end
+
         @built_at_version = @version
       end
     end
@@ -532,6 +624,100 @@ module Classifier
       from_json(File.read(path))
     end

+    # Loads an LSI index from a checkpoint.
+    #
+    # @rbs (storage: Storage::Base, checkpoint_id: String) -> LSI
+    def self.load_checkpoint(storage:, checkpoint_id:)
+      raise ArgumentError, 'Storage must be File storage for checkpoints' unless storage.is_a?(Storage::File)
+
+      dir = File.dirname(storage.path)
+      base = File.basename(storage.path, '.*')
+      ext = File.extname(storage.path)
+      checkpoint_path = File.join(dir, "#{base}_checkpoint_#{checkpoint_id}#{ext}")
+
+      checkpoint_storage = Storage::File.new(path: checkpoint_path)
+      instance = load(storage: checkpoint_storage)
+      instance.storage = storage
+      instance
+    end
+
+    # Trains the LSI index from an IO stream.
+    # Each line in the stream is treated as a separate document.
+    # Documents are added without rebuilding, then the index is rebuilt at the end.
+    #
+    # @example Train from a file
+    #   lsi.train_from_stream(:category, File.open('corpus.txt'))
+    #
+    # @example With progress tracking
+    #   lsi.train_from_stream(:category, io, batch_size: 500) do |progress|
+    #     puts "#{progress.completed} documents processed"
+    #   end
+    #
+    # @rbs (String | Symbol, IO, ?batch_size: Integer) { (Streaming::Progress) -> void } -> void
+    def train_from_stream(category, io, batch_size: Streaming::DEFAULT_BATCH_SIZE)
+      original_auto_rebuild = @auto_rebuild
+      @auto_rebuild = false
+
+      begin
+        reader = Streaming::LineReader.new(io, batch_size: batch_size)
+        total = reader.estimate_line_count
+        progress = Streaming::Progress.new(total: total)
+
+        reader.each_batch do |batch|
+          batch.each { |text| add_item(text, category) }
+          progress.completed += batch.size
+          progress.current_batch += 1
+          yield progress if block_given?
+        end
+      ensure
+        @auto_rebuild = original_auto_rebuild
+        build_index if original_auto_rebuild
+      end
+    end
+
+    # Adds items to the index in batches from an array.
+    # Documents are added without rebuilding, then the index is rebuilt at the end.
+    #
+    # @example Batch add with progress
+    #   lsi.add_batch(Dog: documents, batch_size: 100) do |progress|
+    #     puts "#{progress.percent}% complete"
+    #   end
+    #
+    # @rbs (?batch_size: Integer, **Array[String]) { (Streaming::Progress) -> void } -> void
+    def add_batch(batch_size: Streaming::DEFAULT_BATCH_SIZE, **items)
+      original_auto_rebuild = @auto_rebuild
+      @auto_rebuild = false
+
+      begin
+        total_docs = items.values.sum { |v| Array(v).size }
+        progress = Streaming::Progress.new(total: total_docs)
+
+        items.each do |category, documents|
+          Array(documents).each_slice(batch_size) do |batch|
+            batch.each { |doc| add_item(doc, category.to_s) }
+            progress.completed += batch.size
+            progress.current_batch += 1
+            yield progress if block_given?
+          end
+        end
+      ensure
+        @auto_rebuild = original_auto_rebuild
+        build_index if original_auto_rebuild
+      end
+    end
+
+    # Alias train_batch to add_batch for API consistency with other classifiers.
+    # Note: LSI uses categories differently (items have categories, not the training call).
+    #
+    # @rbs (?(String | Symbol)?, ?Array[String]?, ?batch_size: Integer, **Array[String]) { (Streaming::Progress) -> void } -> void
+    def train_batch(category = nil, documents = nil, batch_size: Streaming::DEFAULT_BATCH_SIZE, **categories, &block)
+      if category && documents
+        add_batch(batch_size: batch_size, **{ category.to_sym => documents }, &block)
+      else
+        add_batch(batch_size: batch_size, **categories, &block)
+      end
+    end
+
     private

     # Restores LSI state from a JSON string (used by reload)
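load_checkpoint derives the checkpoint file name from the main storage path, so the relationship between the two files can be read straight off the code above. A sketch with hypothetical paths (it assumes a checkpoint file with that name was written earlier):

    require 'classifier'

    storage = Classifier::Storage::File.new(path: 'tmp/lsi_index.json')

    # Looks for "<base>_checkpoint_<id><ext>" next to the main file, i.e.
    # 'tmp/lsi_index_checkpoint_2024-06-01.json' here, loads it, and points
    # the returned instance back at the main storage.
    lsi = Classifier::LSI.load_checkpoint(storage: storage,
                                          checkpoint_id: '2024-06-01')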
@@ -602,6 +788,7 @@ module Classifier
     # @rbs (String) ?{ (String) -> String } -> Array[[String, Float]]
     def proximity_array_for_content_unlocked(doc, &)
       return [] if needs_rebuild_unlocked?
+      return @items.keys.map { |item| [item, 1.0] } if @items.size == 1

       content_node = node_for_content_unlocked(doc, &)
       result =
@@ -651,7 +838,7 @@ module Classifier
       votes
     end

-    # Unlocked version of node_for_content for internal use
+    # Unlocked version of node_for_content for internal use.
     # @rbs (String) ?{ (String) -> String } -> ContentNode
     def node_for_content_unlocked(item, &block)
       return @items[item] if @items[item]
@@ -659,31 +846,68 @@ module Classifier
       clean_word_hash = block ? block.call(item).clean_word_hash : item.to_s.clean_word_hash
       cn = ContentNode.new(clean_word_hash, &block)
       cn.raw_vector_with(@word_list) unless needs_rebuild_unlocked?
+      assign_lsi_vector_incremental(cn) if incremental_enabled?
       cn
     end

     # @rbs (untyped, ?Float) -> untyped
     def build_reduced_matrix(matrix, cutoff = 0.75)
+      result, _u = build_reduced_matrix_with_u(matrix, cutoff)
+      result
+    end

+    # Builds reduced matrix and returns both the result and the U matrix.
+    # U matrix is needed for incremental SVD updates.
+    # @rbs (untyped, ?Float) -> [untyped, Matrix]
+    def build_reduced_matrix_with_u(matrix, cutoff = 0.75)
+      u, v, s = matrix.SV_decomp

+      all_singular_values = s.sort.reverse
       s_cutoff_index = [(s.size * cutoff).round - 1, 0].max
+      s_cutoff = all_singular_values[s_cutoff_index]
+
+      kept_indices = []
+      kept_singular_values = []
       s.size.times do |ord|
+        if s[ord] >= s_cutoff
+          kept_indices << ord
+          kept_singular_values << s[ord]
+        else
+          s[ord] = 0.0
+        end
       end
-      # Reconstruct the term document matrix, only with reduced rank
-      result = u * self.class.matrix_class.diag(s) * v.trans

+      @singular_values = kept_singular_values.sort.reverse
+      result = u * self.class.matrix_class.diag(s) * v.trans
       result = result.trans if result.row_size != matrix.row_size
+      u_reduced = extract_reduced_u(u, kept_indices, s)

-      result
+      [result, u_reduced]
     end

+    # Extracts columns from U corresponding to kept singular values.
+    # Columns are sorted by descending singular value to match @singular_values order.
+    # rubocop:disable Naming/MethodParameterName
+    # @rbs (untyped, Array[Integer], Array[Float]) -> Matrix
+    def extract_reduced_u(u, kept_indices, singular_values)
+      return Matrix.empty(u.row_size, 0) if kept_indices.empty?
+
+      sorted_indices = kept_indices.sort_by { |i| -singular_values[i] }
+
+      if u.respond_to?(:to_ruby_matrix)
+        u = u.to_ruby_matrix
+      elsif !u.is_a?(::Matrix)
+        rows = u.row_size.times.map do |i|
+          sorted_indices.map { |j| u[i, j] }
+        end
+        return Matrix.rows(rows)
+      end
+
+      cols = sorted_indices.map { |i| u.column(i).to_a }
+      Matrix.columns(cols)
+    end
+    # rubocop:enable Naming/MethodParameterName
+
     # @rbs () -> void
     def make_word_list
       @word_list = WordList.new
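The cutoff arithmetic in build_reduced_matrix_with_u can be checked in isolation; the following standalone sketch mirrors the threshold selection using only plain Ruby (the singular values are made-up numbers):

    # Same index/threshold computation as build_reduced_matrix_with_u.
    s = [9.0, 0.5, 4.0, 2.0]                # singular values from the SVD (invented)
    cutoff = 0.75

    all_singular_values = s.sort.reverse                     # => [9.0, 4.0, 2.0, 0.5]
    s_cutoff_index = [(s.size * cutoff).round - 1, 0].max    # => 2
    s_cutoff = all_singular_values[s_cutoff_index]           # => 2.0

    kept = s.select { |v| v >= s_cutoff }                    # => [9.0, 4.0, 2.0]
    # The remaining value (0.5) is zeroed before the matrix is reconstructed,
    # and the kept values, sorted descending, become @singular_values.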
@@ -691,5 +915,129 @@ module Classifier
         node.word_hash.each_key { |key| @word_list.add_word key }
       end
     end
+
+    # Performs incremental SVD update for a new document.
+    # @rbs (ContentNode, Hash[Symbol, Integer]) -> void
+    def perform_incremental_update(node, word_hash)
+      needs_full_rebuild = false
+      old_rank = nil
+
+      synchronize do
+        if vocabulary_growth_exceeds_threshold?(word_hash)
+          disable_incremental_mode!
+          needs_full_rebuild = true
+          next
+        end
+
+        old_rank = @u_matrix.column_size
+        extend_vocabulary_for_incremental(word_hash)
+        raw_vec = node.raw_vector_with(@word_list)
+        raw_vector = Vector[*raw_vec.to_a]
+
+        @u_matrix, @singular_values = IncrementalSVD.update(
+          @u_matrix, @singular_values, raw_vector, max_rank: @max_rank
+        )
+
+        new_rank = @u_matrix.column_size
+        if new_rank > old_rank
+          reproject_all_documents
+        else
+          assign_lsi_vector_incremental(node)
+        end
+
+        @built_at_version = @version
+      end
+
+      build_index if needs_full_rebuild
+    end
+
+    # Checks if vocabulary growth would exceed threshold (20%)
+    # @rbs (Hash[Symbol, Integer]) -> bool
+    def vocabulary_growth_exceeds_threshold?(word_hash)
+      return false unless @initial_vocab_size&.positive?
+
+      new_words = word_hash.keys.count { |w| @word_list[w].nil? }
+      growth_ratio = new_words.to_f / @initial_vocab_size
+      growth_ratio > 0.2
+    end
+
+    # Extends vocabulary and U matrix for new words.
+    # @rbs (Hash[Symbol, Integer]) -> void
+    def extend_vocabulary_for_incremental(word_hash)
+      new_words = word_hash.keys.select { |w| @word_list[w].nil? }
+      return if new_words.empty?
+
+      new_words.each { |word| @word_list.add_word(word) }
+      extend_u_matrix(new_words.size)
+    end
+
+    # Extends U matrix with zero rows for new vocabulary terms.
+    # @rbs (Integer) -> void
+    def extend_u_matrix(num_new_rows)
+      return if num_new_rows.zero? || @u_matrix.nil?
+
+      if self.class.native_available? && @u_matrix.is_a?(self.class.matrix_class)
+        new_rows = self.class.matrix_class.zeros(num_new_rows, @u_matrix.column_size)
+        @u_matrix = self.class.matrix_class.vstack(@u_matrix, new_rows)
+      else
+        new_rows = Matrix.zero(num_new_rows, @u_matrix.column_size)
+        @u_matrix = Matrix.vstack(@u_matrix, new_rows)
+      end
+    end
+
+    # Re-projects all documents onto the current U matrix
+    # Called when rank grows to ensure consistent LSI vector sizes
+    # Uses native batch_project for performance when available
+    # @rbs () -> void
+    def reproject_all_documents
+      return unless @u_matrix
+      return reproject_all_documents_native if self.class.native_available? && @u_matrix.respond_to?(:batch_project)
+
+      reproject_all_documents_ruby
+    end
+
+    # Native batch re-projection using C extension.
+    # @rbs () -> void
+    def reproject_all_documents_native
+      nodes = @items.values
+      raw_vectors = nodes.map do |node|
+        raw = node.raw_vector_with(@word_list)
+        raw.is_a?(self.class.vector_class) ? raw : self.class.vector_class.alloc(raw.to_a)
+      end
+
+      lsi_vectors = @u_matrix.batch_project(raw_vectors)
+
+      nodes.each_with_index do |node, i|
+        lsi_vec = lsi_vectors[i].row
+        node.lsi_vector = lsi_vec
+        node.lsi_norm = lsi_vec.normalize
+      end
+    end
+
+    # Pure Ruby re-projection (fallback)
+    # @rbs () -> void
+    def reproject_all_documents_ruby
+      @items.each_value do |node|
+        assign_lsi_vector_incremental(node)
+      end
+    end
+
+    # Assigns LSI vector to a node using projection: lsi_vec = U^T * raw_vec.
+    # @rbs (ContentNode) -> void
+    def assign_lsi_vector_incremental(node)
+      return unless @u_matrix
+
+      raw_vec = node.raw_vector_with(@word_list)
+      raw_vector = Vector[*raw_vec.to_a]
+      lsi_arr = (@u_matrix.transpose * raw_vector).to_a
+
+      lsi_vec = if self.class.native_available?
+                  self.class.vector_class.alloc(lsi_arr).row
+                else
+                  Vector[*lsi_arr]
+                end
+      node.lsi_vector = lsi_vec
+      node.lsi_norm = lsi_vec.normalize
+    end
   end
 end
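The projection in assign_lsi_vector_incremental is simply lsi_vec = U^T * raw_vec, and extend_u_matrix only pads U with zero rows when the vocabulary grows. A self-contained sketch with Ruby's stdlib matrix library and made-up numbers (three terms, rank two) shows the shape bookkeeping:

    require 'matrix'

    # A tiny rank-2 U matrix over a three-term vocabulary (invented values).
    u_matrix = Matrix[[0.8, 0.1],
                      [0.5, 0.2],
                      [0.0, 0.9]]

    raw_vec = Vector[1.0, 2.0, 0.0]        # raw term counts for one document

    lsi_vec = u_matrix.transpose * raw_vec
    # => Vector[1.8, 0.5]  -- one coordinate per kept singular value

    # When new vocabulary appears, U is padded with zero rows so its row count
    # matches the grown word list before the next projection (cf. extend_u_matrix).
    u_matrix = Matrix.vstack(u_matrix, Matrix.zero(1, u_matrix.column_size))
    u_matrix.row_size   # => 4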
data/lib/classifier/streaming/line_reader.rb
ADDED
@@ -0,0 +1,99 @@
+# rbs_inline: enabled
+
+module Classifier
+  module Streaming
+    # Memory-efficient line reader for large files and IO streams.
+    # Reads lines one at a time and can yield in configurable batches.
+    #
+    # @example Reading line by line
+    #   reader = LineReader.new(File.open('large_corpus.txt'))
+    #   reader.each { |line| process(line) }
+    #
+    # @example Reading in batches
+    #   reader = LineReader.new(io, batch_size: 100)
+    #   reader.each_batch { |batch| process_batch(batch) }
+    class LineReader
+      include Enumerable #[String]
+
+      # @rbs @io: IO
+      # @rbs @batch_size: Integer
+
+      attr_reader :batch_size
+
+      # Creates a new LineReader.
+      #
+      # @rbs (IO, ?batch_size: Integer) -> void
+      def initialize(io, batch_size: 100)
+        @io = io
+        @batch_size = batch_size
+      end
+
+      # Iterates over each line in the IO stream.
+      # Lines are chomped (trailing newlines removed).
+      #
+      # @rbs () { (String) -> void } -> void
+      # @rbs () -> Enumerator[String, void]
+      def each
+        return enum_for(:each) unless block_given?
+
+        @io.each_line do |line|
+          yield line.chomp
+        end
+      end
+
+      # Iterates over batches of lines.
+      # Each batch is an array of chomped lines.
+      #
+      # @rbs () { (Array[String]) -> void } -> void
+      # @rbs () -> Enumerator[Array[String], void]
+      def each_batch
+        return enum_for(:each_batch) unless block_given?
+
+        batch = [] #: Array[String]
+        each do |line|
+          batch << line
+          if batch.size >= @batch_size
+            yield batch
+            batch = []
+          end
+        end
+        yield batch unless batch.empty?
+      end
+
+      # Estimates the total number of lines in the IO stream.
+      # This is a rough estimate based on file size and average line length.
+      # Returns nil for non-seekable streams.
+      #
+      # @rbs (?sample_size: Integer) -> Integer?
+      def estimate_line_count(sample_size: 100)
+        return nil unless @io.respond_to?(:size) && @io.respond_to?(:rewind)
+
+        begin
+          original_pos = @io.pos
+          @io.rewind
+
+          sample_bytes = 0
+          sample_lines = 0
+
+          sample_size.times do
+            line = @io.gets
+            break unless line
+
+            sample_bytes += line.bytesize
+            sample_lines += 1
+          end
+
+          @io.seek(original_pos)
+
+          return nil if sample_lines.zero?
+
+          avg_line_size = sample_bytes.to_f / sample_lines
+          io_size = @io.__send__(:size) #: Integer
+          (io_size / avg_line_size).round
+        rescue IOError, Errno::ESPIPE
+          nil
+        end
+      end
+    end
+  end
+end
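A quick way to see the batching behaviour is to feed LineReader a StringIO. A small sketch (the sample lines are invented, and it assumes require 'classifier' loads the streaming files, as the added requires in data/lib/classifier.rb suggest):

    require 'classifier'
    require 'stringio'

    io = StringIO.new("first doc\nsecond doc\nthird doc\n")
    reader = Classifier::Streaming::LineReader.new(io, batch_size: 2)

    reader.each_batch { |batch| p batch }
    # ["first doc", "second doc"]
    # ["third doc"]

    # StringIO is seekable, so estimate_line_count samples it and scales by
    # total size; for this input the estimate works out to 3.
    io.rewind
    Classifier::Streaming::LineReader.new(io).estimate_line_count   # => 3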
data/lib/classifier/streaming/progress.rb
ADDED
@@ -0,0 +1,96 @@
+# rbs_inline: enabled
+
+module Classifier
+  module Streaming
+    # Progress tracking object yielded to blocks during batch/stream operations.
+    # Provides information about training progress including completion percentage,
+    # elapsed time, processing rate, and estimated time remaining.
+    #
+    # @example Basic usage with train_batch
+    #   classifier.train_batch(:spam, documents, batch_size: 100) do |progress|
+    #     puts "#{progress.completed}/#{progress.total} (#{progress.percent}%)"
+    #     puts "Rate: #{progress.rate.round(1)} docs/sec"
+    #     puts "ETA: #{progress.eta&.round}s" if progress.eta
+    #   end
+    class Progress
+      # @rbs @completed: Integer
+      # @rbs @total: Integer?
+      # @rbs @start_time: Time
+      # @rbs @current_batch: Integer
+
+      attr_reader :start_time, :total
+      attr_accessor :completed, :current_batch
+
+      # @rbs (?total: Integer?, ?completed: Integer) -> void
+      def initialize(total: nil, completed: 0)
+        @completed = completed
+        @total = total
+        @start_time = Time.now
+        @current_batch = 0
+      end
+
+      # Returns the completion percentage (0-100).
+      # Returns nil if total is unknown.
+      #
+      # @rbs () -> Float?
+      def percent
+        return nil unless @total&.positive?
+
+        (@completed.to_f / @total * 100).round(2)
+      end
+
+      # Returns the elapsed time in seconds since the operation started.
+      #
+      # @rbs () -> Float
+      def elapsed
+        Time.now - @start_time
+      end
+
+      # Returns the processing rate in items per second.
+      # Returns 0 if no time has elapsed.
+      #
+      # @rbs () -> Float
+      def rate
+        e = elapsed
+        return 0.0 if e.zero?
+
+        @completed / e
+      end
+
+      # Returns the estimated time remaining in seconds.
+      # Returns nil if total is unknown or rate is zero.
+      #
+      # @rbs () -> Float?
+      def eta
+        return nil unless @total
+        return nil if rate.zero?
+        return 0.0 if @completed >= @total
+
+        (@total - @completed) / rate
+      end
+
+      # Returns true if the operation is complete.
+      #
+      # @rbs () -> bool
+      def complete?
+        return false unless @total
+
+        @completed >= @total
+      end
+
+      # Returns a hash representation of the progress state.
+      #
+      # @rbs () -> Hash[Symbol, untyped]
+      def to_h
+        {
+          completed: @completed,
+          total: @total,
+          percent: percent,
+          elapsed: elapsed.round(2),
+          rate: rate.round(2),
+          eta: eta&.round(2)
+        }
+      end
+    end
+  end
+end
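The Progress numbers follow directly from its accessors: percent = completed / total * 100, rate = completed / elapsed, and eta = (total - completed) / rate. A short sketch with invented counts (again assuming require 'classifier' loads the streaming files):

    require 'classifier'

    progress = Classifier::Streaming::Progress.new(total: 1_000)
    progress.completed = 250
    progress.current_batch = 5

    progress.percent     # => 25.0
    progress.complete?   # => false

    # rate and eta depend on wall-clock time: after 10 elapsed seconds the
    # rate would be 250 / 10 = 25.0 docs/sec and eta (1000 - 250) / 25.0 = 30.0 s.
    progress.to_h        # => { completed: 250, total: 1000, percent: 25.0, ... }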