RubyGems - rubyllm-semantic_router - Versions diffs - 0.1.0 → 0.4.0 - Mend

rubyllm-semantic_router 0.1.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

checksums.yaml +4 -4
data/.gitignore +4 -0
data/ARCHITECTURE.md +329 -0
data/CHANGELOG.md +98 -0
data/CONTRIBUTING.md +103 -0
data/Gemfile.lock +10 -10
data/README.md +136 -179
data/lib/rubyllm/semantic_router/configuration.rb +101 -5
data/lib/rubyllm/semantic_router/embedding_cache.rb +74 -0
data/lib/rubyllm/semantic_router/errors.rb +7 -0
data/lib/rubyllm/semantic_router/router.rb +178 -32
data/lib/rubyllm/semantic_router/strategies/semantic.rb +11 -13
data/lib/rubyllm/semantic_router/utils.rb +51 -0
data/lib/rubyllm/semantic_router/version.rb +1 -1
data/lib/rubyllm/semantic_router.rb +2 -0
metadata +7 -2

data/lib/rubyllm/semantic_router/router.rb CHANGED Viewed

@@ -10,6 +10,7 @@ module RubyLLM
       :fallback,
       :default_agent,
       :scope,
+      :max_words,
       keyword_init: true
     )
@@ -37,7 +38,7 @@ module RubyLLM
     #   router.ask("What laptops do you have?")
     #
     class Router
-      attr_reader :agents, :current_agent, :last_routing_decision
+      attr_reader :agents, :current_agent, :last_routing_decision, :embedding_cache
       # In-memory routing example for non-Rails usage
       InMemoryExample = Struct.new(:agent_name, :example_text, :embedding, keyword_init: true)
@@ -52,7 +53,12 @@ module RubyLLM
         scope: nil,
         strategy: nil,
         examples: nil,
-        find_examples: nil
+        find_examples: nil,
+        max_words: nil,
+        logger: nil,
+        cache_ttl: nil,
+        max_retries: nil,
+        retry_base_delay: nil
       )
         @agents = normalize_agents(agents)
         @default_agent = default_agent.to_sym
@@ -64,15 +70,28 @@ module RubyLLM
         validate_default_agent!
+        global_config = SemanticRouter.configuration || Configuration.new
+        @logger = logger || global_config.logger
+        @max_retries = max_retries || global_config.max_retries
+        @retry_base_delay = retry_base_delay || global_config.retry_base_delay
+        # Set up embedding cache if TTL is configured
+        ttl = cache_ttl || global_config.cache_ttl
+        @embedding_cache = ttl ? EmbeddingCache.new(ttl: ttl) : nil
         @config = build_config(
           embedding_model: embedding_model,
           similarity_threshold: similarity_threshold,
           k_neighbors: k_neighbors,
-          fallback: fallback
+          fallback: fallback,
+          max_words: max_words
         )
         @chat = nil
         @last_routing_decision = nil
+        log(:debug, "Router initialized with agents: #{@agents.keys.join(', ')}")
       end
       # Send a message to the router and get a response
@@ -81,10 +100,19 @@ module RubyLLM
       # @yield [chunk] Optional block for streaming responses
       # @return [RubyLLM::Message] The response from the selected agent
       def ask(message, &block)
+        log(:debug, "Routing message: #{message[0..100]}...")
         @last_routing_decision = route(message)
+        log(:info, "Routed to :#{@last_routing_decision.agent} " \
+                   "(confidence: #{@last_routing_decision.confidence.round(3)}, " \
+                   "reason: #{@last_routing_decision.reason})")
         target_agent = @last_routing_decision.agent
-        switch_to(target_agent) if target_agent != @current_agent
+        if target_agent != @current_agent
+          log(:debug, "Switching from :#{@current_agent} to :#{target_agent}")
+          switch_to(target_agent)
+        end
         if @last_routing_decision.needs_clarification?
           inject_clarification_prompt
@@ -93,6 +121,36 @@ module RubyLLM
         current_chat.ask(message, &block)
       end
+      # Route multiple messages and return their routing decisions
+      # Useful for batch analysis or pre-routing without conversation
+      #
+      # @param messages [Array<String>] Messages to route
+      # @return [Array<RoutingDecision>] Routing decisions for each message
+      def ask_batch(messages)
+        log(:debug, "Batch routing #{messages.size} messages")
+        # Generate embeddings for all messages at once
+        truncated = messages.map { |m| truncate_to_max_words(m) }
+        embeddings = generate_embeddings_batch_with_retry(truncated)
+        # Route each message using its pre-computed embedding
+        messages.each_with_index.map do |message, i|
+          decision = @strategy.route(
+            message,
+            agents: @agents,
+            examples: scoped_examples,
+            current_agent: @current_agent,
+            config: @config,
+            find_examples: @find_examples,
+            precomputed_embedding: embeddings[i]
+          )
+          log(:debug, "Batch[#{i}] -> :#{decision.agent} (confidence: #{decision.confidence.round(3)})")
+          emit(:on_route, decision)
+          decision
+        end
+      end
       # Add a routing example
       #
       # @param text [String] Example user message
@@ -284,40 +342,83 @@ module RubyLLM
       end
       def generate_embedding(text)
-        response = RubyLLM.embed(text, model: @config.embedding_model)
-        vectors = response.vectors
-        # RubyLLM returns the vector directly for single inputs,
-        # or wrapped in an array for batch inputs
-        vectors.first.is_a?(Array) ? vectors.first : vectors
-      rescue StandardError => e
-        raise EmbeddingError, e
+        truncated = truncate_to_max_words(text)
+        # Check cache first
+        if @embedding_cache
+          cached = @embedding_cache.get(truncated)
+          if cached
+            log(:debug, "Cache hit for embedding")
+            return cached
+          end
+        end
+        embedding = generate_embedding_with_retry(truncated)
+        # Store in cache
+        @embedding_cache&.set(truncated, embedding)
+        embedding
+      end
+      def generate_embedding_with_retry(text)
+        attempts = 0
+        begin
+          attempts += 1
+          response = RubyLLM.embed(text, model: @config.embedding_model)
+          vectors = response.vectors
+          # RubyLLM returns the vector directly for single inputs,
+          # or wrapped in an array for batch inputs
+          vectors.first.is_a?(Array) ? vectors.first : vectors
+        rescue StandardError => e
+          if attempts <= @max_retries
+            delay = @retry_base_delay * (2**(attempts - 1))
+            log(:warn, "Embedding failed (attempt #{attempts}/#{@max_retries + 1}), retrying in #{delay}s: #{e.message}")
+            sleep(delay)
+            retry
+          end
+          log(:error, "Embedding failed after #{attempts} attempts: #{e.message}")
+          raise EmbeddingError, e
+        end
       end
       def generate_embeddings_batch(texts)
-        response = RubyLLM.embed(texts, model: @config.embedding_model)
-        vectors = response.vectors
-        # For batch, RubyLLM returns array of vectors
-        # But if single text was passed, it returns vector directly
-        vectors.first.is_a?(Array) ? vectors : [vectors]
-      rescue StandardError => e
-        raise EmbeddingError, e
+        truncated_texts = texts.map { |t| truncate_to_max_words(t) }
+        generate_embeddings_batch_with_retry(truncated_texts)
+      end
+      def generate_embeddings_batch_with_retry(truncated_texts)
+        attempts = 0
+        begin
+          attempts += 1
+          response = RubyLLM.embed(truncated_texts, model: @config.embedding_model)
+          vectors = response.vectors
+          # For batch, RubyLLM returns array of vectors
+          # But if single text was passed, it returns vector directly
+          vectors.first.is_a?(Array) ? vectors : [vectors]
+        rescue StandardError => e
+          if attempts <= @max_retries
+            delay = @retry_base_delay * (2**(attempts - 1))
+            log(:warn, "Batch embedding failed (attempt #{attempts}/#{@max_retries + 1}), retrying in #{delay}s: #{e.message}")
+            sleep(delay)
+            retry
+          end
+          log(:error, "Batch embedding failed after #{attempts} attempts: #{e.message}")
+          raise EmbeddingError, e
+        end
+      end
+      def truncate_to_max_words(text)
+        Utils.truncate_to_max_words(text, @config.max_words)
       end
       def find_nearest_in_memory(examples, query_embedding, k)
         examples.map do |example|
-          distance = cosine_distance(query_embedding, example.embedding)
+          distance = Utils.cosine_distance(query_embedding, example.embedding)
           Strategies::Semantic::InMemoryMatch.new(example, distance)
         end.sort_by(&:distance).first(k)
       end
-      def cosine_distance(a, b)
-        dot_product = a.zip(b).sum { |x, y| x * y }
-        magnitude_a = Math.sqrt(a.sum { |x| x**2 })
-        magnitude_b = Math.sqrt(b.sum { |x| x**2 })
-        return 1.0 if magnitude_a.zero? || magnitude_b.zero?
-        1.0 - (dot_product / (magnitude_a * magnitude_b))
-      end
       def extract_agent_name(match)
         match.respond_to?(:agent_name) ? match.agent_name : match.example&.agent_name
       end
@@ -440,23 +541,68 @@ module RubyLLM
         raise AgentNotFoundError.new(agent_name, @agents.keys)
       end
-      def build_config(embedding_model:, similarity_threshold:, k_neighbors:, fallback:)
+      def build_config(embedding_model:, similarity_threshold:, k_neighbors:, fallback:, max_words:)
         global_config = SemanticRouter.configuration || Configuration.new
+        # Use provided values or fall back to global config
+        threshold = similarity_threshold || global_config.default_similarity_threshold
+        neighbors = k_neighbors || global_config.default_k_neighbors
+        words = max_words || global_config.default_max_words
+        fb = fallback || global_config.default_fallback
+        # Validate router-specific overrides
+        validate_config_values!(
+          similarity_threshold: threshold,
+          k_neighbors: neighbors,
+          max_words: words,
+          fallback: fb
+        )
         RouterConfig.new(
           embedding_model: embedding_model || global_config.default_embedding_model,
-          similarity_threshold: similarity_threshold || global_config.default_similarity_threshold,
-          k_neighbors: k_neighbors || global_config.default_k_neighbors,
-          fallback: fallback || global_config.default_fallback,
+          similarity_threshold: threshold,
+          k_neighbors: neighbors,
+          fallback: fb,
           default_agent: @default_agent,
-          scope: @scope
+          scope: @scope,
+          max_words: words
         )
       end
+      def validate_config_values!(similarity_threshold:, k_neighbors:, max_words:, fallback:)
+        unless similarity_threshold.is_a?(Numeric) && similarity_threshold >= 0.0 && similarity_threshold <= 1.0
+          raise ConfigurationError, "similarity_threshold must be between 0.0 and 1.0, got: #{similarity_threshold.inspect}"
+        end
+        unless k_neighbors.is_a?(Integer) && k_neighbors.positive?
+          raise ConfigurationError, "k_neighbors must be a positive integer, got: #{k_neighbors.inspect}"
+        end
+        unless max_words.nil? || (max_words.is_a?(Integer) && max_words.positive?)
+          raise ConfigurationError, "max_words must be nil or a positive integer, got: #{max_words.inspect}"
+        end
+        valid_fallbacks = %i[default_agent keep_current ask_clarification]
+        unless valid_fallbacks.include?(fallback)
+          raise ConfigurationError, "fallback must be one of #{valid_fallbacks.join(', ')}, got: #{fallback.inspect}"
+        end
+      end
       def emit(event, *args)
         @callbacks ||= {}
         @callbacks[event]&.call(*args)
       end
+      def log(level, message)
+        return unless @logger
+        case level
+        when :debug then @logger.debug("[SemanticRouter] #{message}")
+        when :info then @logger.info("[SemanticRouter] #{message}")
+        when :warn then @logger.warn("[SemanticRouter] #{message}")
+        when :error then @logger.error("[SemanticRouter] #{message}")
+        end
+      end
     end
   end
 end

data/lib/rubyllm/semantic_router/strategies/semantic.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module RubyLLM
       # 3. Routes to the agent associated with the best match
       # 4. Falls back if confidence is below threshold
       class Semantic < Base
-        def route(message, agents:, examples:, current_agent:, config:, find_examples: nil)
+        def route(message, agents:, examples:, current_agent:, config:, find_examples: nil, precomputed_embedding: nil)
           # If custom find_examples provided, use it
           # Otherwise, check if we have examples to search
           has_search = find_examples.respond_to?(:call) ||
@@ -25,8 +25,8 @@ module RubyLLM
             )
           end
-          # Generate embedding for the message
-          embedding = generate_embedding(message, config.embedding_model)
+          # Use precomputed embedding if provided (for batch operations), otherwise generate
+          embedding = precomputed_embedding || generate_embedding(message, config.embedding_model, max_words: config.max_words)
           # Find nearest neighbors using custom search or built-in
           matches = if find_examples.respond_to?(:call)
@@ -78,8 +78,9 @@ module RubyLLM
           end
         end
-        def generate_embedding(message, model)
-          response = RubyLLM.embed(message, model: model)
+        def generate_embedding(message, model, max_words: nil)
+          truncated = truncate_to_max_words(message, max_words)
+          response = RubyLLM.embed(truncated, model: model)
           vectors = response.vectors
           # RubyLLM returns vector directly for single input, array of vectors for batch
           vectors.first.is_a?(Array) ? vectors.first : vectors
@@ -87,6 +88,10 @@ module RubyLLM
           raise EmbeddingError, e
         end
+        def truncate_to_max_words(text, max_words)
+          Utils.truncate_to_max_words(text, max_words)
+        end
         def find_nearest_neighbors(examples, embedding, config)
           # Support both ActiveRecord (with neighbor gem) and in-memory arrays
           if examples.respond_to?(:nearest_neighbors)
@@ -125,14 +130,7 @@ module RubyLLM
         end
         def cosine_distance(a, b)
-          # Cosine distance = 1 - cosine similarity
-          dot_product = a.zip(b).sum { |x, y| x * y }
-          magnitude_a = Math.sqrt(a.sum { |x| x**2 })
-          magnitude_b = Math.sqrt(b.sum { |x| x**2 })
-          return 1.0 if magnitude_a.zero? || magnitude_b.zero?
-          1.0 - (dot_product / (magnitude_a * magnitude_b))
+          Utils.cosine_distance(a, b)
         end
         def calculate_confidence(match)

data/lib/rubyllm/semantic_router/utils.rb ADDED Viewed

@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+module RubyLLM
+  module SemanticRouter
+    # Shared utility methods for semantic routing operations
+    module Utils
+      module_function
+      # Calculate cosine distance between two vectors
+      # Cosine distance = 1 - cosine similarity
+      # Returns value in range [0, 2] where 0 = identical, 2 = opposite
+      #
+      # @param a [Array<Numeric>] First vector
+      # @param b [Array<Numeric>] Second vector
+      # @return [Float] Cosine distance
+      def cosine_distance(a, b)
+        dot_product = a.zip(b).sum { |x, y| x * y }
+        magnitude_a = Math.sqrt(a.sum { |x| x**2 })
+        magnitude_b = Math.sqrt(b.sum { |x| x**2 })
+        return 1.0 if magnitude_a.zero? || magnitude_b.zero?
+        1.0 - (dot_product / (magnitude_a * magnitude_b))
+      end
+      # Calculate cosine similarity between two vectors
+      # Returns value in range [-1, 1] where 1 = identical, -1 = opposite
+      #
+      # @param a [Array<Numeric>] First vector
+      # @param b [Array<Numeric>] Second vector
+      # @return [Float] Cosine similarity
+      def cosine_similarity(a, b)
+        1.0 - cosine_distance(a, b)
+      end
+      # Truncate text to a maximum number of words
+      #
+      # @param text [String] Text to truncate
+      # @param max_words [Integer, nil] Maximum words (nil = no truncation)
+      # @return [String] Truncated text
+      def truncate_to_max_words(text, max_words)
+        return text unless max_words
+        words = text.split
+        return text if words.size <= max_words
+        words.first(max_words).join(" ")
+      end
+    end
+  end
+end

data/lib/rubyllm/semantic_router/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module RubyLLM
   module SemanticRouter
-    VERSION = "0.1.0"
+    VERSION = "0.4.0"
   end
 end

data/lib/rubyllm/semantic_router.rb CHANGED Viewed

@@ -14,7 +14,9 @@ end
 require_relative "semantic_router/version"
 require_relative "semantic_router/errors"
+require_relative "semantic_router/utils"
 require_relative "semantic_router/configuration"
+require_relative "semantic_router/embedding_cache"
 require_relative "semantic_router/routing_decision"
 require_relative "semantic_router/strategies/base"
 require_relative "semantic_router/strategies/semantic"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rubyllm-semantic_router
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.4.0
 platform: ruby
 authors:
 - Chris Hasiński
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2025-12-31 00:00:00.000000000 Z
+date: 2026-05-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby_llm
@@ -76,6 +76,9 @@ extra_rdoc_files: []
 files:
 - ".gitignore"
 - ".rspec"
+- ARCHITECTURE.md
+- CHANGELOG.md
+- CONTRIBUTING.md
 - Gemfile
 - Gemfile.lock
 - LICENSE.txt
@@ -85,11 +88,13 @@ files:
 - bin/setup
 - lib/rubyllm/semantic_router.rb
 - lib/rubyllm/semantic_router/configuration.rb
+- lib/rubyllm/semantic_router/embedding_cache.rb
 - lib/rubyllm/semantic_router/errors.rb
 - lib/rubyllm/semantic_router/router.rb
 - lib/rubyllm/semantic_router/routing_decision.rb
 - lib/rubyllm/semantic_router/strategies/base.rb
 - lib/rubyllm/semantic_router/strategies/semantic.rb
+- lib/rubyllm/semantic_router/utils.rb
 - lib/rubyllm/semantic_router/version.rb
 - mise.toml
 - rubyllm-semantic_router.gemspec