rubyllm-semantic_router 0.1.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ module RubyLLM
10
10
  :fallback,
11
11
  :default_agent,
12
12
  :scope,
13
+ :max_words,
13
14
  keyword_init: true
14
15
  )
15
16
 
@@ -37,7 +38,7 @@ module RubyLLM
37
38
  # router.ask("What laptops do you have?")
38
39
  #
39
40
  class Router
40
- attr_reader :agents, :current_agent, :last_routing_decision
41
+ attr_reader :agents, :current_agent, :last_routing_decision, :embedding_cache
41
42
 
42
43
  # In-memory routing example for non-Rails usage
43
44
  InMemoryExample = Struct.new(:agent_name, :example_text, :embedding, keyword_init: true)
@@ -52,7 +53,12 @@ module RubyLLM
52
53
  scope: nil,
53
54
  strategy: nil,
54
55
  examples: nil,
55
- find_examples: nil
56
+ find_examples: nil,
57
+ max_words: nil,
58
+ logger: nil,
59
+ cache_ttl: nil,
60
+ max_retries: nil,
61
+ retry_base_delay: nil
56
62
  )
57
63
  @agents = normalize_agents(agents)
58
64
  @default_agent = default_agent.to_sym
@@ -64,15 +70,28 @@ module RubyLLM
64
70
 
65
71
  validate_default_agent!
66
72
 
73
+ global_config = SemanticRouter.configuration || Configuration.new
74
+
75
+ @logger = logger || global_config.logger
76
+ @max_retries = max_retries || global_config.max_retries
77
+ @retry_base_delay = retry_base_delay || global_config.retry_base_delay
78
+
79
+ # Set up embedding cache if TTL is configured
80
+ ttl = cache_ttl || global_config.cache_ttl
81
+ @embedding_cache = ttl ? EmbeddingCache.new(ttl: ttl) : nil
82
+
67
83
  @config = build_config(
68
84
  embedding_model: embedding_model,
69
85
  similarity_threshold: similarity_threshold,
70
86
  k_neighbors: k_neighbors,
71
- fallback: fallback
87
+ fallback: fallback,
88
+ max_words: max_words
72
89
  )
73
90
 
74
91
  @chat = nil
75
92
  @last_routing_decision = nil
93
+
94
+ log(:debug, "Router initialized with agents: #{@agents.keys.join(', ')}")
76
95
  end
77
96
 
78
97
  # Send a message to the router and get a response
@@ -81,10 +100,19 @@ module RubyLLM
81
100
  # @yield [chunk] Optional block for streaming responses
82
101
  # @return [RubyLLM::Message] The response from the selected agent
83
102
  def ask(message, &block)
103
+ log(:debug, "Routing message: #{message[0..100]}...")
104
+
84
105
  @last_routing_decision = route(message)
85
106
 
107
+ log(:info, "Routed to :#{@last_routing_decision.agent} " \
108
+ "(confidence: #{@last_routing_decision.confidence.round(3)}, " \
109
+ "reason: #{@last_routing_decision.reason})")
110
+
86
111
  target_agent = @last_routing_decision.agent
87
- switch_to(target_agent) if target_agent != @current_agent
112
+ if target_agent != @current_agent
113
+ log(:debug, "Switching from :#{@current_agent} to :#{target_agent}")
114
+ switch_to(target_agent)
115
+ end
88
116
 
89
117
  if @last_routing_decision.needs_clarification?
90
118
  inject_clarification_prompt
@@ -93,6 +121,36 @@ module RubyLLM
93
121
  current_chat.ask(message, &block)
94
122
  end
95
123
 
124
+ # Route multiple messages and return their routing decisions
125
+ # Useful for batch analysis or pre-routing without conversation
126
+ #
127
+ # @param messages [Array<String>] Messages to route
128
+ # @return [Array<RoutingDecision>] Routing decisions for each message
129
+ def ask_batch(messages)
130
+ log(:debug, "Batch routing #{messages.size} messages")
131
+
132
+ # Generate embeddings for all messages at once
133
+ truncated = messages.map { |m| truncate_to_max_words(m) }
134
+ embeddings = generate_embeddings_batch_with_retry(truncated)
135
+
136
+ # Route each message using its pre-computed embedding
137
+ messages.each_with_index.map do |message, i|
138
+ decision = @strategy.route(
139
+ message,
140
+ agents: @agents,
141
+ examples: scoped_examples,
142
+ current_agent: @current_agent,
143
+ config: @config,
144
+ find_examples: @find_examples,
145
+ precomputed_embedding: embeddings[i]
146
+ )
147
+
148
+ log(:debug, "Batch[#{i}] -> :#{decision.agent} (confidence: #{decision.confidence.round(3)})")
149
+ emit(:on_route, decision)
150
+ decision
151
+ end
152
+ end
153
+
96
154
  # Add a routing example
97
155
  #
98
156
  # @param text [String] Example user message
@@ -284,40 +342,83 @@ module RubyLLM
284
342
  end
285
343
 
286
344
  def generate_embedding(text)
287
- response = RubyLLM.embed(text, model: @config.embedding_model)
288
- vectors = response.vectors
289
- # RubyLLM returns the vector directly for single inputs,
290
- # or wrapped in an array for batch inputs
291
- vectors.first.is_a?(Array) ? vectors.first : vectors
292
- rescue StandardError => e
293
- raise EmbeddingError, e
345
+ truncated = truncate_to_max_words(text)
346
+
347
+ # Check cache first
348
+ if @embedding_cache
349
+ cached = @embedding_cache.get(truncated)
350
+ if cached
351
+ log(:debug, "Cache hit for embedding")
352
+ return cached
353
+ end
354
+ end
355
+
356
+ embedding = generate_embedding_with_retry(truncated)
357
+
358
+ # Store in cache
359
+ @embedding_cache&.set(truncated, embedding)
360
+
361
+ embedding
362
+ end
363
+
364
+ def generate_embedding_with_retry(text)
365
+ attempts = 0
366
+ begin
367
+ attempts += 1
368
+ response = RubyLLM.embed(text, model: @config.embedding_model)
369
+ vectors = response.vectors
370
+ # RubyLLM returns the vector directly for single inputs,
371
+ # or wrapped in an array for batch inputs
372
+ vectors.first.is_a?(Array) ? vectors.first : vectors
373
+ rescue StandardError => e
374
+ if attempts <= @max_retries
375
+ delay = @retry_base_delay * (2**(attempts - 1))
376
+ log(:warn, "Embedding failed (attempt #{attempts}/#{@max_retries + 1}), retrying in #{delay}s: #{e.message}")
377
+ sleep(delay)
378
+ retry
379
+ end
380
+ log(:error, "Embedding failed after #{attempts} attempts: #{e.message}")
381
+ raise EmbeddingError, e
382
+ end
294
383
  end
295
384
 
296
385
  def generate_embeddings_batch(texts)
297
- response = RubyLLM.embed(texts, model: @config.embedding_model)
298
- vectors = response.vectors
299
- # For batch, RubyLLM returns array of vectors
300
- # But if single text was passed, it returns vector directly
301
- vectors.first.is_a?(Array) ? vectors : [vectors]
302
- rescue StandardError => e
303
- raise EmbeddingError, e
386
+ truncated_texts = texts.map { |t| truncate_to_max_words(t) }
387
+ generate_embeddings_batch_with_retry(truncated_texts)
388
+ end
389
+
390
+ def generate_embeddings_batch_with_retry(truncated_texts)
391
+ attempts = 0
392
+ begin
393
+ attempts += 1
394
+ response = RubyLLM.embed(truncated_texts, model: @config.embedding_model)
395
+ vectors = response.vectors
396
+ # For batch, RubyLLM returns array of vectors
397
+ # But if single text was passed, it returns vector directly
398
+ vectors.first.is_a?(Array) ? vectors : [vectors]
399
+ rescue StandardError => e
400
+ if attempts <= @max_retries
401
+ delay = @retry_base_delay * (2**(attempts - 1))
402
+ log(:warn, "Batch embedding failed (attempt #{attempts}/#{@max_retries + 1}), retrying in #{delay}s: #{e.message}")
403
+ sleep(delay)
404
+ retry
405
+ end
406
+ log(:error, "Batch embedding failed after #{attempts} attempts: #{e.message}")
407
+ raise EmbeddingError, e
408
+ end
409
+ end
410
+
411
+ def truncate_to_max_words(text)
412
+ Utils.truncate_to_max_words(text, @config.max_words)
304
413
  end
305
414
 
306
415
  def find_nearest_in_memory(examples, query_embedding, k)
307
416
  examples.map do |example|
308
- distance = cosine_distance(query_embedding, example.embedding)
417
+ distance = Utils.cosine_distance(query_embedding, example.embedding)
309
418
  Strategies::Semantic::InMemoryMatch.new(example, distance)
310
419
  end.sort_by(&:distance).first(k)
311
420
  end
312
421
 
313
- def cosine_distance(a, b)
314
- dot_product = a.zip(b).sum { |x, y| x * y }
315
- magnitude_a = Math.sqrt(a.sum { |x| x**2 })
316
- magnitude_b = Math.sqrt(b.sum { |x| x**2 })
317
- return 1.0 if magnitude_a.zero? || magnitude_b.zero?
318
- 1.0 - (dot_product / (magnitude_a * magnitude_b))
319
- end
320
-
321
422
  def extract_agent_name(match)
322
423
  match.respond_to?(:agent_name) ? match.agent_name : match.example&.agent_name
323
424
  end
@@ -440,23 +541,68 @@ module RubyLLM
440
541
  raise AgentNotFoundError.new(agent_name, @agents.keys)
441
542
  end
442
543
 
443
- def build_config(embedding_model:, similarity_threshold:, k_neighbors:, fallback:)
544
+ def build_config(embedding_model:, similarity_threshold:, k_neighbors:, fallback:, max_words:)
444
545
  global_config = SemanticRouter.configuration || Configuration.new
445
546
 
547
+ # Use provided values or fall back to global config
548
+ threshold = similarity_threshold || global_config.default_similarity_threshold
549
+ neighbors = k_neighbors || global_config.default_k_neighbors
550
+ words = max_words || global_config.default_max_words
551
+ fb = fallback || global_config.default_fallback
552
+
553
+ # Validate router-specific overrides
554
+ validate_config_values!(
555
+ similarity_threshold: threshold,
556
+ k_neighbors: neighbors,
557
+ max_words: words,
558
+ fallback: fb
559
+ )
560
+
446
561
  RouterConfig.new(
447
562
  embedding_model: embedding_model || global_config.default_embedding_model,
448
- similarity_threshold: similarity_threshold || global_config.default_similarity_threshold,
449
- k_neighbors: k_neighbors || global_config.default_k_neighbors,
450
- fallback: fallback || global_config.default_fallback,
563
+ similarity_threshold: threshold,
564
+ k_neighbors: neighbors,
565
+ fallback: fb,
451
566
  default_agent: @default_agent,
452
- scope: @scope
567
+ scope: @scope,
568
+ max_words: words
453
569
  )
454
570
  end
455
571
 
572
+ def validate_config_values!(similarity_threshold:, k_neighbors:, max_words:, fallback:)
573
+ unless similarity_threshold.is_a?(Numeric) && similarity_threshold >= 0.0 && similarity_threshold <= 1.0
574
+ raise ConfigurationError, "similarity_threshold must be between 0.0 and 1.0, got: #{similarity_threshold.inspect}"
575
+ end
576
+
577
+ unless k_neighbors.is_a?(Integer) && k_neighbors.positive?
578
+ raise ConfigurationError, "k_neighbors must be a positive integer, got: #{k_neighbors.inspect}"
579
+ end
580
+
581
+ unless max_words.nil? || (max_words.is_a?(Integer) && max_words.positive?)
582
+ raise ConfigurationError, "max_words must be nil or a positive integer, got: #{max_words.inspect}"
583
+ end
584
+
585
+ valid_fallbacks = %i[default_agent keep_current ask_clarification]
586
+ unless valid_fallbacks.include?(fallback)
587
+ raise ConfigurationError, "fallback must be one of #{valid_fallbacks.join(', ')}, got: #{fallback.inspect}"
588
+ end
589
+ end
590
+
456
591
  def emit(event, *args)
457
592
  @callbacks ||= {}
458
593
  @callbacks[event]&.call(*args)
459
594
  end
595
+
596
+ def log(level, message)
597
+ return unless @logger
598
+
599
+ case level
600
+ when :debug then @logger.debug("[SemanticRouter] #{message}")
601
+ when :info then @logger.info("[SemanticRouter] #{message}")
602
+ when :warn then @logger.warn("[SemanticRouter] #{message}")
603
+ when :error then @logger.error("[SemanticRouter] #{message}")
604
+ end
605
+ end
460
606
  end
461
607
  end
462
608
  end
@@ -11,7 +11,7 @@ module RubyLLM
11
11
  # 3. Routes to the agent associated with the best match
12
12
  # 4. Falls back if confidence is below threshold
13
13
  class Semantic < Base
14
- def route(message, agents:, examples:, current_agent:, config:, find_examples: nil)
14
+ def route(message, agents:, examples:, current_agent:, config:, find_examples: nil, precomputed_embedding: nil)
15
15
  # If custom find_examples provided, use it
16
16
  # Otherwise, check if we have examples to search
17
17
  has_search = find_examples.respond_to?(:call) ||
@@ -25,8 +25,8 @@ module RubyLLM
25
25
  )
26
26
  end
27
27
 
28
- # Generate embedding for the message
29
- embedding = generate_embedding(message, config.embedding_model)
28
+ # Use precomputed embedding if provided (for batch operations), otherwise generate
29
+ embedding = precomputed_embedding || generate_embedding(message, config.embedding_model, max_words: config.max_words)
30
30
 
31
31
  # Find nearest neighbors using custom search or built-in
32
32
  matches = if find_examples.respond_to?(:call)
@@ -78,8 +78,9 @@ module RubyLLM
78
78
  end
79
79
  end
80
80
 
81
- def generate_embedding(message, model)
82
- response = RubyLLM.embed(message, model: model)
81
+ def generate_embedding(message, model, max_words: nil)
82
+ truncated = truncate_to_max_words(message, max_words)
83
+ response = RubyLLM.embed(truncated, model: model)
83
84
  vectors = response.vectors
84
85
  # RubyLLM returns vector directly for single input, array of vectors for batch
85
86
  vectors.first.is_a?(Array) ? vectors.first : vectors
@@ -87,6 +88,10 @@ module RubyLLM
87
88
  raise EmbeddingError, e
88
89
  end
89
90
 
91
+ def truncate_to_max_words(text, max_words)
92
+ Utils.truncate_to_max_words(text, max_words)
93
+ end
94
+
90
95
  def find_nearest_neighbors(examples, embedding, config)
91
96
  # Support both ActiveRecord (with neighbor gem) and in-memory arrays
92
97
  if examples.respond_to?(:nearest_neighbors)
@@ -125,14 +130,7 @@ module RubyLLM
125
130
  end
126
131
 
127
132
  def cosine_distance(a, b)
128
- # Cosine distance = 1 - cosine similarity
129
- dot_product = a.zip(b).sum { |x, y| x * y }
130
- magnitude_a = Math.sqrt(a.sum { |x| x**2 })
131
- magnitude_b = Math.sqrt(b.sum { |x| x**2 })
132
-
133
- return 1.0 if magnitude_a.zero? || magnitude_b.zero?
134
-
135
- 1.0 - (dot_product / (magnitude_a * magnitude_b))
133
+ Utils.cosine_distance(a, b)
136
134
  end
137
135
 
138
136
  def calculate_confidence(match)
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module SemanticRouter
5
+ # Shared utility methods for semantic routing operations
6
+ module Utils
7
+ module_function
8
+
9
+ # Calculate cosine distance between two vectors
10
+ # Cosine distance = 1 - cosine similarity
11
+ # Returns value in range [0, 2] where 0 = identical, 2 = opposite
12
+ #
13
+ # @param a [Array<Numeric>] First vector
14
+ # @param b [Array<Numeric>] Second vector
15
+ # @return [Float] Cosine distance
16
+ def cosine_distance(a, b)
17
+ dot_product = a.zip(b).sum { |x, y| x * y }
18
+ magnitude_a = Math.sqrt(a.sum { |x| x**2 })
19
+ magnitude_b = Math.sqrt(b.sum { |x| x**2 })
20
+
21
+ return 1.0 if magnitude_a.zero? || magnitude_b.zero?
22
+
23
+ 1.0 - (dot_product / (magnitude_a * magnitude_b))
24
+ end
25
+
26
+ # Calculate cosine similarity between two vectors
27
+ # Returns value in range [-1, 1] where 1 = identical, -1 = opposite
28
+ #
29
+ # @param a [Array<Numeric>] First vector
30
+ # @param b [Array<Numeric>] Second vector
31
+ # @return [Float] Cosine similarity
32
+ def cosine_similarity(a, b)
33
+ 1.0 - cosine_distance(a, b)
34
+ end
35
+
36
+ # Truncate text to a maximum number of words
37
+ #
38
+ # @param text [String] Text to truncate
39
+ # @param max_words [Integer, nil] Maximum words (nil = no truncation)
40
+ # @return [String] Truncated text
41
+ def truncate_to_max_words(text, max_words)
42
+ return text unless max_words
43
+
44
+ words = text.split
45
+ return text if words.size <= max_words
46
+
47
+ words.first(max_words).join(" ")
48
+ end
49
+ end
50
+ end
51
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module RubyLLM
4
4
  module SemanticRouter
5
- VERSION = "0.1.0"
5
+ VERSION = "0.4.0"
6
6
  end
7
7
  end
@@ -14,7 +14,9 @@ end
14
14
 
15
15
  require_relative "semantic_router/version"
16
16
  require_relative "semantic_router/errors"
17
+ require_relative "semantic_router/utils"
17
18
  require_relative "semantic_router/configuration"
19
+ require_relative "semantic_router/embedding_cache"
18
20
  require_relative "semantic_router/routing_decision"
19
21
  require_relative "semantic_router/strategies/base"
20
22
  require_relative "semantic_router/strategies/semantic"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyllm-semantic_router
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Hasiński
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-12-31 00:00:00.000000000 Z
11
+ date: 2026-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby_llm
@@ -76,6 +76,9 @@ extra_rdoc_files: []
76
76
  files:
77
77
  - ".gitignore"
78
78
  - ".rspec"
79
+ - ARCHITECTURE.md
80
+ - CHANGELOG.md
81
+ - CONTRIBUTING.md
79
82
  - Gemfile
80
83
  - Gemfile.lock
81
84
  - LICENSE.txt
@@ -85,11 +88,13 @@ files:
85
88
  - bin/setup
86
89
  - lib/rubyllm/semantic_router.rb
87
90
  - lib/rubyllm/semantic_router/configuration.rb
91
+ - lib/rubyllm/semantic_router/embedding_cache.rb
88
92
  - lib/rubyllm/semantic_router/errors.rb
89
93
  - lib/rubyllm/semantic_router/router.rb
90
94
  - lib/rubyllm/semantic_router/routing_decision.rb
91
95
  - lib/rubyllm/semantic_router/strategies/base.rb
92
96
  - lib/rubyllm/semantic_router/strategies/semantic.rb
97
+ - lib/rubyllm/semantic_router/utils.rb
93
98
  - lib/rubyllm/semantic_router/version.rb
94
99
  - mise.toml
95
100
  - rubyllm-semantic_router.gemspec