rubyllm-semantic_router 0.1.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/ARCHITECTURE.md +329 -0
- data/CHANGELOG.md +98 -0
- data/CONTRIBUTING.md +103 -0
- data/Gemfile.lock +10 -10
- data/README.md +136 -179
- data/lib/rubyllm/semantic_router/configuration.rb +101 -5
- data/lib/rubyllm/semantic_router/embedding_cache.rb +74 -0
- data/lib/rubyllm/semantic_router/errors.rb +7 -0
- data/lib/rubyllm/semantic_router/router.rb +178 -32
- data/lib/rubyllm/semantic_router/strategies/semantic.rb +11 -13
- data/lib/rubyllm/semantic_router/utils.rb +51 -0
- data/lib/rubyllm/semantic_router/version.rb +1 -1
- data/lib/rubyllm/semantic_router.rb +2 -0
- metadata +7 -2
data/README.md
CHANGED
|
@@ -1,261 +1,218 @@
|
|
|
1
1
|
# RubyLLM Semantic Router
|
|
2
2
|
|
|
3
|
-
Route user messages to specialized LLM agents based on semantic similarity.
|
|
3
|
+
Route user messages to specialized LLM agents based on semantic similarity.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Installation
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
-
|
|
9
|
-
- An **account manager** that handles billing and settings
|
|
10
|
-
- A **support agent** that troubleshoots issues
|
|
11
|
-
|
|
12
|
-
How do you decide which one handles "I can't log in" vs "What's your return policy" vs "Show me laptops under $1000"?
|
|
13
|
-
|
|
14
|
-
This gem provides fast, embedding-based routing - no LLM call needed for the routing decision itself.
|
|
15
|
-
|
|
16
|
-
## How It Works
|
|
17
|
-
|
|
18
|
-
```
|
|
19
|
-
User: "What's your cheapest laptop?"
|
|
20
|
-
│
|
|
21
|
-
▼
|
|
22
|
-
┌─────────────────┐
|
|
23
|
-
│ Embed message │ ← ~2ms, $0.00001
|
|
24
|
-
└────────┬────────┘
|
|
25
|
-
│
|
|
26
|
-
▼
|
|
27
|
-
┌─────────────────┐
|
|
28
|
-
│ Find similar │ ← Compare to your examples
|
|
29
|
-
│ examples (kNN) │ "Show me computers" → product
|
|
30
|
-
└────────┬────────┘ "Reset password" → account
|
|
31
|
-
│
|
|
32
|
-
▼
|
|
33
|
-
┌─────────────────┐
|
|
34
|
-
│ Route to │ ← Product agent handles it
|
|
35
|
-
│ Product Agent │
|
|
36
|
-
└─────────────────┘
|
|
7
|
+
```ruby
|
|
8
|
+
gem 'rubyllm-semantic_router'
|
|
37
9
|
```
|
|
38
10
|
|
|
39
|
-
**Key insight**: The routing decision is just an embedding + kNN lookup. No LLM call needed. Fast and cheap.
|
|
40
|
-
|
|
41
11
|
## Quick Start
|
|
42
12
|
|
|
43
13
|
```ruby
|
|
44
14
|
require 'rubyllm/semantic_router'
|
|
45
15
|
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
|
|
16
|
+
# Create agents as RubyLLM chat objects
|
|
17
|
+
product = RubyLLM.chat(model: "gpt-4o-mini")
|
|
18
|
+
.with_instructions("You're a product expert.")
|
|
49
19
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
.with_tools(DiagnosticTool, TicketCreator)
|
|
20
|
+
support = RubyLLM.chat(model: "gpt-4o")
|
|
21
|
+
.with_instructions("You're technical support.")
|
|
53
22
|
|
|
54
|
-
#
|
|
23
|
+
# Create router
|
|
55
24
|
router = RubyLLM::SemanticRouter.new(
|
|
56
|
-
agents: {
|
|
57
|
-
|
|
58
|
-
support: support_chat
|
|
59
|
-
},
|
|
60
|
-
default_agent: :product # Fallback when uncertain
|
|
25
|
+
agents: { product: product, support: support },
|
|
26
|
+
default_agent: :product
|
|
61
27
|
)
|
|
62
28
|
|
|
63
|
-
#
|
|
29
|
+
# Add training examples
|
|
64
30
|
router.import_examples([
|
|
65
31
|
{ text: "Show me laptops", agent: :product },
|
|
66
|
-
{ text: "Compare these two phones", agent: :product },
|
|
67
|
-
{ text: "What's on sale?", agent: :product },
|
|
68
32
|
{ text: "I can't log in", agent: :support },
|
|
69
|
-
{ text: "App keeps crashing", agent: :support },
|
|
70
|
-
{ text: "Error message when I checkout", agent: :support },
|
|
71
33
|
])
|
|
72
34
|
|
|
73
|
-
#
|
|
74
|
-
router.ask("What gaming laptops do you have?") # → product
|
|
75
|
-
router.ask("My order is stuck") # → support
|
|
35
|
+
# Chat - routing happens automatically
|
|
36
|
+
router.ask("What gaming laptops do you have?") # → product
|
|
37
|
+
router.ask("My order is stuck") # → support
|
|
76
38
|
```
|
|
77
39
|
|
|
78
|
-
##
|
|
40
|
+
## How It Works
|
|
79
41
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
42
|
+
1. User sends a message
|
|
43
|
+
2. Router embeds the message (~2ms, ~$0.00001)
|
|
44
|
+
3. Finds similar examples using kNN
|
|
45
|
+
4. Routes to the matching agent
|
|
46
|
+
5. Agent responds with full conversation history
|
|
85
47
|
|
|
86
|
-
|
|
87
|
-
- Small apps with <1000 daily users
|
|
88
|
-
- Overlapping domains where context matters more than classification
|
|
89
|
-
- No training examples available
|
|
48
|
+
No LLM call is needed for the routing decision itself - only an embedding and a kNN lookup.
|
|
90
49
|
|
|
91
|
-
##
|
|
50
|
+
## Options
|
|
92
51
|
|
|
93
|
-
|
|
52
|
+
```ruby
|
|
53
|
+
router = RubyLLM::SemanticRouter.new(
|
|
54
|
+
agents: { ... },
|
|
55
|
+
default_agent: :product,
|
|
56
|
+
similarity_threshold: 0.3, # Route only if confidence > threshold
|
|
57
|
+
fallback: :default_agent, # :default_agent | :keep_current | :ask_clarification
|
|
58
|
+
embedding_model: "text-embedding-3-small",
|
|
59
|
+
max_words: 50, # Truncate messages to first N words (default: unlimited)
|
|
60
|
+
logger: Rails.logger, # Enable debug logging (default: nil)
|
|
61
|
+
cache_ttl: 300, # Cache embeddings for 5 minutes (default: nil)
|
|
62
|
+
max_retries: 3, # Retry failed embedding calls (default: 3)
|
|
63
|
+
retry_base_delay: 0.5 # Base delay for exponential backoff (default: 0.5s)
|
|
64
|
+
)
|
|
65
|
+
```
|
|
94
66
|
|
|
95
|
-
|
|
67
|
+
## Debugging
|
|
96
68
|
|
|
97
69
|
```ruby
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
70
|
+
# Preview without sending
|
|
71
|
+
decision = router.match("test message")
|
|
72
|
+
decision.agent # => :product
|
|
73
|
+
decision.confidence # => 0.85
|
|
74
|
+
|
|
75
|
+
# Detailed routing info
|
|
76
|
+
router.debug_routing("test message")
|
|
102
77
|
```
|
|
103
78
|
|
|
104
|
-
|
|
79
|
+
## Batch Routing
|
|
80
|
+
|
|
81
|
+
Route multiple messages efficiently with a single embedding API call:
|
|
105
82
|
|
|
106
83
|
```ruby
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
default_agent: :product,
|
|
84
|
+
messages = [
|
|
85
|
+
"Show me products",
|
|
86
|
+
"I need help with my account",
|
|
87
|
+
"What's your return policy?"
|
|
88
|
+
]
|
|
113
89
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
# fallback: :keep_current, # Stay with current agent
|
|
117
|
-
# fallback: :ask_clarification # Ask user to rephrase
|
|
90
|
+
decisions = router.ask_batch(messages)
|
|
91
|
+
# => [RoutingDecision, RoutingDecision, RoutingDecision]
|
|
118
92
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
93
|
+
decisions.each do |decision|
|
|
94
|
+
puts "#{decision.agent}: confidence #{decision.confidence}"
|
|
95
|
+
end
|
|
122
96
|
```
|
|
123
97
|
|
|
124
|
-
|
|
98
|
+
## Error Handling
|
|
99
|
+
|
|
100
|
+
### Configuration Validation
|
|
101
|
+
|
|
102
|
+
Numeric options and `fallback` are validated when they are set. Invalid values raise `ConfigurationError`:
|
|
125
103
|
|
|
126
104
|
```ruby
|
|
127
|
-
#
|
|
128
|
-
router.
|
|
105
|
+
# These will raise ConfigurationError:
|
|
106
|
+
router = RubyLLM::SemanticRouter.new(
|
|
107
|
+
agents: agents,
|
|
108
|
+
default_agent: :product,
|
|
109
|
+
similarity_threshold: 1.5 # Must be 0.0-1.0
|
|
110
|
+
)
|
|
129
111
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
{ text: "...", agent: :support },
|
|
134
|
-
])
|
|
112
|
+
RubyLLM::SemanticRouter.configure do |config|
|
|
113
|
+
config.default_k_neighbors = 0 # Must be positive integer
|
|
114
|
+
end
|
|
135
115
|
```
|
|
136
116
|
|
|
137
|
-
|
|
117
|
+
**Validation rules:**
|
|
118
|
+
- `similarity_threshold`: Must be between 0.0 and 1.0
|
|
119
|
+
- `k_neighbors`: Must be a positive integer
|
|
120
|
+
- `max_words`: Must be `nil` or a positive integer
|
|
121
|
+
- `fallback`: Must be `:default_agent`, `:keep_current`, or `:ask_clarification`
|
|
122
|
+
- `cache_ttl`: Must be `nil` or a positive number
|
|
123
|
+
- `max_retries`: Must be a non-negative integer
|
|
138
124
|
|
|
139
|
-
|
|
140
|
-
# Preview routing without sending message
|
|
141
|
-
decision = router.match("test message")
|
|
142
|
-
decision.agent # => :product
|
|
143
|
-
decision.confidence # => 0.85
|
|
125
|
+
### Embedding Errors
|
|
144
126
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
# { agent: :support, example: "help me", confidence: 0.42 }
|
|
154
|
-
# ]
|
|
155
|
-
# }
|
|
127
|
+
Failed embedding API calls raise `EmbeddingError` after exhausting retries:
|
|
128
|
+
|
|
129
|
+
```ruby
|
|
130
|
+
begin
|
|
131
|
+
router.ask("Hello")
|
|
132
|
+
rescue RubyLLM::SemanticRouter::EmbeddingError => e
|
|
133
|
+
puts "Embedding failed: #{e.message}"
|
|
134
|
+
end
|
|
156
135
|
```
|
|
157
136
|
|
|
158
|
-
|
|
137
|
+
## Global Configuration
|
|
138
|
+
|
|
139
|
+
Set defaults for all routers:
|
|
159
140
|
|
|
160
141
|
```ruby
|
|
161
|
-
|
|
162
|
-
|
|
142
|
+
RubyLLM::SemanticRouter.configure do |config|
|
|
143
|
+
config.default_embedding_model = "text-embedding-3-small"
|
|
144
|
+
config.default_similarity_threshold = 0.3
|
|
145
|
+
config.default_k_neighbors = 3
|
|
146
|
+
config.default_fallback = :default_agent
|
|
147
|
+
config.default_max_words = nil
|
|
148
|
+
config.logger = Rails.logger
|
|
149
|
+
config.cache_ttl = 300 # 5 minute cache
|
|
150
|
+
config.max_retries = 3
|
|
151
|
+
config.retry_base_delay = 0.5
|
|
152
|
+
end
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Storage Options
|
|
163
156
|
|
|
164
|
-
|
|
165
|
-
router.current_agent # => :support
|
|
157
|
+
### In-Memory (default)
|
|
166
158
|
|
|
167
|
-
|
|
168
|
-
router.
|
|
159
|
+
```ruby
|
|
160
|
+
router.add_example("Show products", agent: :product)
|
|
161
|
+
router.import_examples([...])
|
|
169
162
|
```
|
|
170
163
|
|
|
171
|
-
### ActiveRecord +
|
|
164
|
+
### ActiveRecord + neighbor gem
|
|
172
165
|
|
|
173
|
-
|
|
166
|
+
Works with PostgreSQL (pgvector), SQLite (sqlite-vec), MySQL (vector), and [more](https://github.com/ankane/neighbor):
|
|
174
167
|
|
|
175
168
|
```ruby
|
|
176
|
-
# Migration
|
|
177
|
-
create_table :routing_examples do |t|
|
|
178
|
-
t.string :agent_name, null: false
|
|
179
|
-
t.text :example_text, null: false
|
|
180
|
-
t.vector :embedding, limit: 1536 # text-embedding-3-small dimensions
|
|
181
|
-
end
|
|
182
|
-
|
|
183
|
-
# Model
|
|
184
169
|
class RoutingExample < ApplicationRecord
|
|
185
170
|
has_neighbors :embedding
|
|
186
171
|
end
|
|
187
172
|
|
|
188
|
-
# Usage
|
|
189
|
-
router = RubyLLM::SemanticRouter.new(
|
|
190
|
-
agents: { product: product_chat, support: support_chat },
|
|
191
|
-
default_agent: :product
|
|
192
|
-
)
|
|
193
173
|
router.with_examples(RoutingExample.all)
|
|
194
|
-
|
|
195
|
-
# Scoped for multi-tenant
|
|
196
174
|
router.with_examples(RoutingExample.where(tenant_id: current_tenant.id))
|
|
197
175
|
```
|
|
198
176
|
|
|
199
|
-
###
|
|
177
|
+
### Multi-tenant Scoping
|
|
200
178
|
|
|
201
|
-
|
|
179
|
+
For multi-tenant applications, use the `scope` parameter to isolate routing examples:
|
|
202
180
|
|
|
203
181
|
```ruby
|
|
182
|
+
# Create scoped router
|
|
204
183
|
router = RubyLLM::SemanticRouter.new(
|
|
205
|
-
agents: { product:
|
|
184
|
+
agents: { product: product, support: support },
|
|
206
185
|
default_agent: :product,
|
|
207
|
-
|
|
208
|
-
# Pinecone
|
|
209
|
-
Pinecone.index("examples").query(vector: embedding, top_k: limit).matches.map do |m|
|
|
210
|
-
{ agent_name: m.metadata[:agent], text: m.metadata[:text], score: m.score }
|
|
211
|
-
end
|
|
212
|
-
}
|
|
186
|
+
scope: "tenant_123"
|
|
213
187
|
)
|
|
214
188
|
|
|
215
|
-
#
|
|
189
|
+
# With ActiveRecord, add a router_scope column to your model
|
|
190
|
+
class RoutingExample < ApplicationRecord
|
|
191
|
+
has_neighbors :embedding
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Examples are automatically filtered by scope
|
|
195
|
+
router.with_examples(RoutingExample.all) # Only queries where router_scope = "tenant_123"
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
For in-memory examples, the router filters examples that respond to `router_scope` and match the configured scope.
|
|
199
|
+
|
|
200
|
+
### Custom Vector Database
|
|
201
|
+
|
|
202
|
+
```ruby
|
|
216
203
|
router = RubyLLM::SemanticRouter.new(
|
|
217
204
|
agents: { ... },
|
|
218
205
|
default_agent: :product,
|
|
219
206
|
find_examples: ->(embedding, limit:) {
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
207
|
+
# Pinecone, Qdrant, OpenSearch, etc.
|
|
208
|
+
YourVectorDB.search(embedding, limit: limit).map do |result|
|
|
209
|
+
{ agent_name: result.agent, score: result.score }
|
|
210
|
+
end
|
|
223
211
|
}
|
|
224
212
|
)
|
|
225
213
|
```
|
|
226
214
|
|
|
227
|
-
Return
|
|
228
|
-
- `agent_name` (or `agent`) - which agent this example routes to
|
|
229
|
-
- `text` or `example_text` - the example text (optional, for debugging)
|
|
230
|
-
- `distance` (lower is better) or `score` (higher is better)
|
|
231
|
-
|
|
232
|
-
## How Agents Share Context
|
|
233
|
-
|
|
234
|
-
When the router switches agents, the new agent sees the **full conversation history** but with its own system prompt. This means:
|
|
235
|
-
|
|
236
|
-
1. Agent A responds with context
|
|
237
|
-
2. User asks something in Agent B's domain
|
|
238
|
-
3. Router switches to Agent B
|
|
239
|
-
4. Agent B sees the full chat, responds with its own expertise
|
|
240
|
-
|
|
241
|
-
The conversation flows naturally. Users don't notice the switch.
|
|
242
|
-
|
|
243
|
-
## Caveats
|
|
244
|
-
|
|
245
|
-
1. **You need training examples.** At least 5-10 per agent, more is better.
|
|
246
|
-
|
|
247
|
-
2. **Embeddings aren't magic.** "I want to return this" and "What's your return policy" are different intents. Train for both.
|
|
248
|
-
|
|
249
|
-
3. **Threshold tuning matters.** Start with 0.7, use `debug_routing` to see scores, adjust.
|
|
250
|
-
|
|
251
|
-
4. **Tool cycles are atomic.** If Agent A calls a tool, it keeps control until done. No mid-tool handoffs.
|
|
252
|
-
|
|
253
|
-
## Development
|
|
254
|
-
|
|
255
|
-
```bash
|
|
256
|
-
bundle install
|
|
257
|
-
bundle exec rspec
|
|
258
|
-
```
|
|
215
|
+
The `find_examples` callable should return hashes, each with `agent_name` and either a `distance` (lower is better) or a `score` (higher is better).
|
|
259
216
|
|
|
260
217
|
## License
|
|
261
218
|
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "logger"
|
|
4
|
+
|
|
3
5
|
module RubyLLM
|
|
4
6
|
module SemanticRouter
|
|
5
7
|
# Global configuration for the semantic router
|
|
@@ -7,20 +9,114 @@ module RubyLLM
|
|
|
7
9
|
# Default embedding model to use when not specified per-router
|
|
8
10
|
attr_accessor :default_embedding_model
|
|
9
11
|
|
|
12
|
+
# Logger instance for debug output (nil = no logging)
|
|
13
|
+
attr_accessor :logger
|
|
14
|
+
|
|
15
|
+
# Embedding cache TTL in seconds (nil = no caching)
|
|
16
|
+
attr_reader :cache_ttl
|
|
17
|
+
|
|
18
|
+
# Maximum retry attempts for embedding API failures
|
|
19
|
+
attr_reader :max_retries
|
|
20
|
+
|
|
21
|
+
# Base delay in seconds for exponential backoff
|
|
22
|
+
attr_reader :retry_base_delay
|
|
23
|
+
|
|
24
|
+
# Default fallback behavior (:default_agent, :keep_current, :ask_clarification)
|
|
25
|
+
attr_reader :default_fallback
|
|
26
|
+
|
|
10
27
|
# Default similarity threshold (0.0 - 1.0)
|
|
11
|
-
|
|
28
|
+
attr_reader :default_similarity_threshold
|
|
12
29
|
|
|
13
30
|
# Default number of neighbors to consider for routing
|
|
14
|
-
|
|
31
|
+
attr_reader :default_k_neighbors
|
|
15
32
|
|
|
16
|
-
# Default
|
|
17
|
-
|
|
33
|
+
# Default max words to use for embedding (nil = unlimited)
|
|
34
|
+
attr_reader :default_max_words
|
|
35
|
+
|
|
36
|
+
VALID_FALLBACKS = %i[default_agent keep_current ask_clarification].freeze
|
|
18
37
|
|
|
19
38
|
def initialize
|
|
20
39
|
@default_embedding_model = "text-embedding-3-small"
|
|
21
|
-
@default_similarity_threshold = 0.
|
|
40
|
+
@default_similarity_threshold = 0.3
|
|
22
41
|
@default_k_neighbors = 3
|
|
23
42
|
@default_fallback = :default_agent
|
|
43
|
+
@default_max_words = nil
|
|
44
|
+
@logger = nil
|
|
45
|
+
@cache_ttl = nil
|
|
46
|
+
@max_retries = 3
|
|
47
|
+
@retry_base_delay = 0.5
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def cache_ttl=(value)
|
|
51
|
+
return @cache_ttl = nil if value.nil?
|
|
52
|
+
unless value.is_a?(Numeric) && value.positive?
|
|
53
|
+
raise ConfigurationError, "cache_ttl must be nil or a positive number, got: #{value.inspect}"
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
@cache_ttl = value
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def max_retries=(value)
|
|
60
|
+
unless value.is_a?(Integer) && value >= 0
|
|
61
|
+
raise ConfigurationError, "max_retries must be a non-negative integer, got: #{value.inspect}"
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
@max_retries = value
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def retry_base_delay=(value)
|
|
68
|
+
unless value.is_a?(Numeric) && value.positive?
|
|
69
|
+
raise ConfigurationError, "retry_base_delay must be a positive number, got: #{value.inspect}"
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
@retry_base_delay = value
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def default_similarity_threshold=(value)
|
|
76
|
+
validate_similarity_threshold!(value)
|
|
77
|
+
@default_similarity_threshold = value
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def default_k_neighbors=(value)
|
|
81
|
+
validate_k_neighbors!(value)
|
|
82
|
+
@default_k_neighbors = value
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def default_max_words=(value)
|
|
86
|
+
validate_max_words!(value)
|
|
87
|
+
@default_max_words = value
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def default_fallback=(value)
|
|
91
|
+
validate_fallback!(value)
|
|
92
|
+
@default_fallback = value
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
private
|
|
96
|
+
|
|
97
|
+
def validate_similarity_threshold!(value)
|
|
98
|
+
return if value.is_a?(Numeric) && value >= 0.0 && value <= 1.0
|
|
99
|
+
|
|
100
|
+
raise ConfigurationError, "similarity_threshold must be a number between 0.0 and 1.0, got: #{value.inspect}"
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def validate_k_neighbors!(value)
|
|
104
|
+
return if value.is_a?(Integer) && value.positive?
|
|
105
|
+
|
|
106
|
+
raise ConfigurationError, "k_neighbors must be a positive integer, got: #{value.inspect}"
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def validate_max_words!(value)
|
|
110
|
+
return if value.nil?
|
|
111
|
+
return if value.is_a?(Integer) && value.positive?
|
|
112
|
+
|
|
113
|
+
raise ConfigurationError, "max_words must be nil or a positive integer, got: #{value.inspect}"
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def validate_fallback!(value)
|
|
117
|
+
return if VALID_FALLBACKS.include?(value)
|
|
118
|
+
|
|
119
|
+
raise ConfigurationError, "fallback must be one of #{VALID_FALLBACKS.join(', ')}, got: #{value.inspect}"
|
|
24
120
|
end
|
|
25
121
|
end
|
|
26
122
|
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module SemanticRouter
|
|
5
|
+
# Simple in-memory cache for embeddings with TTL support
|
|
6
|
+
class EmbeddingCache
|
|
7
|
+
CacheEntry = Struct.new(:embedding, :expires_at, keyword_init: true)
|
|
8
|
+
|
|
9
|
+
def initialize(ttl:)
|
|
10
|
+
@ttl = ttl
|
|
11
|
+
@cache = {}
|
|
12
|
+
@mutex = Mutex.new
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Get embedding from cache
|
|
16
|
+
# @param key [String] Cache key (typically the text that was embedded)
|
|
17
|
+
# @return [Array, nil] Cached embedding or nil if not found/expired
|
|
18
|
+
def get(key)
|
|
19
|
+
@mutex.synchronize do
|
|
20
|
+
entry = @cache[key]
|
|
21
|
+
return nil unless entry
|
|
22
|
+
return nil if entry.expires_at < Time.now
|
|
23
|
+
|
|
24
|
+
entry.embedding
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Store embedding in cache
|
|
29
|
+
# @param key [String] Cache key
|
|
30
|
+
# @param embedding [Array] The embedding vector to cache
|
|
31
|
+
def set(key, embedding)
|
|
32
|
+
@mutex.synchronize do
|
|
33
|
+
@cache[key] = CacheEntry.new(
|
|
34
|
+
embedding: embedding,
|
|
35
|
+
expires_at: Time.now + @ttl
|
|
36
|
+
)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Get embedding from cache or compute it
|
|
41
|
+
# @param key [String] Cache key
|
|
42
|
+
# @yield Block that computes the embedding if not cached
|
|
43
|
+
# @return [Array] The embedding
|
|
44
|
+
def fetch(key)
|
|
45
|
+
cached = get(key)
|
|
46
|
+
return cached if cached
|
|
47
|
+
|
|
48
|
+
embedding = yield
|
|
49
|
+
set(key, embedding)
|
|
50
|
+
embedding
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Remove expired entries from cache
|
|
54
|
+
def cleanup!
|
|
55
|
+
@mutex.synchronize do
|
|
56
|
+
now = Time.now
|
|
57
|
+
@cache.delete_if { |_, entry| entry.expires_at < now }
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Clear all cached entries
|
|
62
|
+
def clear!
|
|
63
|
+
@mutex.synchronize do
|
|
64
|
+
@cache.clear
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Number of entries in cache
|
|
69
|
+
def size
|
|
70
|
+
@mutex.synchronize { @cache.size }
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -55,5 +55,12 @@ module RubyLLM
|
|
|
55
55
|
super("Invalid agent definition: #{message}")
|
|
56
56
|
end
|
|
57
57
|
end
|
|
58
|
+
|
|
59
|
+
# Raised when configuration value is invalid
|
|
60
|
+
class ConfigurationError < Error
|
|
61
|
+
def initialize(message)
|
|
62
|
+
super("Invalid configuration: #{message}")
|
|
63
|
+
end
|
|
64
|
+
end
|
|
58
65
|
end
|
|
59
66
|
end
|