rubyllm-semantic_router 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5b01b92cdb4fa6f65278ec83440543f95642cf85026ea142cb2564c326c0512c
4
+ data.tar.gz: c1670b2f6379c451e711950f8d50982ec01671a1070ade3805f4fab5b557108a
5
+ SHA512:
6
+ metadata.gz: f06a9a3b105253c45af254f3fdb98346ed28af13a73c36abd0f6eb57709de9b47cdec4a8dd24d214abac004da547124b4468e98b8e6b9d574c0e81583b978489
7
+ data.tar.gz: 9e092a880b2c627850e50ce2f376fba33bd88ae7de824ab1bf50e32d88f416f74128bd9920a456bbbf394f26f6eba0ac74d2bdd8a43019d7bdd7060e41ad43a4
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+
13
+ # Bundle vendor
14
+ vendor/
15
+
16
+ # IDE
17
+ .idea/
18
+ *.swp
19
+
20
+ # RSpec
21
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ gemspec # take the dependency list from this gem's .gemspec instead of repeating it here
data/Gemfile.lock ADDED
@@ -0,0 +1,68 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ rubyllm-semantic_router (0.1.0)
5
+ ruby_llm (~> 1.0)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ base64 (0.3.0)
11
+ diff-lcs (1.6.2)
12
+ event_stream_parser (1.0.0)
13
+ faraday (2.14.0)
14
+ faraday-net_http (>= 2.0, < 3.5)
15
+ json
16
+ logger
17
+ faraday-multipart (1.2.0)
18
+ multipart-post (~> 2.0)
19
+ faraday-net_http (3.4.2)
20
+ net-http (~> 0.5)
21
+ faraday-retry (2.4.0)
22
+ faraday (~> 2.0)
23
+ json (2.18.0)
24
+ logger (1.7.0)
25
+ marcel (1.1.0)
26
+ multipart-post (2.4.1)
27
+ net-http (0.9.1)
28
+ uri (>= 0.11.1)
29
+ rake (13.3.1)
30
+ rspec (3.13.2)
31
+ rspec-core (~> 3.13.0)
32
+ rspec-expectations (~> 3.13.0)
33
+ rspec-mocks (~> 3.13.0)
34
+ rspec-core (3.13.6)
35
+ rspec-support (~> 3.13.0)
36
+ rspec-expectations (3.13.5)
37
+ diff-lcs (>= 1.2.0, < 2.0)
38
+ rspec-support (~> 3.13.0)
39
+ rspec-mocks (3.13.7)
40
+ diff-lcs (>= 1.2.0, < 2.0)
41
+ rspec-support (~> 3.13.0)
42
+ rspec-support (3.13.6)
43
+ ruby_llm (1.9.1)
44
+ base64
45
+ event_stream_parser (~> 1)
46
+ faraday (>= 1.10.0)
47
+ faraday-multipart (>= 1)
48
+ faraday-net_http (>= 1)
49
+ faraday-retry (>= 1)
50
+ marcel (~> 1.0)
51
+ ruby_llm-schema (~> 0.2.1)
52
+ zeitwerk (~> 2)
53
+ ruby_llm-schema (0.2.5)
54
+ uri (1.1.1)
55
+ zeitwerk (2.7.4)
56
+
57
+ PLATFORMS
58
+ arm64-darwin-25
59
+ ruby
60
+
61
+ DEPENDENCIES
62
+ bundler (~> 2.0)
63
+ rake (~> 13.0)
64
+ rspec (~> 3.0)
65
+ rubyllm-semantic_router!
66
+
67
+ BUNDLED WITH
68
+ 2.5.22
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Chris Hasiński
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,262 @@
1
+ # RubyLLM Semantic Router
2
+
3
+ Route user messages to specialized LLM agents based on semantic similarity. Think of it as a fast, embedding-based classifier that decides which expert should handle each message.
4
+
5
+ ## The Problem
6
+
7
+ You have multiple specialized chat agents:
8
+ - A **product expert** that knows your catalog
9
+ - An **account manager** that handles billing and settings
10
+ - A **support agent** that troubleshoots issues
11
+
12
+ How do you decide which one handles "I can't log in" vs "What's your return policy" vs "Show me laptops under $1000"?
13
+
14
+ This gem provides fast, embedding-based routing - no LLM call needed for the routing decision itself.
15
+
16
+ ## How It Works
17
+
18
+ ```
19
+ User: "What's your cheapest laptop?"
20
+
21
+
22
+ ┌─────────────────┐
23
+ │ Embed message │ ← ~2ms, $0.00001
24
+ └────────┬────────┘
25
+
26
+
27
+ ┌─────────────────┐
28
+ │ Find similar │ ← Compare to your examples
29
+ │ examples (kNN) │ "Show me computers" → product
30
+ └────────┬────────┘ "Reset password" → account
31
+
32
+
33
+ ┌─────────────────┐
34
+ │ Route to │ ← Product agent handles it
35
+ │ Product Agent │
36
+ └─────────────────┘
37
+ ```
38
+
39
+ **Key insight**: The routing decision is just an embedding + kNN lookup. No LLM call needed. Fast and cheap.
40
+
41
+ ## Quick Start
42
+
43
+ ```ruby
44
+ require 'rubyllm/semantic_router'
45
+
46
+ # 1. Define your agents as regular RubyLLM chat objects
47
+ product_chat = RubyLLM.chat(model: "gpt-4o-mini")
48
+ .with_instructions("You're a product expert. Help users find products.")
49
+
50
+ support_chat = RubyLLM.chat(model: "gpt-4o")
51
+ .with_instructions("You're technical support. Troubleshoot issues.")
52
+ .with_tools(DiagnosticTool, TicketCreator)
53
+
54
+ # 2. Create router with your agents
55
+ router = RubyLLM::SemanticRouter.new(
56
+ agents: {
57
+ product: product_chat,
58
+ support: support_chat
59
+ },
60
+ default_agent: :product # Fallback when uncertain
61
+ )
62
+
63
+ # 3. Train with examples (the more, the better)
64
+ router.import_examples([
65
+ { text: "Show me laptops", agent: :product },
66
+ { text: "Compare these two phones", agent: :product },
67
+ { text: "What's on sale?", agent: :product },
68
+ { text: "I can't log in", agent: :support },
69
+ { text: "App keeps crashing", agent: :support },
70
+ { text: "Error message when I checkout", agent: :support },
71
+ ])
72
+
73
+ # 4. Chat! Routing happens automatically.
74
+ router.ask("What gaming laptops do you have?") # → product agent
75
+ router.ask("My order is stuck") # → support agent
76
+ ```
77
+
78
+ ## When To Use This
79
+
80
+ **Good fit:**
81
+ - High-volume customer service with 3+ clearly separated domains
82
+ - Different models per task (cheap for FAQ, expensive for reasoning)
83
+ - Compliance requirements that need audit trails per agent
84
+ - Tool sets that would confuse a single LLM if combined
85
+
86
+ **Probably overkill:**
87
+ - Small apps with <1000 daily users
88
+ - Overlapping domains where context matters more than classification
89
+ - No training examples available
90
+
91
+ ## API
92
+
93
+ ### Defining Agents
94
+
95
+ Agents are just RubyLLM chat objects - use the same API you already know:
96
+
97
+ ```ruby
98
+ my_agent = RubyLLM.chat(model: "claude-sonnet-4")
99
+ .with_instructions("You're a specialist...")
100
+ .with_tools(Tool1, Tool2)
101
+ .with_temperature(0.7)
102
+ ```
103
+
104
+ ### Router Options
105
+
106
+ ```ruby
107
+ router = RubyLLM::SemanticRouter.new(
108
+ agents: {
109
+ product: product_chat,
110
+ support: support_chat
111
+ },
112
+ default_agent: :product,
113
+
114
+ # When confidence is below threshold, what to do?
115
+ fallback: :default_agent, # Route to the default agent (this is the default behavior)
116
+ # fallback: :keep_current, # Stay with current agent
117
+ # fallback: :ask_clarification # Ask user to rephrase
118
+
119
+ similarity_threshold: 0.7, # 0.0-1.0, higher = stricter
120
+ embedding_model: "text-embedding-3-small"
121
+ )
122
+ ```
123
+
124
+ ### Training
125
+
126
+ ```ruby
127
+ # One at a time
128
+ router.add_example("Cancel my subscription", agent: :billing)
129
+
130
+ # Batch import (faster - single embedding API call)
131
+ router.import_examples([
132
+ { text: "...", agent: :billing },
133
+ { text: "...", agent: :support },
134
+ ])
135
+ ```
136
+
137
+ ### Debugging
138
+
139
+ ```ruby
140
+ # Preview routing without sending message
141
+ decision = router.match("test message")
142
+ decision.agent # => :product
143
+ decision.confidence # => 0.85
144
+
145
+ # See all matches and scores
146
+ router.debug_routing("test message")
147
+ # => {
148
+ # message: "test message",
149
+ # threshold: 0.7,
150
+ # would_route_to: :product,
151
+ # top_matches: [
152
+ # { agent: :product, example: "show products", confidence: 0.85 },
153
+ # { agent: :support, example: "help me", confidence: 0.42 }
154
+ # ]
155
+ # }
156
+ ```
157
+
158
+ ### Conversation Flow
159
+
160
+ ```ruby
161
+ router.ask("Show me phones") # Routes to :product
162
+ router.current_agent # => :product
163
+
164
+ router.ask("Actually, I need help") # Routes to :support
165
+ router.current_agent # => :support
166
+
167
+ # Full history is preserved across agent switches
168
+ router.messages.size # => 4 (2 exchanges)
169
+ ```
170
+
171
+ ### ActiveRecord + pgvector
172
+
173
+ Use the [neighbor](https://github.com/ankane/neighbor) gem for PostgreSQL:
174
+
175
+ ```ruby
176
+ # Migration
177
+ create_table :routing_examples do |t|
178
+ t.string :agent_name, null: false
179
+ t.text :example_text, null: false
180
+ t.vector :embedding, limit: 1536 # text-embedding-3-small dimensions
181
+ end
182
+
183
+ # Model
184
+ class RoutingExample < ApplicationRecord
185
+ has_neighbors :embedding
186
+ end
187
+
188
+ # Usage
189
+ router = RubyLLM::SemanticRouter.new(
190
+ agents: { product: product_chat, support: support_chat },
191
+ default_agent: :product
192
+ )
193
+ router.with_examples(RoutingExample.all)
194
+
195
+ # Scoped for multi-tenant
196
+ router.with_examples(RoutingExample.where(tenant_id: current_tenant.id))
197
+ ```
198
+
199
+ ### Custom Vector Search
200
+
201
+ Bring your own vector database (Pinecone, Qdrant, OpenSearch, etc.):
202
+
203
+ ```ruby
204
+ router = RubyLLM::SemanticRouter.new(
205
+ agents: { product: product_chat, support: support_chat },
206
+ default_agent: :product,
207
+ find_examples: ->(embedding, limit:) {
208
+ # Pinecone
209
+ Pinecone.index("examples").query(vector: embedding, top_k: limit).matches.map do |m|
210
+ { agent_name: m.metadata[:agent], text: m.metadata[:text], score: m.score }
211
+ end
212
+ }
213
+ )
214
+
215
+ # Or with OpenSearch/Searchkick
216
+ router = RubyLLM::SemanticRouter.new(
217
+ agents: { ... },
218
+ default_agent: :product,
219
+ find_examples: ->(embedding, limit:) {
220
+ RoutingExample.search("*",
221
+ knn: { field: :embedding, vector: embedding, k: limit }
222
+ ).map { |r| { agent_name: r.agent_name, text: r.example_text, distance: r.distance } }
223
+ }
224
+ )
225
+ ```
226
+
227
+ Return an array of hashes/objects with:
228
+ - `agent_name` (or `agent`) - which agent this example routes to
229
+ - `text` (or `example_text`) - the example text (optional, for debugging)
230
+ - `distance` (lower is better) or `score` (higher is better)
231
+
232
+ ## How Agents Share Context
233
+
234
+ When the router switches agents, the new agent sees the **full conversation history** but with its own system prompt. This means:
235
+
236
+ 1. Agent A responds with context
237
+ 2. User asks something in Agent B's domain
238
+ 3. Router switches to Agent B
239
+ 4. Agent B sees the full chat, responds with its own expertise
240
+
241
+ The conversation flows naturally. Users don't notice the switch.
242
+
243
+ ## Caveats
244
+
245
+ 1. **You need training examples.** At least 5-10 per agent; more is better.
246
+
247
+ 2. **Embeddings aren't magic.** "I want to return this" and "What's your return policy" are different intents. Train for both.
248
+
249
+ 3. **Threshold tuning matters.** Start with 0.7, use `debug_routing` to inspect the scores, then adjust.
250
+
251
+ 4. **Tool cycles are atomic.** If Agent A calls a tool, it keeps control until done. No mid-tool handoffs.
252
+
253
+ ## Development
254
+
255
+ ```bash
256
+ bundle install
257
+ bundle exec rspec
258
+ ```
259
+
260
+ ## License
261
+
262
+ MIT
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec) # define a rake task named `spec`
5
+
6
+ task :default => :spec # make `spec` a prerequisite of the default task, so bare `rake` runs it
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "rubyllm/semantic_router" # load the gem itself so it is available inside the session
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__) # open an interactive IRB session with the requires above already loaded
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail # exit on any error, on use of an unset variable, and when any stage of a pipeline fails
3
+ IFS=$'\n\t' # split words only on newlines and tabs, not on spaces
4
+ set -vx # print each line as it is read and each command as it is executed
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module SemanticRouter
5
+ # Global configuration for the semantic router: four read/write default settings, each given an initial value in #initialize
6
+ class Configuration
7
+ # Default embedding model to use when not specified per-router (initially "text-embedding-3-small")
8
+ attr_accessor :default_embedding_model
9
+
10
+ # Default similarity threshold (0.0 - 1.0); initially 0.7
11
+ attr_accessor :default_similarity_threshold
12
+
13
+ # Default number of neighbors to consider for routing; initially 3
14
+ attr_accessor :default_k_neighbors
15
+
16
+ # Default fallback behavior (:default_agent, :keep_current, :ask_clarification); initially :default_agent
17
+ attr_accessor :default_fallback
18
+
19
+ def initialize # takes no arguments; assigns the initial value of every setting
20
+ @default_embedding_model = "text-embedding-3-small"
21
+ @default_similarity_threshold = 0.7
22
+ @default_k_neighbors = 3
23
+ @default_fallback = :default_agent
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module SemanticRouter
5
+ # Base error class for all semantic router errors; every error class below inherits from it, so rescuing it catches them all
6
+ class Error < StandardError; end
7
+
8
+ # Raised when an agent name is not found in the router; the message names the missing agent and lists available_agents joined with ", "
9
+ class AgentNotFoundError < Error
10
+ def initialize(agent_name, available_agents)
11
+ super("Agent '#{agent_name}' not found. Available agents: #{available_agents.join(', ')}")
12
+ end
13
+ end
14
+
15
+ # Raised when no default agent is configured; takes no arguments and uses a fixed message
16
+ class NoDefaultAgentError < Error
17
+ def initialize
18
+ super("No default agent configured. Set default_agent when creating the router.")
19
+ end
20
+ end
21
+
22
+ # Raised when no agents are configured; takes no arguments and uses a fixed message
23
+ class NoAgentsError < Error
24
+ def initialize
25
+ super("No agents configured. Add at least one agent to the router.")
26
+ end
27
+ end
28
+
29
+ # Raised when routing examples are required but none exist; takes no arguments and uses a fixed message
30
+ class NoRoutingExamplesError < Error
31
+ def initialize
32
+ super("No routing examples found. Add examples using router.add_example or configure a fallback.")
33
+ end
34
+ end
35
+
36
+ # Raised when embedding generation fails; only original_error.message is copied into this error's message
37
+ class EmbeddingError < Error
38
+ def initialize(original_error)
39
+ super("Failed to generate embedding: #{original_error.message}")
40
+ end
41
+ end
42
+
43
+ # Raised when an invalid fallback behavior is specified; the message lists VALID_BEHAVIORS as the accepted options
44
+ class InvalidFallbackError < Error
45
+ VALID_BEHAVIORS = %i[default_agent keep_current ask_clarification].freeze
46
+
47
+ def initialize(behavior)
48
+ super("Invalid fallback behavior '#{behavior}'. Valid options: #{VALID_BEHAVIORS.join(', ')}")
49
+ end
50
+ end
51
+
52
+ # Raised when agent definition is incomplete; the given message is prefixed with "Invalid agent definition: "
53
+ class InvalidAgentError < Error
54
+ def initialize(message)
55
+ super("Invalid agent definition: #{message}")
56
+ end
57
+ end
58
+ end
59
+ end