legion-llm 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/CLAUDE.md +16 -16
- data/lib/legion/llm/conversation_store.rb +182 -0
- data/lib/legion/llm/errors.rb +43 -0
- data/lib/legion/llm/pipeline/audit_publisher.rb +60 -0
- data/lib/legion/llm/pipeline/enrichment_injector.rb +31 -0
- data/lib/legion/llm/pipeline/executor.rb +136 -19
- data/lib/legion/llm/pipeline/gaia_caller.rb +58 -0
- data/lib/legion/llm/pipeline/steps/gaia_advisory.rb +64 -0
- data/lib/legion/llm/pipeline/steps/mcp_discovery.rb +59 -0
- data/lib/legion/llm/pipeline/steps/post_response.rb +59 -0
- data/lib/legion/llm/pipeline/steps/rag_context.rb +85 -0
- data/lib/legion/llm/pipeline/steps/rag_guard.rb +37 -0
- data/lib/legion/llm/pipeline/steps/tool_calls.rb +63 -0
- data/lib/legion/llm/pipeline/steps.rb +18 -0
- data/lib/legion/llm/pipeline/tool_dispatcher.rb +81 -0
- data/lib/legion/llm/pipeline.rb +5 -1
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +18 -53
- metadata +14 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2337a94da9139d5ba6bb4ddbc4ee3d11a7caab4b5502e85c1fe08c5822d9bd7c
|
|
4
|
+
data.tar.gz: bab4d942d0845165167fbc15bed8bd07b7e52b3475f6b328b49bb6a2fde0d9a5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c685d7db577f7be3ea567c7fd79eb09faaa4f9f6907ddd86c460e3c640a5573e0c15658fe7eb9b89500383a00b0d65e2a99aac7e1a454d1f2caa3877d835f261
|
|
7
|
+
data.tar.gz: 9439839a44b57f05e45b310ba6b1bcbb49a115f959292a6c31a2943d22f11584d2cd092963a9f79a625e25794cf6bb21506e0709da64a0e4d8f11413878497a1
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.1] - 2026-03-23
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Typed error hierarchy (`AuthError`, `RateLimitError`, `ContextOverflow`, `ProviderError`, `ProviderDown`, `UnsupportedCapability`, `PipelineError`) with `retryable?` predicate
|
|
7
|
+
- `ConversationStore` with in-memory LRU hot layer (256 conversations) and optional DB persistence via Sequel
|
|
8
|
+
- Streaming pipeline support via `Executor#call_stream` — pre/post steps run normally, chunks yielded to caller
|
|
9
|
+
- Pipeline steps `context_load` (Step 3) and `context_store` (Step 15) now functional
|
|
10
|
+
- `Pipeline::Steps::McpDiscovery` (step 9): discovers tools from all healthy MCP servers via `Legion::MCP::Client::Pool`
|
|
11
|
+
- `Pipeline::ToolDispatcher`: routes tool calls to MCP client, LEX extension runner, or RubyLLM builtin
|
|
12
|
+
- `Pipeline::Steps::ToolCalls` (step 14): dispatches non-builtin tool calls from LLM response via `ToolDispatcher`
|
|
13
|
+
- `pipeline/steps.rb` aggregator for all step modules
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- Executor `step_provider_call` classifies Faraday errors into typed hierarchy
|
|
17
|
+
- `chat`, `embed`, and `structured` route directly without gateway delegation
|
|
18
|
+
- `_dispatch_embed` and `_dispatch_structured` removed; dispatch inlined
|
|
19
|
+
|
|
20
|
+
### Removed
|
|
21
|
+
- `lex-llm-gateway` auto-loading (`begin/rescue LoadError` block removed)
|
|
22
|
+
- `gateway_loaded?` and `gateway_chat` helper methods
|
|
23
|
+
- `_dispatch_embed` and `_dispatch_structured` indirection methods
|
|
24
|
+
|
|
3
25
|
## [0.4.0] - 2026-03-23
|
|
4
26
|
|
|
5
27
|
### Added
|
data/CLAUDE.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
Core LegionIO gem providing LLM capabilities to all extensions. Wraps ruby_llm to provide a consistent interface for chat, embeddings, tool use, and agents across multiple providers (Bedrock, Anthropic, OpenAI, Gemini, Ollama). Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
|
|
9
9
|
|
|
10
10
|
**GitHub**: https://github.com/LegionIO/legion-llm
|
|
11
|
-
**Version**: 0.4.0
|
|
11
|
+
**Version**: 0.4.1
|
|
12
12
|
**License**: Apache-2.0
|
|
13
13
|
|
|
14
14
|
## Architecture
|
|
@@ -33,6 +33,8 @@ Legion::LLM (lib/legion/llm.rb)
|
|
|
33
33
|
├── EscalationExhausted # Raised when all escalation attempts are exhausted
|
|
34
34
|
├── DaemonDeniedError # Raised when daemon returns HTTP 403
|
|
35
35
|
├── DaemonRateLimitedError # Raised when daemon returns HTTP 429
|
|
36
|
+
├── LLMError / AuthError / RateLimitError / ContextOverflow / ProviderError / ProviderDown / UnsupportedCapability / PipelineError # Typed error hierarchy with retryable?
|
|
37
|
+
├── ConversationStore # In-memory LRU (256 conversations) + optional DB persistence via Sequel
|
|
36
38
|
├── Settings # Default config, provider settings, routing defaults, discovery defaults
|
|
37
39
|
├── Providers # Provider configuration and Vault credential resolution (includes Azure `configure_azure`)
|
|
38
40
|
├── DaemonClient # HTTP routing to LegionIO daemon with 30s health cache
|
|
@@ -58,8 +60,9 @@ Legion::LLM (lib/legion/llm.rb)
|
|
|
58
60
|
│ ├── Tracing # Distributed trace_id, span_id, exchange_id generation
|
|
59
61
|
│ ├── Timeline # Ordered event recording with participant tracking
|
|
60
62
|
│ ├── Executor # 18-step pipeline skeleton with profile-aware execution
|
|
61
|
-
│
|
|
62
|
-
│
|
|
63
|
+
│ ├── Steps/
|
|
64
|
+
│ │ └── Metering # Metering event builder (absorbed from lex-llm-gateway)
|
|
65
|
+
│ └── Executor#call_stream # Streaming variant: pre-provider steps, yield chunks, post-provider steps
|
|
63
66
|
├── CostEstimator # Model cost estimation with fuzzy pricing (absorbed from lex-llm-gateway)
|
|
64
67
|
├── Fleet # Fleet RPC dispatch (absorbed from lex-llm-gateway)
|
|
65
68
|
│ ├── Dispatcher # Fleet dispatch with timeout and availability checks
|
|
@@ -107,16 +110,7 @@ Three-tier dispatch model. Local-first avoids unnecessary network hops; fleet of
|
|
|
107
110
|
|
|
108
111
|
### Gateway Integration (lex-llm-gateway)
|
|
109
112
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
```
|
|
113
|
-
Caller → Legion::LLM.chat(message:)
|
|
114
|
-
└─ gateway loaded? → Gateway::Runners::Inference.chat (meters, fleet dispatch)
|
|
115
|
-
└─ Legion::LLM.chat_direct (routing, escalation, RubyLLM)
|
|
116
|
-
└─ no gateway? → Legion::LLM.chat_direct (same path, no metering)
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
The `_direct` variants (`chat_direct`, `embed_direct`, `structured_direct`) bypass gateway delegation. The gateway's `call_llm` uses these to avoid infinite recursion.
|
|
113
|
+
Gateway delegation was removed in v0.4.1. `chat`, `embed`, and `structured` now route directly — there is no `begin/rescue LoadError` block and no `gateway_loaded?` check. The pipeline (when `pipeline_enabled: true`) handles metering and fleet dispatch natively. The `_direct` variants (`chat_direct`, `embed_direct`, `structured_direct`) still exist as the canonical non-pipeline path.
|
|
120
114
|
|
|
121
115
|
### Integration with LegionIO
|
|
122
116
|
|
|
@@ -135,7 +129,7 @@ The `_direct` variants (`chat_direct`, `embed_direct`, `structured_direct`) bypa
|
|
|
135
129
|
| `tzinfo` (>= 2.0) | IANA timezone conversion for schedule windows |
|
|
136
130
|
| `legion-logging` | Logging |
|
|
137
131
|
| `legion-settings` | Configuration |
|
|
138
|
-
| `lex-llm-gateway` (
|
|
132
|
+
| `lex-llm-gateway` (removed) | No longer auto-loaded; pipeline handles metering and fleet dispatch natively |
|
|
139
133
|
|
|
140
134
|
## Key Interfaces
|
|
141
135
|
|
|
@@ -329,7 +323,9 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
329
323
|
| `lib/legion/llm/embeddings.rb` | Embeddings module: generate, generate_batch, default_model |
|
|
330
324
|
| `lib/legion/llm/shadow_eval.rb` | Shadow evaluation: enabled?, should_sample?, evaluate, compare |
|
|
331
325
|
| `lib/legion/llm/structured_output.rb` | JSON schema enforcement with native response_format and prompt fallback |
|
|
332
|
-
| `lib/legion/llm/
|
|
326
|
+
| `lib/legion/llm/errors.rb` | Typed error hierarchy: LLMError base + AuthError, RateLimitError, ContextOverflow, ProviderError, ProviderDown, UnsupportedCapability, PipelineError |
|
|
327
|
+
| `lib/legion/llm/conversation_store.rb` | ConversationStore: in-memory LRU (256 slots) + optional Sequel DB persistence + spool fallback |
|
|
328
|
+
| `lib/legion/llm/version.rb` | Version constant (0.4.1) |
|
|
333
329
|
| `lib/legion/llm/quality_checker.rb` | QualityChecker module with QualityResult struct |
|
|
334
330
|
| `lib/legion/llm/escalation_history.rb` | EscalationHistory mixin: `escalation_history`, `escalated?`, `final_resolution`, `escalation_chain` |
|
|
335
331
|
| `lib/legion/llm/router/escalation_chain.rb` | EscalationChain value object |
|
|
@@ -373,7 +369,11 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
373
369
|
| `spec/legion/llm/embeddings_spec.rb` | Embeddings tests |
|
|
374
370
|
| `spec/legion/llm/shadow_eval_spec.rb` | ShadowEval tests |
|
|
375
371
|
| `spec/legion/llm/structured_output_spec.rb` | StructuredOutput tests |
|
|
376
|
-
| `spec/legion/llm/
|
|
372
|
+
| `spec/legion/llm/errors_spec.rb` | Tests: typed error hierarchy, retryable? predicate |
|
|
373
|
+
| `spec/legion/llm/conversation_store_spec.rb` | Tests: LRU eviction, append, messages, DB fallback |
|
|
374
|
+
| `spec/legion/llm/pipeline/executor_stream_spec.rb` | Tests: call_stream chunk yielding, pre/post steps |
|
|
375
|
+
| `spec/legion/llm/pipeline/streaming_integration_spec.rb` | Tests: streaming end-to-end with ConversationStore |
|
|
376
|
+
| `spec/legion/llm/gateway_integration_spec.rb` | Tests: gateway teardown — verifies no delegation |
|
|
377
377
|
| `spec/legion/llm/cost_estimator_spec.rb` | Tests: cost estimation, fuzzy matching, pricing table |
|
|
378
378
|
| `spec/legion/llm/pipeline/request_spec.rb` | Tests: Request struct builder, legacy adapter |
|
|
379
379
|
| `spec/legion/llm/pipeline/response_spec.rb` | Tests: Response struct builder, RubyLLM adapter, #with |
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'fileutils'
require 'json'
require 'securerandom'

module Legion
  module LLM
    # Conversation history store.
    #
    # Keeps up to MAX_CONVERSATIONS conversations in a process-local Hash and
    # evicts the least recently accessed one when that limit is exceeded.
    # When Legion::Data exposes a connection with a :conversations table,
    # conversations and messages are also written to the database; a failed
    # message write is spooled to disk instead (only when Legion::Data::Spool
    # is defined). All persistence is best-effort: failures are logged through
    # Legion::Logging when it is loaded and never raised to the caller.
    module ConversationStore
      MAX_CONVERSATIONS = 256

      class << self
        # Appends a message to a conversation, creating the conversation when
        # it is not in memory.
        #
        # @param conversation_id [Object] conversation key
        # @param role [Symbol, String] message author role
        # @param content [Object] message body
        # @param metadata [Hash] extra fields merged into the message; the DB
        #   writer reads :provider, :model, :input_tokens and :output_tokens
        # @return [Hash] the stored message, including :seq and :created_at
        def append(conversation_id, role:, content:, **metadata)
          ensure_conversation(conversation_id)
          seq = next_seq(conversation_id)
          msg = { seq: seq, role: role, content: content, created_at: Time.now, **metadata }
          conversations[conversation_id][:messages] << msg
          touch(conversation_id)
          persist_message(conversation_id, msg)
          msg
        end

        # Returns the messages of a conversation ordered by :seq.
        #
        # Reads the in-memory copy when present (and marks it as recently
        # used); otherwise falls back to the database.
        #
        # @param conversation_id [Object] conversation key
        # @return [Array<Hash>] messages, or [] when nothing is known
        def messages(conversation_id)
          if in_memory?(conversation_id)
            touch(conversation_id)
            conversations[conversation_id][:messages].sort_by { |m| m[:seq] }
          else
            load_from_db(conversation_id)
          end
        end

        # Creates (or resets) the in-memory entry for a conversation, evicts
        # the least recently used entry if the limit is exceeded, and records
        # the conversation in the database when one is available.
        #
        # @param conversation_id [Object] conversation key
        # @param metadata [Hash] conversation metadata; :caller_identity is
        #   stored in its own DB column
        def create_conversation(conversation_id, **metadata)
          conversations[conversation_id] = { messages: [], metadata: metadata, accessed_at: Time.now }
          evict_if_needed
          persist_conversation(conversation_id, metadata)
        end

        # @param conversation_id [Object] conversation key
        # @return [Boolean] true when the conversation is in memory or has a
        #   row in the :conversations table
        def conversation_exists?(conversation_id)
          in_memory?(conversation_id) || db_conversation_exists?(conversation_id)
        end

        # @param conversation_id [Object] conversation key
        # @return [Boolean] true when the conversation is held in memory
        def in_memory?(conversation_id)
          conversations.key?(conversation_id)
        end

        # Drops every in-memory conversation. Persisted rows are untouched.
        def reset!
          @conversations = {}
        end

        private

        # Lazily initialised in-memory table: id => { messages:, metadata:, accessed_at: }.
        def conversations
          @conversations ||= {}
        end

        # Makes sure the conversation has an in-memory entry.
        #
        # A conversation that is not in memory may still have persisted
        # messages (for example after eviction). Those are reloaded here so
        # sequence numbers continue from the stored history instead of
        # restarting at 1, and so #messages keeps returning the full history.
        def ensure_conversation(conversation_id)
          return if in_memory?(conversation_id)

          create_conversation(conversation_id)
          history = load_from_db(conversation_id)
          conversations[conversation_id][:messages].concat(history) unless history.empty?
        end

        # Next sequence number: one past the last in-memory message, or 1.
        def next_seq(conversation_id)
          msgs = conversations[conversation_id][:messages]
          msgs.empty? ? 1 : msgs.last[:seq] + 1
        end

        # Marks the conversation as just used so eviction skips it.
        def touch(conversation_id)
          return unless in_memory?(conversation_id)

          conversations[conversation_id][:accessed_at] = Time.now
        end

        # Removes the entry with the oldest :accessed_at once the table holds
        # more than MAX_CONVERSATIONS entries.
        def evict_if_needed
          return unless conversations.size > MAX_CONVERSATIONS

          oldest_id = conversations.min_by { |_, v| v[:accessed_at] }&.first
          conversations.delete(oldest_id) if oldest_id
        end

        # Writes a message to the database; on failure spools it to disk.
        # A failure of the spool itself is logged rather than raised, so
        # #append never fails because of persistence problems.
        def persist_message(conversation_id, msg)
          return unless db_available?

          db_append_message(conversation_id, msg)
        rescue StandardError => e
          begin
            spool_message(conversation_id, msg)
            log_warn("ConversationStore persist failed, spooled: #{e.message}")
          rescue StandardError => spool_error
            log_warn("ConversationStore persist failed (#{e.message}) and spool failed: #{spool_error.message}")
          end
        end

        # Records the conversation row; failures are logged and ignored.
        def persist_conversation(conversation_id, metadata)
          return unless db_available?

          db_create_conversation(conversation_id, metadata)
        rescue StandardError => e
          log_warn("ConversationStore conversation persist failed: #{e.message}")
        end

        # Loads persisted messages; returns [] when the DB is unavailable or
        # the read fails (the failure is logged).
        def load_from_db(conversation_id)
          return [] unless db_available?

          db_load_messages(conversation_id)
        rescue StandardError => e
          log_warn("ConversationStore load failed: #{e.message}")
          []
        end

        # True when a :conversations row exists; false when the DB is
        # unavailable or the lookup fails (the failure is logged).
        def db_conversation_exists?(conversation_id)
          return false unless db_available?

          db_conversation_record?(conversation_id)
        rescue StandardError => e
          log_warn("ConversationStore existence check failed: #{e.message}")
          false
        end

        # The database counts as available when Legion::Data is loaded, offers
        # a connection, and that connection reports a :conversations table.
        # Any error while probing is treated as "not available".
        def db_available?
          defined?(Legion::Data) &&
            Legion::Data.respond_to?(:connection) &&
            Legion::Data.connection.respond_to?(:table_exists?) &&
            Legion::Data.connection.table_exists?(:conversations)
        rescue StandardError
          false
        end

        # Inserts the conversation row via insert_ignore, which is expected to
        # leave an already existing row untouched.
        def db_create_conversation(conversation_id, metadata)
          Legion::Data.connection[:conversations].insert_ignore.insert(
            id:              conversation_id,
            caller_identity: metadata[:caller_identity],
            metadata:        metadata.to_json,
            created_at:      Time.now,
            updated_at:      Time.now
          )
        end

        # Inserts one message row into :conversation_messages.
        def db_append_message(conversation_id, msg)
          Legion::Data.connection[:conversation_messages].insert(
            conversation_id: conversation_id,
            seq:             msg[:seq],
            role:            msg[:role].to_s,
            content:         msg[:content],
            provider:        msg[:provider]&.to_s,
            model:           msg[:model]&.to_s,
            input_tokens:    msg[:input_tokens],
            output_tokens:   msg[:output_tokens],
            created_at:      msg[:created_at] || Time.now
          )
        end

        # Reads all message rows of a conversation ordered by :seq.
        def db_load_messages(conversation_id)
          Legion::Data.connection[:conversation_messages]
                      .where(conversation_id: conversation_id)
                      .order(:seq)
                      .map { |row| symbolize_message(row) }
        end

        def db_conversation_record?(conversation_id)
          Legion::Data.connection[:conversations].where(id: conversation_id).any?
        end

        # Converts a DB row into the in-memory message shape (role and
        # provider become symbols again).
        def symbolize_message(row)
          {
            seq:           row[:seq],
            role:          row[:role]&.to_sym,
            content:       row[:content],
            provider:      row[:provider]&.to_sym,
            model:         row[:model],
            input_tokens:  row[:input_tokens],
            output_tokens: row[:output_tokens],
            created_at:    row[:created_at]
          }
        end

        # Writes the message as a JSON file under <spool_root>/conversations.
        # Does nothing unless Legion::Data::Spool is defined.
        def spool_message(conversation_id, msg)
          return unless defined?(Legion::Data::Spool)

          dir = File.join(spool_root, 'conversations')
          FileUtils.mkdir_p(dir)
          # Nanosecond timestamp plus UUID keeps file names unique and sortable.
          filename = "#{Time.now.strftime('%s%9N')}-#{SecureRandom.uuid}.json"
          payload = { conversation_id: conversation_id, message: msg }
          File.write(File.join(dir, filename), payload.to_json)
        end

        def spool_root
          @spool_root ||= File.expand_path('~/.legionio/data/spool/llm')
        end

        # Emits a warning through Legion::Logging when that module is loaded.
        def log_warn(message)
          Legion::Logging.warn(message) if defined?(Legion::Logging)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    # Root of the typed LLM error hierarchy. Errors are not retryable unless
    # a subclass overrides #retryable?.
    class LLMError < StandardError
      # @return [Boolean] whether retrying the failed operation may succeed
      def retryable?
        false
      end
    end

    # Authentication / authorization failure. Not retryable.
    class AuthError < LLMError
    end

    # The provider rejected the request because of rate limiting. Retryable.
    class RateLimitError < LLMError
      # @return [Object, nil] retry delay supplied by the raiser, if any
      attr_reader :retry_after

      # @param msg [String, nil] error message
      # @param retry_after [Object, nil] suggested wait before retrying
      def initialize(msg = nil, retry_after: nil)
        @retry_after = retry_after
        super(msg)
      end

      def retryable?
        true
      end
    end

    # The request did not fit the model context. Retryable.
    class ContextOverflow < LLMError
      def retryable?
        true
      end
    end

    # The provider reported a failure. Retryable.
    class ProviderError < LLMError
      def retryable?
        true
      end
    end

    # The provider could not be reached. Not retryable.
    class ProviderDown < LLMError
    end

    # The requested capability is not supported. Not retryable.
    class UnsupportedCapability < LLMError
    end

    # A pipeline step failed. Not retryable.
    class PipelineError < LLMError
      # @return [Object, nil] the step that failed, if supplied
      attr_reader :step

      # @param msg [String, nil] error message
      # @param step [Object, nil] identifier of the failing step
      def initialize(msg = nil, step: nil)
        @step = step
        super(msg)
      end
    end
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    module Pipeline
      # Builds an audit event from a pipeline request/response pair and
      # publishes it on the audit exchange when Legion::Transport is loaded.
      # Publishing is best-effort: errors are logged (when Legion::Logging is
      # loaded) and never raised.
      module AuditPublisher
        EXCHANGE = 'llm.audit'
        ROUTING_KEY = 'llm.audit.complete'

        # Response attributes copied into the event under the same key.
        RESPONSE_FIELDS = %i[
          request_id conversation_id caller routing tokens cost enrichments
          audit timeline timestamps classification tracing
        ].freeze

        module_function

        # Assembles the audit event hash.
        #
        # @param request [#messages] the pipeline request
        # @param response [Object] the pipeline response; must respond to the
        #   RESPONSE_FIELDS readers plus #message and #tools
        # @return [Hash] event payload
        def build_event(request:, response:)
          copied = RESPONSE_FIELDS.each_with_object({}) do |field, acc|
            acc[field] = response.public_send(field)
          end

          copied.merge(
            messages:         request.messages,
            response_content: response.message[:content],
            tools_used:       response.tools,
            timestamp:        Time.now
          )
        end

        # Builds the event and tries to publish it.
        #
        # @param request [#messages] the pipeline request
        # @param response [Object] the pipeline response
        # @return [Hash, nil] the event, or nil when it could not be built
        def publish(request:, response:)
          event = build_event(request: request, response: response)

          begin
            transport_ready = defined?(Legion::Transport) &&
                              defined?(Legion::Transport::Messages::Dynamic)

            if transport_ready
              message = Legion::Transport::Messages::Dynamic.new(
                function:    'llm_audit',
                opts:        event,
                exchange:    EXCHANGE,
                routing_key: ROUTING_KEY
              )
              message.publish
            elsif defined?(Legion::Logging)
              Legion::Logging.debug('audit publish skipped: transport unavailable')
            end
          rescue StandardError => e
            # A transport failure must not discard the event for the caller.
            Legion::Logging.warn("audit publish failed: #{e.message}") if defined?(Legion::Logging)
          end

          event
        rescue StandardError => e
          Legion::Logging.warn("audit build_event failed: #{e.message}") if defined?(Legion::Logging)
          nil
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    module Pipeline
      # Folds pipeline enrichments into the system prompt sent to the provider.
      module EnrichmentInjector
        module_function

        # Builds the effective system prompt.
        #
        # Sections are joined with a blank line in this order: the GAIA system
        # prompt, a "Relevant context" section listing RAG entries, and finally
        # the caller's own system prompt. When no enrichment contributes a
        # section, the caller's prompt is returned unchanged (possibly nil).
        #
        # @param system [String, nil] caller-supplied system prompt
        # @param enrichments [Hash] enrichments keyed by
        #   'gaia:system_prompt' and 'rag:context_retrieval'
        # @return [String, nil] combined system prompt
        def inject(system:, enrichments:)
          gaia_prompt = enrichments.dig('gaia:system_prompt', :content)
          rag_entries = enrichments.dig('rag:context_retrieval', :data, :entries)

          sections = []
          sections.push(gaia_prompt) if gaia_prompt

          if rag_entries
            rendered = rag_entries.map { |entry| "[#{entry[:content_type]}] #{entry[:content]}" }
            joined = rendered.join("\n")
            sections.push("Relevant context:\n#{joined}") unless joined.empty?
          end

          return system if sections.empty?

          sections.push(system) if system
          sections.join("\n\n")
        end
      end
    end
  end
end
|
|
@@ -4,8 +4,15 @@ module Legion
|
|
|
4
4
|
module LLM
|
|
5
5
|
module Pipeline
|
|
6
6
|
class Executor
|
|
7
|
+
include Steps::GaiaAdvisory
|
|
8
|
+
include Steps::PostResponse
|
|
9
|
+
include Steps::RagContext
|
|
10
|
+
|
|
7
11
|
attr_reader :request, :profile, :timeline, :tracing, :enrichments,
|
|
8
|
-
:audit, :warnings
|
|
12
|
+
:audit, :warnings, :discovered_tools
|
|
13
|
+
|
|
14
|
+
include Steps::McpDiscovery
|
|
15
|
+
include Steps::ToolCalls
|
|
9
16
|
|
|
10
17
|
STEPS = %i[
|
|
11
18
|
tracing_init idempotency conversation_uuid context_load
|
|
@@ -14,6 +21,16 @@ module Legion
|
|
|
14
21
|
tool_calls context_store post_response response_return
|
|
15
22
|
].freeze
|
|
16
23
|
|
|
24
|
+
PRE_PROVIDER_STEPS = %i[
|
|
25
|
+
tracing_init idempotency conversation_uuid context_load
|
|
26
|
+
rbac classification billing gaia_advisory rag_context mcp_discovery
|
|
27
|
+
routing request_normalization
|
|
28
|
+
].freeze
|
|
29
|
+
|
|
30
|
+
POST_PROVIDER_STEPS = %i[
|
|
31
|
+
response_normalization tool_calls context_store post_response response_return
|
|
32
|
+
].freeze
|
|
33
|
+
|
|
17
34
|
def initialize(request)
|
|
18
35
|
@request = request
|
|
19
36
|
@profile = Profile.derive(request.caller)
|
|
@@ -25,6 +42,7 @@ module Legion
|
|
|
25
42
|
@timestamps = { received: Time.now }
|
|
26
43
|
@raw_response = nil
|
|
27
44
|
@exchange_id = nil
|
|
45
|
+
@discovered_tools = []
|
|
28
46
|
@resolved_provider = nil
|
|
29
47
|
@resolved_model = nil
|
|
30
48
|
end
|
|
@@ -34,6 +52,15 @@ module Legion
|
|
|
34
52
|
build_response
|
|
35
53
|
end
|
|
36
54
|
|
|
55
|
+
# Streaming variant of #call.
#
# With a block: runs the pre-provider steps, streams the provider call
# (handing the block to it), runs the post-provider steps and returns the
# built response. Without a block it simply delegates to #call.
def call_stream(&block)
  if block
    execute_pre_provider_steps
    step_provider_call_stream(&block)
    execute_post_provider_steps
    build_response
  else
    call
  end
end
|
|
63
|
+
|
|
37
64
|
private
|
|
38
65
|
|
|
39
66
|
def execute_steps
|
|
@@ -57,7 +84,20 @@ module Legion
|
|
|
57
84
|
|
|
58
85
|
def step_conversation_uuid; end
|
|
59
86
|
|
|
60
|
-
def step_context_load
|
|
87
|
+
# Pipeline step: loads prior messages for the request's conversation.
#
# Does nothing when the request carries no conversation id or the store has
# no messages for it. Otherwise the history is placed in
# @enrichments[:conversation_history] and a timeline event is recorded.
def step_context_load
  conversation_id = @request.conversation_id
  return unless conversation_id

  prior_messages = ConversationStore.messages(conversation_id)
  return if prior_messages.empty?

  @enrichments[:conversation_history] = prior_messages

  @timeline.record(
    category:  :internal,
    key:       'context:loaded',
    direction: :internal,
    detail:    "loaded #{prior_messages.size} prior messages",
    from:      'conversation_store',
    to:        'pipeline'
  )
end
|
|
61
101
|
|
|
62
102
|
def step_rbac
|
|
63
103
|
@audit[:'rbac:permission_check'] = {
|
|
@@ -95,12 +135,6 @@ module Legion
|
|
|
95
135
|
)
|
|
96
136
|
end
|
|
97
137
|
|
|
98
|
-
def step_gaia_advisory; end
|
|
99
|
-
|
|
100
|
-
def step_rag_context; end
|
|
101
|
-
|
|
102
|
-
def step_mcp_discovery; end
|
|
103
|
-
|
|
104
138
|
def step_routing
|
|
105
139
|
@timestamps[:routing_start] = Time.now
|
|
106
140
|
provider = @request.routing[:provider]
|
|
@@ -158,10 +192,28 @@ module Legion
|
|
|
158
192
|
|
|
159
193
|
ToolRegistry.tools.each { |t| session.with_tool(t) } if defined?(ToolRegistry)
|
|
160
194
|
|
|
195
|
+
injected_system = EnrichmentInjector.inject(
|
|
196
|
+
system: @request.system,
|
|
197
|
+
enrichments: @enrichments
|
|
198
|
+
)
|
|
199
|
+
session.with_instructions(injected_system) if injected_system
|
|
200
|
+
|
|
161
201
|
message_content = @request.messages.last&.dig(:content)
|
|
162
202
|
@raw_response = message_content ? session.ask(message_content) : session
|
|
163
203
|
|
|
164
204
|
@timestamps[:provider_end] = Time.now
|
|
205
|
+
record_provider_response
|
|
206
|
+
rescue Faraday::UnauthorizedError, Faraday::ForbiddenError => e
|
|
207
|
+
raise Legion::LLM::AuthError, e.message
|
|
208
|
+
rescue Faraday::TooManyRequestsError => e
|
|
209
|
+
raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
|
|
210
|
+
rescue Faraday::ServerError => e
|
|
211
|
+
raise Legion::LLM::ProviderError, e.message
|
|
212
|
+
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
|
|
213
|
+
raise Legion::LLM::ProviderDown, e.message
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def record_provider_response
|
|
165
217
|
@timeline.record(
|
|
166
218
|
category: :provider, key: 'provider:response_received',
|
|
167
219
|
exchange_id: @exchange_id, direction: :inbound,
|
|
@@ -171,13 +223,86 @@ module Legion
|
|
|
171
223
|
)
|
|
172
224
|
end
|
|
173
225
|
|
|
226
|
+
# Extracts the Retry-After delay (in seconds) from an HTTP error.
#
# Only the delay-seconds form of the header is understood. Any other value
# (for example an HTTP-date) yields nil, meaning "no usable hint", rather
# than 0, which would read as "retry immediately".
#
# @param error [Object] an error whose #response, when present, is a Hash
#   with a :headers entry
# @return [Integer, nil] non-negative delay in seconds, or nil when the
#   header is missing or not a plain number of seconds
def extract_retry_after(error)
  return nil unless error.respond_to?(:response) && error.response.is_a?(Hash)

  raw = error.response[:headers]&.fetch('retry-after', nil)
  return nil if raw.nil?

  # Integer(...) rejects non-numeric text instead of silently returning 0.
  seconds = Integer(raw.to_s.strip, 10, exception: false)
  seconds unless seconds.nil? || seconds.negative?
end
|
|
231
|
+
|
|
232
|
+
# Runs every step listed in PRE_PROVIDER_STEPS, in order, by calling the
# matching step_<name> method. Steps the profile marks as skipped are
# passed over.
def execute_pre_provider_steps
  PRE_PROVIDER_STEPS.each do |step_name|
    send(:"step_#{step_name}") unless Profile.skip?(@profile, step_name)
  end
end
|
|
239
|
+
|
|
240
|
+
# Runs every step listed in POST_PROVIDER_STEPS, in order, by calling the
# matching step_<name> method. Steps the profile marks as skipped are
# passed over.
def execute_post_provider_steps
  POST_PROVIDER_STEPS.each do |step_name|
    send(:"step_#{step_name}") unless Profile.skip?(@profile, step_name)
  end
end
|
|
247
|
+
|
|
248
|
+
# Pipeline step: streaming provider call.
#
# Opens a RubyLLM chat session for the resolved model/provider, registers
# the request's tool classes and any ToolRegistry tools, applies the system
# prompt (with enrichments folded in, as step_provider_call does) and asks
# the last request message, forwarding the given block so the caller
# receives chunks. The final response is kept in @raw_response.
#
# Faraday errors are translated into the typed Legion::LLM hierarchy.
#
# @raise [Legion::LLM::AuthError] on 401/403 responses
# @raise [Legion::LLM::RateLimitError] on 429 responses
# @raise [Legion::LLM::ProviderError] on server errors
# @raise [Legion::LLM::ProviderDown] on connection failures and timeouts
def step_provider_call_stream(&)
  @timestamps[:provider_start] = Time.now
  @timeline.record(
    category: :provider, key: 'provider:request_sent',
    exchange_id: @exchange_id, direction: :outbound,
    detail: "streaming from #{@resolved_provider}",
    from: 'pipeline', to: "provider:#{@resolved_provider}"
  )

  opts = { model: @resolved_model, provider: @resolved_provider }.compact
  session = RubyLLM.chat(**opts)

  (@request.tools || []).each { |tool| session.with_tool(tool) if tool.is_a?(Class) }
  ToolRegistry.tools.each { |t| session.with_tool(t) } if defined?(ToolRegistry)

  # Without this the system prompt and the enrichments gathered by the
  # pre-provider steps would be silently dropped on the streaming path.
  injected_system = EnrichmentInjector.inject(
    system:      @request.system,
    enrichments: @enrichments
  )
  session.with_instructions(injected_system) if injected_system

  # Same guard as the non-streaming path: never ask with a nil message.
  message_content = @request.messages.last&.dig(:content)
  @raw_response = message_content ? session.ask(message_content, &) : session

  @timestamps[:provider_end] = Time.now
  record_provider_response
rescue Faraday::UnauthorizedError, Faraday::ForbiddenError => e
  raise Legion::LLM::AuthError, e.message
rescue Faraday::TooManyRequestsError => e
  raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
rescue Faraday::ServerError => e
  raise Legion::LLM::ProviderError, e.message
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
  raise Legion::LLM::ProviderDown, e.message
end
|
|
277
|
+
|
|
174
278
|
def step_response_normalization; end
|
|
175
279
|
|
|
176
|
-
def
|
|
280
|
+
# Pipeline step: writes the exchange to the conversation store.
#
# Does nothing when the request has no conversation id. Otherwise every
# request message is appended (role defaults to :user), followed by the
# assistant reply when the raw response exposes non-nil content, and a
# timeline event is recorded.
def step_context_store
  conversation_id = @request.conversation_id
  return unless conversation_id

  @request.messages.each do |message|
    role = message[:role]&.to_sym || :user
    ConversationStore.append(conversation_id, role: role, content: message[:content])
  end

  reply = @raw_response
  if reply.respond_to?(:content) && reply.content
    # Token counts are optional on the raw response object.
    tokens_in  = reply.respond_to?(:input_tokens) ? reply.input_tokens : nil
    tokens_out = reply.respond_to?(:output_tokens) ? reply.output_tokens : nil

    ConversationStore.append(
      conversation_id,
      role:          :assistant,
      content:       reply.content,
      provider:      @resolved_provider,
      model:         @resolved_model,
      input_tokens:  tokens_in,
      output_tokens: tokens_out
    )
  end

  @timeline.record(
    category:  :internal,
    key:       'context:stored',
    direction: :internal,
    detail:    "stored to #{conversation_id}",
    from:      'pipeline',
    to:        'conversation_store'
  )
end
|
|
181
306
|
|
|
182
307
|
def step_response_return; end
|
|
183
308
|
|
|
@@ -212,14 +337,6 @@ module Legion
|
|
|
212
337
|
test: @request.test
|
|
213
338
|
)
|
|
214
339
|
end
|
|
215
|
-
|
|
216
|
-
def extract_tokens
|
|
217
|
-
return {} unless @raw_response.respond_to?(:input_tokens)
|
|
218
|
-
|
|
219
|
-
input = @raw_response.input_tokens.to_i
|
|
220
|
-
output = @raw_response.output_tokens.to_i
|
|
221
|
-
{ input: input, output: output, total: input + output }
|
|
222
|
-
end
|
|
223
340
|
end
|
|
224
341
|
end
|
|
225
342
|
end
|