legion-llm 0.6.23 → 0.6.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +46 -0
- data/CLAUDE.md +56 -7
- data/lib/legion/llm/audit/exchange.rb +12 -0
- data/lib/legion/llm/audit/prompt_event.rb +56 -0
- data/lib/legion/llm/audit/tool_event.rb +46 -0
- data/lib/legion/llm/audit.rb +53 -0
- data/lib/legion/llm/conversation_store.rb +3 -1
- data/lib/legion/llm/fleet/dispatcher.rb +93 -22
- data/lib/legion/llm/fleet/error.rb +61 -0
- data/lib/legion/llm/fleet/exchange.rb +12 -0
- data/lib/legion/llm/fleet/reply_dispatcher.rb +40 -2
- data/lib/legion/llm/fleet/request.rb +30 -0
- data/lib/legion/llm/fleet/response.rb +49 -0
- data/lib/legion/llm/fleet.rb +9 -0
- data/lib/legion/llm/metering/event.rb +32 -0
- data/lib/legion/llm/metering/exchange.rb +12 -0
- data/lib/legion/llm/metering.rb +63 -0
- data/lib/legion/llm/patches/ruby_llm_parallel_tools.rb +102 -0
- data/lib/legion/llm/pipeline/audit_publisher.rb +12 -17
- data/lib/legion/llm/pipeline/executor.rb +56 -4
- data/lib/legion/llm/pipeline/steps/metering.rb +3 -36
- data/lib/legion/llm/settings.rb +7 -1
- data/lib/legion/llm/transport/message.rb +82 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +3 -0
- metadata +14 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5e329176f3f041bc5cade11008d7a932325df80d510142da79f362ab5bd27e5a
|
|
4
|
+
data.tar.gz: 46f1e1a12ebd64bafdbe0e4d4f799df4addf586e5c89dfd7824cef4694582681
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 464cf343578d082092d57ed8a8091724a3508925088f795b1c1a1724250ec8876bb18ae32b8957b738df7439a31d1ce436275274997925f7cdc8ebafcdf99080
|
|
7
|
+
data.tar.gz: 0be8ab0bd23eed8cf0320aed8d422f9d1a63937e888ece646ead1c7615add65c8f769f6da250859c8f8e32013bb7daffd3310a311df9d279339902568e4151fb
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,51 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.6.25] - 2026-04-08
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `Legion::LLM::Transport::Message` — LLM base message class with `message_context` propagation, LLM-specific headers (`x-legion-llm-provider`, `x-legion-llm-model`, `x-legion-llm-request-type`, `x-legion-llm-schema-version`), context header promotion, and `tracing_headers` stub for future OpenTelemetry integration
|
|
7
|
+
- `Legion::LLM::Fleet::Exchange` — declares `llm.request` topic exchange (source of truth for fleet routing)
|
|
8
|
+
- `Legion::LLM::Fleet::Request` — fleet inference request message with priority mapping, TTL-to-expiration conversion, and `req_` prefixed message IDs
|
|
9
|
+
- `Legion::LLM::Fleet::Response` — fleet inference response message with default-exchange publish override, Bunny error rescue, and `resp_` prefixed message IDs
|
|
10
|
+
- `Legion::LLM::Fleet::Error` — fleet error message with `ERROR_CODES` registry (12 codes), `x-legion-fleet-error` header, default-exchange publish override, and `err_` prefixed message IDs
|
|
11
|
+
- `Legion::LLM::Metering::Exchange` — declares `llm.metering` topic exchange
|
|
12
|
+
- `Legion::LLM::Metering::Event` — metering event message with tier header, `metering.<type>` routing keys, and `meter_` prefixed message IDs
|
|
13
|
+
- `Legion::LLM::Metering` module — `emit(event)` and `flush_spool` public API replacing gateway dependency for metering
|
|
14
|
+
- `Legion::LLM::Audit::Exchange` — declares `llm.audit` topic exchange (supersedes `Transport::Exchanges::Audit`)
|
|
15
|
+
- `Legion::LLM::Audit::PromptEvent` — prompt audit message (always encrypted) with classification, caller, retention, and tier headers
|
|
16
|
+
- `Legion::LLM::Audit::ToolEvent` — tool call audit message (always encrypted) with tool metadata headers
|
|
17
|
+
- `Legion::LLM::Audit` module — `emit_prompt(event)` and `emit_tools(event)` public API (no spool — audit data too sensitive for plaintext disk)
|
|
18
|
+
- `Fleet::Dispatcher.build_routing_key` — builds `llm.request.<provider>.<type>.<model>` routing keys with `:` to `.` sanitization
|
|
19
|
+
- `Fleet::Dispatcher` per-type timeout resolution (`embed: 10s`, `chat: 30s`, `generate: 30s`) from settings or `TIMEOUTS` constant
|
|
20
|
+
- `Fleet::Dispatcher` backwards-compatible shim supporting both old `(model:, messages:)` and new `(request:, message_context:)` dispatch signatures
|
|
21
|
+
- `Fleet::ReplyDispatcher.fulfill_return` — handles `basic.return` with `no_fleet_queue` error
|
|
22
|
+
- `Fleet::ReplyDispatcher.fulfill_nack` — handles `basic.nack` with `fleet_backpressure` error
|
|
23
|
+
- `Fleet::ReplyDispatcher` type-aware delivery dispatch — handles `llm.fleet.response`, `llm.fleet.error`, and legacy (no type) formats
|
|
24
|
+
- `routing.tier_priority` setting — default `[local, fleet, direct]` three-tier ordering
|
|
25
|
+
- `routing.tiers.fleet.timeouts` setting — per-request-type timeout configuration
|
|
26
|
+
|
|
27
|
+
### Changed
|
|
28
|
+
- `Fleet::Dispatcher#publish_request` now uses `Fleet::Request` message class (falls back to gateway `InferenceRequest` when `Fleet::Request` unavailable)
|
|
29
|
+
- `Pipeline::Steps::Metering#publish_event` now delegates to `Legion::LLM::Metering.emit` instead of `Gateway::Transport::Messages::MeteringEvent`
|
|
30
|
+
- `Pipeline::AuditPublisher#publish` now delegates to `Legion::LLM::Audit.emit_prompt` instead of raw `Transport::Messages::AuditEvent`
|
|
31
|
+
- `routing.tiers.fleet.queue` default changed from `llm.inference` to `llm.request` (fleet exchange rename)
|
|
32
|
+
|
|
33
|
+
## [0.6.24] - 2026-04-08
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
- `Legion::LLM::Patches::RubyLLMParallelTools`: monkey-patch that replaces RubyLLM's serial `handle_tool_calls` loop with concurrent thread execution so all tool calls in a batch run in parallel
|
|
37
|
+
- `ToolResultWrapper` struct exposes `tool_call_id`, `id`, `tool_name`, `result`, and `content` so bridge scripts can match results back to UI slots without falling back to name-based matching
|
|
38
|
+
- `emit_tool_result_event` in `Pipeline::Executor`: fires `tool_event_handler` with `type: :tool_result`, `duration_ms`, `started_at`, and `finished_at` after each tool completes
|
|
39
|
+
- `tool_event_handler` now also fires `type: :model_fallback` events (with `from_model`, `to_model`, `error`, `reason`) on auth-failed provider fallback in both regular and streaming paths
|
|
40
|
+
- `max_tool_rounds` setting (default `200`) in LLM settings; `install_tool_loop_guard` now reads it at call time so callers can override the cap per-session
|
|
41
|
+
- `started_at` timestamp stored in `Thread.current[:legion_current_tool_started_at]` for accurate per-call wall-clock duration even across parallel threads
|
|
42
|
+
|
|
43
|
+
### Changed
|
|
44
|
+
- `MAX_RUBY_LLM_TOOL_ROUNDS` constant raised from `25` to `200` (now serves as a fallback default for the configurable `max_tool_rounds` setting)
|
|
45
|
+
|
|
46
|
+
### Fixed
|
|
47
|
+
- `ConversationStore#db_append_message` now serializes non-String `content` values (e.g., tool-call arrays) to JSON before writing to the database, preventing Sequel type errors when tool-use messages are persisted
|
|
48
|
+
|
|
3
49
|
## [0.6.23] - 2026-04-07
|
|
4
50
|
|
|
5
51
|
### Fixed
|
data/CLAUDE.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
Core LegionIO gem providing LLM capabilities to all extensions. Wraps ruby_llm to provide a consistent interface for chat, embeddings, tool use, and agents across multiple providers (Bedrock, Anthropic, OpenAI, Gemini, Ollama). Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
|
|
9
9
|
|
|
10
10
|
**GitHub**: https://github.com/LegionIO/legion-llm
|
|
11
|
-
**Version**: 0.6.
|
|
11
|
+
**Version**: 0.6.25
|
|
12
12
|
**License**: Apache-2.0
|
|
13
13
|
|
|
14
14
|
## Architecture
|
|
@@ -69,9 +69,20 @@ Legion::LLM (lib/legion/llm.rb)
|
|
|
69
69
|
│ McpToolAdapter renamed to ToolAdapter; McpToolAdapter kept as a backwards-compatible alias.
|
|
70
70
|
├── CostEstimator # Model cost estimation with fuzzy pricing (absorbed from lex-llm-gateway)
|
|
71
71
|
├── Fleet # Fleet RPC dispatch (absorbed from lex-llm-gateway)
|
|
72
|
-
│ ├──
|
|
72
|
+
│ ├── Exchange # Declares `llm.request` topic exchange (source of truth)
|
|
73
|
+
│ ├── Request # Fleet inference request message (type: 'llm.fleet.request')
|
|
74
|
+
│ ├── Response # Fleet inference response message (type: 'llm.fleet.response', default exchange publish)
|
|
75
|
+
│ ├── Error # Fleet error message (type: 'llm.fleet.error', ERROR_CODES registry)
|
|
76
|
+
│ ├── Dispatcher # Fleet dispatch with timeout and routing key building
|
|
73
77
|
│ ├── Handler # Fleet request handler for GPU worker nodes
|
|
74
|
-
│ └── ReplyDispatcher # Correlation-based reply routing
|
|
78
|
+
│ └── ReplyDispatcher # Correlation-based reply routing with type-aware dispatch, fulfill_return, fulfill_nack
|
|
79
|
+
├── Metering # Metering event emission (replaces gateway dependency)
|
|
80
|
+
│ ├── Exchange # Declares `llm.metering` topic exchange
|
|
81
|
+
│ └── Event # Metering event message (type: 'llm.metering.event')
|
|
82
|
+
├── Audit # Audit event emission (replaces gateway dependency)
|
|
83
|
+
│ ├── Exchange # Declares `llm.audit` topic exchange
|
|
84
|
+
│ ├── PromptEvent # Prompt audit message (type: 'llm.audit.prompt', always encrypted)
|
|
85
|
+
│ └── ToolEvent # Tool audit message (type: 'llm.audit.tool', always encrypted)
|
|
75
86
|
└── Helpers::LLM # Extension helper mixin (llm_chat, llm_embed, llm_session, compress:)
|
|
76
87
|
```
|
|
77
88
|
|
|
@@ -181,6 +192,20 @@ Legion::LLM.chat(message:, escalate: true, max_escalations: 3, quality_check:) #
|
|
|
181
192
|
Legion::LLM::EscalationExhausted # raised when all escalation attempts are exhausted
|
|
182
193
|
Legion::LLM::Router.resolve_chain(intent:, tier:, max_escalations:) # -> EscalationChain
|
|
183
194
|
Legion::LLM::QualityChecker.check(response, quality_threshold: 50, json_expected: false, quality_check: nil) # -> QualityResult
|
|
195
|
+
|
|
196
|
+
# Metering
|
|
197
|
+
Legion::LLM::Metering.emit(event_hash) # -> :published | :spooled | :dropped
|
|
198
|
+
Legion::LLM::Metering.flush_spool # -> Integer (count flushed)
|
|
199
|
+
|
|
200
|
+
# Audit
|
|
201
|
+
Legion::LLM::Audit.emit_prompt(event_hash) # -> :published | :dropped
|
|
202
|
+
Legion::LLM::Audit.emit_tools(event_hash) # -> :published | :dropped
|
|
203
|
+
|
|
204
|
+
# Fleet Dispatcher
|
|
205
|
+
Legion::LLM::Fleet::Dispatcher.dispatch(model:, messages:, **) # Old signature (backwards compat)
|
|
206
|
+
Legion::LLM::Fleet::Dispatcher.dispatch(request:, message_context:, routing_key:, **) # New signature
|
|
207
|
+
Legion::LLM::Fleet::Dispatcher.build_routing_key(provider:, request_type:, model:) # -> String
|
|
208
|
+
Legion::LLM::Fleet::Dispatcher.fleet_available? # -> Boolean
|
|
184
209
|
```
|
|
185
210
|
|
|
186
211
|
## Settings
|
|
@@ -347,10 +372,22 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
347
372
|
| `lib/legion/llm/pipeline/steps/rag_guard.rb` | Pipeline::Steps::RagGuard: faithfulness check against retrieved RAG context |
|
|
348
373
|
| `lib/legion/llm/pipeline/enrichment_injector.rb` | Pipeline::EnrichmentInjector: converts RAG/GAIA enrichments into system prompt |
|
|
349
374
|
| `lib/legion/llm/cost_estimator.rb` | CostEstimator: model cost estimation with fuzzy pricing |
|
|
350
|
-
| `lib/legion/llm/
|
|
351
|
-
| `lib/legion/llm/fleet
|
|
375
|
+
| `lib/legion/llm/transport/message.rb` | LLM base message class: message_context propagation, LLM headers, envelope key stripping |
|
|
376
|
+
| `lib/legion/llm/fleet.rb` | Fleet module: requires exchange, request, response, error, dispatcher, handler, reply_dispatcher |
|
|
377
|
+
| `lib/legion/llm/fleet/exchange.rb` | Fleet::Exchange: declares `llm.request` topic exchange |
|
|
378
|
+
| `lib/legion/llm/fleet/request.rb` | Fleet::Request: fleet inference request with priority mapping, TTL conversion |
|
|
379
|
+
| `lib/legion/llm/fleet/response.rb` | Fleet::Response: fleet response with default-exchange publish |
|
|
380
|
+
| `lib/legion/llm/fleet/error.rb` | Fleet::Error: fleet error with ERROR_CODES registry, error headers |
|
|
381
|
+
| `lib/legion/llm/fleet/dispatcher.rb` | Fleet::Dispatcher: fleet RPC dispatch with routing key building, per-type timeouts |
|
|
352
382
|
| `lib/legion/llm/fleet/handler.rb` | Fleet::Handler: fleet request handler |
|
|
353
|
-
| `lib/legion/llm/fleet/reply_dispatcher.rb` | Fleet::ReplyDispatcher:
|
|
383
|
+
| `lib/legion/llm/fleet/reply_dispatcher.rb` | Fleet::ReplyDispatcher: type-aware reply routing, fulfill_return, fulfill_nack |
|
|
384
|
+
| `lib/legion/llm/metering.rb` | Metering module: emit, flush_spool public API |
|
|
385
|
+
| `lib/legion/llm/metering/exchange.rb` | Metering::Exchange: declares `llm.metering` topic exchange |
|
|
386
|
+
| `lib/legion/llm/metering/event.rb` | Metering::Event: metering event message with tier header |
|
|
387
|
+
| `lib/legion/llm/audit.rb` | Audit module: emit_prompt, emit_tools public API |
|
|
388
|
+
| `lib/legion/llm/audit/exchange.rb` | Audit::Exchange: declares `llm.audit` topic exchange |
|
|
389
|
+
| `lib/legion/llm/audit/prompt_event.rb` | Audit::PromptEvent: prompt audit with classification/caller/retention headers |
|
|
390
|
+
| `lib/legion/llm/audit/tool_event.rb` | Audit::ToolEvent: tool audit with tool metadata headers |
|
|
354
391
|
| `lib/legion/llm/helpers/llm.rb` | Extension helper mixin: llm_chat (with compress:, escalate:, max_escalations:, quality_check:), llm_embed, llm_session |
|
|
355
392
|
| `spec/legion/llm_spec.rb` | Tests: settings, lifecycle, providers, auto-config |
|
|
356
393
|
| `spec/legion/llm/integration_spec.rb` | Tests: routing integration with chat() |
|
|
@@ -390,8 +427,20 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
390
427
|
| `spec/legion/llm/pipeline/executor_spec.rb` | Tests: Executor pipeline execution, profile skipping |
|
|
391
428
|
| `spec/legion/llm/pipeline/integration_spec.rb` | Tests: Pipeline integration with chat() dispatch |
|
|
392
429
|
| `spec/legion/llm/pipeline/steps/metering_spec.rb` | Tests: Metering event building |
|
|
393
|
-
| `spec/legion/llm/
|
|
430
|
+
| `spec/legion/llm/transport/message_spec.rb` | Tests: LLM base message class |
|
|
431
|
+
| `spec/legion/llm/fleet/exchange_spec.rb` | Tests: fleet exchange declaration |
|
|
432
|
+
| `spec/legion/llm/fleet/request_spec.rb` | Tests: Fleet::Request message |
|
|
433
|
+
| `spec/legion/llm/fleet/response_spec.rb` | Tests: Fleet::Response message |
|
|
434
|
+
| `spec/legion/llm/fleet/error_spec.rb` | Tests: Fleet::Error message |
|
|
435
|
+
| `spec/legion/llm/fleet/dispatcher_spec.rb` | Tests: Fleet dispatch, routing keys, per-type timeouts, ReplyDispatcher |
|
|
394
436
|
| `spec/legion/llm/fleet/handler_spec.rb` | Tests: Fleet handler, auth, response building |
|
|
437
|
+
| `spec/legion/llm/metering/exchange_spec.rb` | Tests: metering exchange |
|
|
438
|
+
| `spec/legion/llm/metering/event_spec.rb` | Tests: Metering::Event message |
|
|
439
|
+
| `spec/legion/llm/metering_spec.rb` | Tests: Metering emit/spool API |
|
|
440
|
+
| `spec/legion/llm/audit/exchange_spec.rb` | Tests: audit exchange |
|
|
441
|
+
| `spec/legion/llm/audit/prompt_event_spec.rb` | Tests: Audit::PromptEvent |
|
|
442
|
+
| `spec/legion/llm/audit/tool_event_spec.rb` | Tests: Audit::ToolEvent |
|
|
443
|
+
| `spec/legion/llm/audit_spec.rb` | Tests: Audit emit API |
|
|
395
444
|
| `spec/legion/llm/pipeline/steps/rag_context_spec.rb` | Tests: RAG context strategy selection, Apollo retrieval, graceful degradation |
|
|
396
445
|
| `spec/legion/llm/pipeline/steps/rag_guard_spec.rb` | Tests: RAG faithfulness checking |
|
|
397
446
|
| `spec/legion/llm/pipeline/enrichment_injector_spec.rb` | Tests: enrichment injection into system prompt |
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../transport/message'
|
|
4
|
+
|
|
5
|
+
module Legion
  module LLM
    module Audit
      # Audit message for a single LLM prompt (AMQP type 'llm.audit.prompt').
      #
      # Targets Legion::LLM::Audit::Exchange with an `audit.prompt.<request_type>`
      # routing key and always reports itself as encrypted. Classification,
      # caller, retention and tier values found in the message options are
      # added to the headers inherited from the base message class.
      class PromptEvent < Legion::LLM::Transport::Message
        def type
          'llm.audit.prompt'
        end

        def exchange
          Legion::LLM::Audit::Exchange
        end

        def routing_key
          "audit.prompt.#{@options[:request_type]}"
        end

        def priority
          0
        end

        def encrypt?
          true
        end

        def expiration
          nil
        end

        # Inherited headers plus each audit-specific header group.
        def headers
          super.merge(classification_headers, caller_headers, retention_headers, tier_header)
        end

        private

        def message_id_prefix
          'audit_prompt'
        end

        # Headers derived from options[:classification]; absent values are omitted.
        def classification_headers
          classification = @options[:classification] || {}

          {}.tap do |headers|
            headers['x-legion-classification'] = classification[:level].to_s if classification[:level]
            # contains_phi may legitimately be false, so only nil is treated as "not provided".
            headers['x-legion-contains-phi'] = classification[:contains_phi].to_s unless classification[:contains_phi].nil?
            if classification[:jurisdictions]
              headers['x-legion-jurisdictions'] = Array(classification[:jurisdictions]).join(',')
            end
          end
        end

        # Headers derived from options[:caller][:requested_by].
        def caller_headers
          requester = @options.dig(:caller, :requested_by) || {}

          {}.tap do |headers|
            headers['x-legion-caller-identity'] = requester[:identity].to_s if requester[:identity]
            headers['x-legion-caller-type'] = requester[:type].to_s if requester[:type]
          end
        end

        # Retention header, read from options[:classification][:retention].
        def retention_headers
          retention = (@options[:classification] || {})[:retention]
          retention ? { 'x-legion-retention' => retention.to_s } : {}
        end

        # Tier header, read from options[:tier].
        def tier_header
          tier = @options[:tier]
          tier ? { 'x-legion-llm-tier' => tier.to_s } : {}
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../transport/message'
|
|
4
|
+
|
|
5
|
+
module Legion
  module LLM
    module Audit
      # Audit message for a single tool call (AMQP type 'llm.audit.tool').
      #
      # Targets Legion::LLM::Audit::Exchange with an `audit.tool.<tool_name>`
      # routing key and always reports itself as encrypted. Tool metadata and
      # classification values from the message options are added to the
      # headers inherited from the base message class.
      class ToolEvent < Legion::LLM::Transport::Message
        def type
          'llm.audit.tool'
        end

        def exchange
          Legion::LLM::Audit::Exchange
        end

        def routing_key
          "audit.tool.#{@options[:tool_name]}"
        end

        def priority
          0
        end

        def encrypt?
          true
        end

        def expiration
          nil
        end

        # Inherited headers plus tool and classification headers.
        def headers
          super.merge(tool_headers, classification_headers)
        end

        private

        def message_id_prefix
          'audit_tool'
        end

        # Headers derived from options[:tool_call]; the tool name falls back to
        # options[:tool_name] when the tool call carries no :name.
        def tool_headers
          call = @options[:tool_call] || {}
          source = call[:source] || {}
          name = call[:name] || @options[:tool_name]

          {}.tap do |headers|
            headers['x-legion-tool-name'] = name.to_s if name
            headers['x-legion-tool-source-type'] = source[:type].to_s if source[:type]
            headers['x-legion-tool-source-server'] = source[:server].to_s if source[:server]
            headers['x-legion-tool-status'] = call[:status].to_s if call[:status]
          end
        end

        # Headers derived from options[:classification]; absent values are omitted.
        def classification_headers
          classification = @options[:classification] || {}

          {}.tap do |headers|
            headers['x-legion-classification'] = classification[:level].to_s if classification[:level]
            # contains_phi may legitimately be false, so only nil is treated as "not provided".
            headers['x-legion-contains-phi'] = classification[:contains_phi].to_s unless classification[:contains_phi].nil?
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/logging/helper'
|
|
4
|
+
|
|
5
|
+
if defined?(Legion::Transport::Message)
|
|
6
|
+
require_relative 'audit/exchange'
|
|
7
|
+
require_relative 'audit/prompt_event'
|
|
8
|
+
require_relative 'audit/tool_event'
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
module Legion
  module LLM
    # Public entry points for publishing LLM audit events.
    #
    # Each emitter returns :published after the event message's #publish call
    # completes, and :dropped when the transport is not connected, the event
    # class is not loaded, or a StandardError is raised (which is passed to
    # handle_exception).
    module Audit
      extend Legion::Logging::Helper

      module_function

      # Publishes a prompt audit event built from +event+ (a Hash splatted into
      # PromptEvent.new).
      def emit_prompt(event)
        publishable = transport_connected? && defined?(Legion::LLM::Audit::PromptEvent)
        unless publishable
          log.warn('[llm][audit] dropped prompt audit: transport unavailable')
          return :dropped
        end

        Legion::LLM::Audit::PromptEvent.new(**event).publish
        log.info('[llm][audit] published prompt audit')
        :published
      rescue StandardError => e
        handle_exception(e, level: :warn, operation: 'llm.audit.emit_prompt')
        :dropped
      end

      # Publishes a tool audit event built from +event+ (a Hash splatted into
      # ToolEvent.new).
      def emit_tools(event)
        publishable = transport_connected? && defined?(Legion::LLM::Audit::ToolEvent)
        unless publishable
          log.warn('[llm][audit] dropped tool audit: transport unavailable')
          return :dropped
        end

        Legion::LLM::Audit::ToolEvent.new(**event).publish
        log.info('[llm][audit] published tool audit')
        :published
      rescue StandardError => e
        handle_exception(e, level: :warn, operation: 'llm.audit.emit_tools')
        :dropped
      end

      # True only when Legion::Transport is loaded, exposes connected?, and
      # reports a truthy connection state. Always returns a strict boolean.
      def transport_connected?
        return false unless defined?(Legion::Transport)
        return false unless Legion::Transport.respond_to?(:connected?)

        Legion::Transport.connected? ? true : false
      end
    end
  end
end
|
|
@@ -373,11 +373,13 @@ module Legion
|
|
|
373
373
|
end
|
|
374
374
|
|
|
375
375
|
def db_append_message(conversation_id, msg)
|
|
376
|
+
content = msg[:content]
|
|
377
|
+
content = content.to_json unless content.is_a?(String) || content.nil?
|
|
376
378
|
row = {
|
|
377
379
|
conversation_id: conversation_id,
|
|
378
380
|
seq: msg[:seq],
|
|
379
381
|
role: msg[:role].to_s,
|
|
380
|
-
content:
|
|
382
|
+
content: content,
|
|
381
383
|
provider: msg[:provider]&.to_s,
|
|
382
384
|
model: msg[:model]&.to_s,
|
|
383
385
|
input_tokens: msg[:input_tokens],
|
|
@@ -7,18 +7,66 @@ module Legion
|
|
|
7
7
|
module Fleet
|
|
8
8
|
module Dispatcher
|
|
9
9
|
# Default fleet timeout value.
DEFAULT_TIMEOUT = 30

# Built-in timeouts keyed by request type; :default is used for any type
# without its own entry.
TIMEOUTS = { embed: 10, chat: 30, generate: 30, default: 30 }.freeze
|
|
17
|
+
|
|
10
18
|
extend Legion::Logging::Helper
|
|
11
19
|
|
|
12
20
|
module_function
|
|
13
21
|
|
|
14
|
-
|
|
15
|
-
|
|
22
|
+
# Sends an inference request to the fleet and waits for its reply.
#
# Accepts both calling conventions:
#   legacy:  dispatch(model:, messages:, **opts)
#   current: dispatch(request:, message_context:, routing_key:, reply_to:, **opts)
#
# provider defaults to 'ollama' and request_type to 'chat' when not supplied.
# Returns the value of wait_for_response, or an error hash when the fleet is
# not available.
def dispatch(model: nil, messages: nil, request: nil, message_context: {}, routing_key: nil, reply_to: nil, **opts)
  return error_result('fleet_unavailable', message_context: message_context) unless fleet_available?

  if request.nil? && (model || messages)
    # Legacy convention: parameters come from model/messages plus loose opts.
    provider = opts[:provider] || 'ollama'
    request_type = opts[:request_type] || 'chat'
    timeout_override = opts[:timeout]
    extras = { messages: messages, message_context: message_context, **opts }
  else
    # Current convention: request fields first, explicit opts win on clashes.
    request_opts = request.respond_to?(:to_h) ? request.to_h.transform_keys(&:to_sym) : {}
    request_opts = request_opts.merge(opts)
    provider = request_opts[:provider] || 'ollama'
    request_type = request_opts[:request_type] || 'chat'
    model = request_opts[:model]
    timeout_override = request_opts[:timeout] || opts[:timeout]
    extras = {
      message_context: message_context,
      **request_opts.except(:provider, :model, :request_type, :timeout)
    }
  end

  routing_key ||= build_routing_key(provider: provider, request_type: request_type, model: model)
  reply_to ||= ReplyDispatcher.agent_queue_name
  # extras is splatted last so its keys take precedence over the explicit ones.
  correlation_id = publish_request(
    routing_key: routing_key, reply_to: reply_to,
    provider: provider, model: model, request_type: request_type,
    **extras
  )
  timeout = resolve_timeout(request_type: request_type, override: timeout_override)
  wait_for_response(correlation_id, timeout: timeout, message_context: message_context)
end
|
|
16
63
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
64
|
+
# Builds the fleet routing key `llm.request.<provider>.<request_type>.<model>`,
# with the model passed through sanitize_model.
def build_routing_key(provider:, request_type:, model:)
  format('llm.request.%s.%s.%s', provider, request_type, sanitize_model(model))
end
|
|
20
67
|
|
|
21
|
-
|
|
68
|
+
# Converts a model identifier into routing-key form by replacing every ':'
# with '.'. Any value is accepted; it is stringified first, so nil yields ''.
def sanitize_model(model)
  # tr is the direct character-for-character substitution; no pattern matching needed.
  model.to_s.tr(':', '.')
end
|
|
23
71
|
|
|
24
72
|
def fleet_available?
|
|
@@ -48,10 +96,17 @@ module Legion
|
|
|
48
96
|
routing.fetch(:use_fleet, true)
|
|
49
97
|
end
|
|
50
98
|
|
|
51
|
-
def resolve_timeout(override)
|
|
99
|
+
# Picks the timeout for a fleet request. Precedence: explicit override, then
# the value from settings for this request type, then the TIMEOUTS entry for
# the type, then TIMEOUTS[:default].
def resolve_timeout(request_type: :default, override: nil)
  override ||
    fleet_timeout_from_settings(request_type) ||
    TIMEOUTS[request_type.to_sym] ||
    TIMEOUTS[:default]
end
|
|
107
|
+
|
|
108
|
+
def fleet_timeout_from_settings(request_type)
|
|
109
|
+
return unless defined?(Legion::Settings)
|
|
55
110
|
|
|
56
111
|
settings = begin
|
|
57
112
|
Legion::Settings[:llm]
|
|
@@ -59,35 +114,51 @@ module Legion
|
|
|
59
114
|
handle_exception(e, level: :debug, operation: 'llm.fleet.dispatcher.resolve_timeout')
|
|
60
115
|
nil
|
|
61
116
|
end
|
|
62
|
-
return DEFAULT_TIMEOUT unless settings.is_a?(Hash)
|
|
63
117
|
|
|
64
|
-
settings.
|
|
118
|
+
return unless settings.is_a?(Hash)
|
|
119
|
+
|
|
120
|
+
routing = settings[:routing]
|
|
121
|
+
return unless routing.is_a?(Hash)
|
|
122
|
+
|
|
123
|
+
fleet_settings = routing.dig(:tiers, :fleet)
|
|
124
|
+
fleet_settings = routing[:fleet] unless fleet_settings.is_a?(Hash)
|
|
125
|
+
return unless fleet_settings.is_a?(Hash)
|
|
126
|
+
|
|
127
|
+
fleet_settings.dig(:timeouts, request_type.to_sym) || fleet_settings[:timeout_seconds]
|
|
65
128
|
end
|
|
66
129
|
|
|
67
|
-
def publish_request(**)
|
|
68
|
-
|
|
130
|
+
# Publishes a fleet inference request and returns its correlation id.
#
# A fresh `req_<uuid>` id is generated and added to the message options as
# :fleet_correlation_id. Fleet::Request is used when loaded; otherwise the
# gateway InferenceRequest class is used if present.
#
# When neither message class is loaded nothing can be published. The id is
# still returned (callers depend on that), but a warning is logged so the
# timeout the caller will subsequently hit can be diagnosed.
def publish_request(**opts)
  correlation_id = "req_#{SecureRandom.uuid}"
  opts[:fleet_correlation_id] = correlation_id

  if defined?(Legion::LLM::Fleet::Request)
    Legion::LLM::Fleet::Request.new(**opts).publish
  elsif defined?(Legion::Extensions::LLM::Gateway::Transport::Messages::InferenceRequest)
    Legion::Extensions::LLM::Gateway::Transport::Messages::InferenceRequest.new(
      reply_to: opts[:reply_to], **opts.except(:reply_to)
    ).publish
  else
    log.warn('[llm][fleet] request not published: no fleet request message class is loaded')
  end

  correlation_id
end
|
|
74
144
|
|
|
75
|
-
def wait_for_response(correlation_id, timeout:)
|
|
145
|
+
# Registers interest in +correlation_id+ with ReplyDispatcher and waits up to
# +timeout+ for the reply. A falsy reply or a cancelled wait is reported as a
# fleet_timeout result. The registration is always removed afterwards.
def wait_for_response(correlation_id, timeout:, message_context: {})
  pending = ReplyDispatcher.register(correlation_id)
  reply = pending.value!(timeout)
  return reply if reply

  timeout_result(correlation_id, timeout, message_context: message_context)
rescue Concurrent::CancelledOperationError
  timeout_result(correlation_id, timeout, message_context: message_context)
ensure
  ReplyDispatcher.deregister(correlation_id)
end
|
|
84
154
|
|
|
85
|
-
def timeout_result(correlation_id, timeout)
|
|
86
|
-
{ success: false, error: 'fleet_timeout', correlation_id: correlation_id,
|
|
155
|
+
# Failure hash returned when no fleet reply was obtained for +correlation_id+.
def timeout_result(correlation_id, timeout, message_context: {})
  {
    success: false,
    error: 'fleet_timeout',
    correlation_id: correlation_id,
    timeout: timeout,
    message_context: message_context
  }
end
|
|
88
159
|
|
|
89
|
-
def error_result(reason)
|
|
90
|
-
{ success: false, error: reason }
|
|
160
|
+
# Failure hash carrying +reason+ as the error, plus the caller's message context.
def error_result(reason, message_context: {})
  {
    success: false,
    error: reason,
    message_context: message_context
  }
end
|
|
92
163
|
end
|
|
93
164
|
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../transport/message'
|
|
4
|
+
|
|
5
|
+
module Legion
  module LLM
    module Fleet
      # Fleet error reply message (AMQP type 'llm.fleet.error').
      #
      # Routed using the :reply_to option and published through the channel's
      # default exchange. The code found at options[:error][:code] is exposed
      # in the 'x-legion-fleet-error' header.
      class Error < Legion::LLM::Transport::Message
        # Known fleet error codes.
        ERROR_CODES = %w[
          model_not_loaded
          ollama_unavailable
          inference_failed
          inference_timeout
          invalid_token
          token_expired
          payload_too_large
          unsupported_type
          unsupported_streaming
          no_fleet_queue
          fleet_backpressure
          fleet_timeout
        ].freeze

        def type
          'llm.fleet.error'
        end

        def routing_key
          @options[:reply_to]
        end

        def priority
          0
        end

        def expiration
          nil
        end

        def encrypt?
          false
        end

        # Inherited headers plus the error-code header and tracing headers.
        def headers
          super.merge(error_headers, tracing_headers)
        end

        # Publishes via the channel's default exchange instead of a named one.
        # The connection-level failures listed in the rescue clause are handed
        # to spool_message rather than propagated.
        def publish(options = @options)
          raise unless @valid

          validate_payload_size
          reply_exchange = channel.default_exchange
          reply_exchange.publish(
            encode_message,
            routing_key: routing_key,
            content_type: options[:content_type] || content_type,
            content_encoding: options[:content_encoding] || content_encoding,
            headers: headers,
            type: type,
            priority: priority,
            message_id: message_id,
            correlation_id: correlation_id,
            app_id: app_id,
            timestamp: timestamp
          )
        rescue Bunny::ConnectionClosedError, Bunny::ChannelAlreadyClosed,
               Bunny::NetworkErrorWrapper, IOError, Timeout::Error => e
          spool_message(e)
        end

        private

        def message_id_prefix
          'err'
        end

        # Header carrying the error code, or an empty hash when none is set.
        def error_headers
          code = @options.dig(:error, :code)
          code ? { 'x-legion-fleet-error' => code.to_s } : {}
        end
      end
    end
  end
end
|
|
@@ -34,11 +34,36 @@ module Legion
|
|
|
34
34
|
future = @pending.delete(cid)
|
|
35
35
|
return unless future
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
# Type-aware dispatch (new protocol) with fallback to legacy (no type)
|
|
38
|
+
case properties[:type]
|
|
39
|
+
when 'llm.fleet.error'
|
|
40
|
+
future.fulfill(normalize_error(payload))
|
|
41
|
+
else
|
|
42
|
+
# 'llm.fleet.response' or legacy (no type)
|
|
43
|
+
future.fulfill(payload)
|
|
44
|
+
end
|
|
38
45
|
rescue StandardError => e
|
|
39
46
|
handle_exception(e, level: :warn)
|
|
40
47
|
end
|
|
41
48
|
|
|
49
|
+
# Resolves the pending future for +correlation_id+ (if one is registered) with
# a 'no_fleet_queue' failure. Errors are passed to handle_exception.
def fulfill_return(correlation_id)
  pending = @pending.delete(correlation_id)
  pending.fulfill({ success: false, error: 'no_fleet_queue' }) if pending
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.fleet.reply_dispatcher.fulfill_return')
end
|
|
57
|
+
|
|
58
|
+
# Resolves the pending future for +correlation_id+ (if one is registered) with
# a 'fleet_backpressure' failure. Errors are passed to handle_exception.
def fulfill_nack(correlation_id)
  pending = @pending.delete(correlation_id)
  pending.fulfill({ success: false, error: 'fleet_backpressure' }) if pending
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.fleet.reply_dispatcher.fulfill_nack')
end
|
|
66
|
+
|
|
42
67
|
# Name of the reply queue: `llm.fleet.reply.` followed by 16 random hex
# characters. Generated on first use and reused on later calls.
def agent_queue_name
  return @agent_queue_name if @agent_queue_name

  @agent_queue_name = "llm.fleet.reply.#{SecureRandom.hex(8)}"
end
|
|
@@ -62,7 +87,10 @@ module Legion
|
|
|
62
87
|
channel = Legion::Transport.connection.create_channel
|
|
63
88
|
queue = channel.queue(agent_queue_name, auto_delete: true, durable: false)
|
|
64
89
|
@consumer = queue.subscribe(manual_ack: false) do |_delivery, properties, body|
|
|
65
|
-
props = {
|
|
90
|
+
props = {
|
|
91
|
+
correlation_id: properties.correlation_id,
|
|
92
|
+
type: properties.type
|
|
93
|
+
}
|
|
66
94
|
handle_delivery(body, props)
|
|
67
95
|
end
|
|
68
96
|
end
|
|
@@ -96,6 +124,16 @@ module Legion
|
|
|
96
124
|
handle_exception(e, level: :debug)
|
|
97
125
|
{}
|
|
98
126
|
end
|
|
127
|
+
|
|
128
|
+
# Converts a fleet error payload into the failure hash handed to the waiting
# caller: { success: false, error:, message_context:, raw_error: }.
#
# :error in the result is the error's code, else its message, else
# 'fleet_error'; a non-Hash error value is stringified. Both symbol and string
# keys are recognised.
#
# This must not raise: a payload that is not a Hash (nil, Array, String) is
# treated as empty, so the caller always receives a failure result instead of
# being left to wait for a timeout.
def normalize_error(payload)
  payload = {} unless payload.is_a?(Hash)
  error = payload[:error] || payload['error'] || {}

  code =
    if error.is_a?(Hash)
      error[:code] || error['code'] || error[:message] || error['message'] || 'fleet_error'
    else
      error.to_s
    end

  {
    success: false,
    error: code,
    message_context: payload[:message_context] || payload['message_context'] || {},
    raw_error: error
  }
end
|
|
99
137
|
end
|
|
100
138
|
end
|
|
101
139
|
end
|