legion-llm 0.6.8 → 0.6.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +4 -4
- data/CHANGELOG.md +44 -0
- data/README.md +35 -19
- data/lib/legion/llm/arbitrage.rb +5 -2
- data/lib/legion/llm/batch.rb +20 -8
- data/lib/legion/llm/cache.rb +8 -5
- data/lib/legion/llm/claude_config_loader.rb +7 -4
- data/lib/legion/llm/codex_config_loader.rb +8 -4
- data/lib/legion/llm/compressor.rb +11 -7
- data/lib/legion/llm/confidence_scorer.rb +8 -2
- data/lib/legion/llm/context_curator.rb +13 -8
- data/lib/legion/llm/conversation_store.rb +11 -6
- data/lib/legion/llm/cost_tracker.rb +5 -2
- data/lib/legion/llm/daemon_client.rb +18 -11
- data/lib/legion/llm/discovery/ollama.rb +8 -5
- data/lib/legion/llm/discovery/system.rb +8 -5
- data/lib/legion/llm/embeddings.rb +108 -18
- data/lib/legion/llm/escalation_tracker.rb +8 -5
- data/lib/legion/llm/fleet/dispatcher.rb +7 -2
- data/lib/legion/llm/fleet/handler.rb +160 -9
- data/lib/legion/llm/fleet/reply_dispatcher.rb +8 -5
- data/lib/legion/llm/helper.rb +22 -9
- data/lib/legion/llm/hooks/budget_guard.rb +6 -3
- data/lib/legion/llm/hooks/cost_tracking.rb +4 -1
- data/lib/legion/llm/hooks/metering.rb +5 -2
- data/lib/legion/llm/hooks/rag_guard.rb +5 -6
- data/lib/legion/llm/hooks/reciprocity.rb +5 -2
- data/lib/legion/llm/hooks/reflection.rb +11 -9
- data/lib/legion/llm/hooks/response_guard.rb +4 -1
- data/lib/legion/llm/hooks.rb +5 -2
- data/lib/legion/llm/native_dispatch.rb +13 -1
- data/lib/legion/llm/off_peak.rb +9 -2
- data/lib/legion/llm/override_confidence.rb +9 -6
- data/lib/legion/llm/pipeline/audit_publisher.rb +7 -4
- data/lib/legion/llm/pipeline/enrichment_injector.rb +5 -0
- data/lib/legion/llm/pipeline/executor.rb +211 -99
- data/lib/legion/llm/pipeline/gaia_caller.rb +7 -0
- data/lib/legion/llm/pipeline/mcp_tool_adapter.rb +22 -9
- data/lib/legion/llm/pipeline/request.rb +8 -4
- data/lib/legion/llm/pipeline/steps/billing.rb +13 -0
- data/lib/legion/llm/pipeline/steps/classification.rb +6 -1
- data/lib/legion/llm/pipeline/steps/confidence_scoring.rb +5 -0
- data/lib/legion/llm/pipeline/steps/debate.rb +6 -0
- data/lib/legion/llm/pipeline/steps/gaia_advisory.rb +12 -5
- data/lib/legion/llm/pipeline/steps/knowledge_capture.rb +8 -1
- data/lib/legion/llm/pipeline/steps/mcp_discovery.rb +30 -3
- data/lib/legion/llm/pipeline/steps/metering.rb +10 -1
- data/lib/legion/llm/pipeline/steps/post_response.rb +24 -0
- data/lib/legion/llm/pipeline/steps/prompt_cache.rb +6 -0
- data/lib/legion/llm/pipeline/steps/rag_context.rb +9 -2
- data/lib/legion/llm/pipeline/steps/rag_guard.rb +5 -0
- data/lib/legion/llm/pipeline/steps/rbac.rb +16 -1
- data/lib/legion/llm/pipeline/steps/span_annotator.rb +6 -1
- data/lib/legion/llm/pipeline/steps/tier_assigner.rb +19 -3
- data/lib/legion/llm/pipeline/steps/token_budget.rb +4 -1
- data/lib/legion/llm/pipeline/steps/tool_calls.rb +66 -4
- data/lib/legion/llm/pipeline/tool_dispatcher.rb +5 -1
- data/lib/legion/llm/provider_registry.rb +8 -0
- data/lib/legion/llm/providers.rb +28 -23
- data/lib/legion/llm/quality_checker.rb +4 -1
- data/lib/legion/llm/response_cache.rb +18 -5
- data/lib/legion/llm/router/gateway_interceptor.rb +5 -2
- data/lib/legion/llm/router/health_tracker.rb +10 -7
- data/lib/legion/llm/router/rule.rb +9 -6
- data/lib/legion/llm/router.rb +13 -11
- data/lib/legion/llm/routes.rb +300 -69
- data/lib/legion/llm/scheduling.rb +6 -3
- data/lib/legion/llm/settings.rb +7 -4
- data/lib/legion/llm/shadow_eval.rb +12 -6
- data/lib/legion/llm/structured_output.rb +16 -13
- data/lib/legion/llm/token_tracker.rb +6 -3
- data/lib/legion/llm/tool_registry.rb +16 -2
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +390 -76
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 245c24519b2180958f4a214034612fc0635e53aa73ed54d84534319c8d2b6e6d
|
|
4
|
+
data.tar.gz: b28143d46d537c40bd912b70a24a12add3ba0dfacd161415b0c824b4d70cb9eb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 73279e263a95b388adc0a7831b288291d9c05ef4a1957f95cdae4434ebf5401b93197b73ae0a9c3d2cfed637758c597133d16e1f2935b0df17b726d57c7ac356
|
|
7
|
+
data.tar.gz: d5c3e9d471aaeb6cb3787e85fb15be6da95ed13d660c77414d75928f1720838bd7971d61d5b4b99539788c190bf2286bb4ce23feab82ceed0bf42037b8c8063e
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -14,7 +14,7 @@ Layout/HashAlignment:
|
|
|
14
14
|
EnforcedColonStyle: table
|
|
15
15
|
|
|
16
16
|
Metrics/MethodLength:
|
|
17
|
-
Max:
|
|
17
|
+
Max: 60
|
|
18
18
|
|
|
19
19
|
Metrics/ClassLength:
|
|
20
20
|
Max: 1500
|
|
@@ -28,13 +28,13 @@ Metrics/BlockLength:
|
|
|
28
28
|
- 'spec/**/*'
|
|
29
29
|
|
|
30
30
|
Metrics/AbcSize:
|
|
31
|
-
Max:
|
|
31
|
+
Max: 85
|
|
32
32
|
|
|
33
33
|
Metrics/CyclomaticComplexity:
|
|
34
|
-
Max:
|
|
34
|
+
Max: 35
|
|
35
35
|
|
|
36
36
|
Metrics/PerceivedComplexity:
|
|
37
|
-
Max:
|
|
37
|
+
Max: 35
|
|
38
38
|
|
|
39
39
|
Style/Documentation:
|
|
40
40
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,50 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.6.14] - 2026-04-02
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- Preserved fleet reply success and error state exactly as produced by the handler instead of forcing successful delivery metadata onto failures
|
|
9
|
+
- Preserved full fleet chat request fidelity by replaying prior messages locally and forwarding provider/model context through fleet chat, embed, and structured execution paths (closes #48)
|
|
10
|
+
|
|
11
|
+
## [0.6.13] - 2026-04-02
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
- Honored `llm.daemon.enabled` in `DaemonClient` so daemon-first behavior is fully disabled when operators turn the daemon off
|
|
15
|
+
- Honored nested `llm.routing.health.*` settings when building the router health tracker so custom health windows and circuit-breaker thresholds take effect (closes #45)
|
|
16
|
+
|
|
17
|
+
## [0.6.12] - 2026-04-02
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- Fixed `Batch.flush` direct execution so grouped work preserves provider/model routing, executes the queued request, and bypasses scheduling deferral when draining the queue
|
|
21
|
+
- Fixed deferred `ask_direct` execution so direct asks enqueue the original message and return the deferred result cleanly instead of crashing on a deferred hash (closes #44)
|
|
22
|
+
|
|
23
|
+
## [0.6.11] - 2026-04-02
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
- Made response-cache spool overflow writes configurable via `llm.prompt_caching.response_cache.spool_dir` and updated the response-cache specs to use a hermetic temp directory instead of `~/.legionio`
|
|
27
|
+
- Updated README public API examples to match `Legion::LLM.ask(message: ...)` and to distinguish session creation from pipeline-backed `Legion::LLM.chat(message:/messages:)` calls (closes #46)
|
|
28
|
+
|
|
29
|
+
## [0.6.10] - 2026-04-02
|
|
30
|
+
|
|
31
|
+
### Changed
|
|
32
|
+
- Removed a redundant RuboCop block-length suppression in Sinatra route tool wiring after the `0.6.9` release cut
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
- Aligned streaming RubyLLM execution with the standard provider path so streamed requests now apply enriched system instructions and conversation breakpoints before provider execution, matching non-streaming prompt construction (closes #47)
|
|
36
|
+
|
|
37
|
+
## [0.6.9] - 2026-04-02
|
|
38
|
+
|
|
39
|
+
### Added
|
|
40
|
+
- Additional helper-based `info`/`error` logging across non-Sinatra runtime paths including native dispatch, prompt caching, GAIA caller helpers, billing and metering pipeline steps, provider and tool registries, compressor, reflection, shadow evaluation, and escalation tracking
|
|
41
|
+
|
|
42
|
+
### Changed
|
|
43
|
+
- Uplifted remaining non-Sinatra `lib/**/*.rb` runtime modules to `Legion::Logging::Helper`, replacing lingering wrapper-style `log_debug` calls with direct `log.debug/info/warn/error`
|
|
44
|
+
- Added catch-all `handle_exception` coverage for remaining non-Sinatra rescue paths, including RBAC pipeline failure handling
|
|
45
|
+
- Fixed pipeline request ID generation for `Request.from_chat_args` callers so response objects consistently retain a non-nil `request_id`
|
|
46
|
+
- Restored shared post-response tool-call serialization used by `PostResponse` and `KnowledgeCapture`, keeping audit publishing and local knowledge capture working in isolated step execution
|
|
47
|
+
- Guarded non-pipeline shadow-evaluation checks when `ShadowEval` is not loaded and cleared the remaining RuboCop regressions in `lib/` and route helpers
|
|
48
|
+
|
|
5
49
|
## [0.6.8] - 2026-04-01
|
|
6
50
|
|
|
7
51
|
### Added
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
LLM integration for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Wraps [ruby_llm](https://github.com/crmne/ruby_llm) to provide chat, embeddings, tool use, and agent capabilities to any Legion extension.
|
|
4
4
|
|
|
5
|
-
**Version**: 0.
|
|
5
|
+
**Version**: 0.6.14
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -111,11 +111,13 @@ Legion::LLM.settings # -> Hash (current LLM settings)
|
|
|
111
111
|
|
|
112
112
|
```ruby
|
|
113
113
|
# Synchronous response
|
|
114
|
-
|
|
115
|
-
puts
|
|
114
|
+
result = Legion::LLM.ask(message: "What is the capital of France?")
|
|
115
|
+
puts(result[:response] || result[:content])
|
|
116
116
|
|
|
117
|
-
#
|
|
118
|
-
#
|
|
117
|
+
# Daemon immediate/created responses return the daemon body hash.
|
|
118
|
+
# Direct fallback and async poll completion return:
|
|
119
|
+
# { status: :done, response: "...", meta: { ... } }
|
|
120
|
+
# HTTP 403 raises DaemonDeniedError; HTTP 429 raises DaemonRateLimitedError.
|
|
119
121
|
```
|
|
120
122
|
|
|
121
123
|
Configure daemon routing under `llm.daemon`:
|
|
@@ -131,23 +133,37 @@ Configure daemon routing under `llm.daemon`:
|
|
|
131
133
|
}
|
|
132
134
|
```
|
|
133
135
|
|
|
134
|
-
|
|
136
|
+
Large async responses that overflow the cache are spooled to disk under
|
|
137
|
+
`llm.prompt_caching.response_cache.spool_dir` (default:
|
|
138
|
+
`~/.legionio/data/spool/llm_responses`).
|
|
135
139
|
|
|
136
|
-
|
|
140
|
+
### Chat
|
|
137
141
|
|
|
138
|
-
|
|
139
|
-
# Use configured defaults
|
|
140
|
-
chat = Legion::LLM.chat
|
|
141
|
-
response = chat.ask("What is the capital of France?")
|
|
142
|
-
puts response.content
|
|
142
|
+
`Legion::LLM.chat` has two public modes:
|
|
143
143
|
|
|
144
|
-
|
|
145
|
-
|
|
144
|
+
- Call it without `message:` or `messages:` to create a `RubyLLM::Chat` session for multi-turn conversation.
|
|
145
|
+
- Call it with `message:` or `messages:` to execute immediately. When the pipeline is enabled, these request-shaped calls run through the pipeline and return a pipeline response object.
|
|
146
146
|
|
|
147
|
-
|
|
147
|
+
```ruby
|
|
148
|
+
# Session creation for multi-turn conversation
|
|
148
149
|
chat = Legion::LLM.chat
|
|
149
150
|
chat.ask("Remember: my name is Matt")
|
|
150
151
|
chat.ask("What's my name?") # -> "Matt"
|
|
152
|
+
|
|
153
|
+
# Immediate execution through the request path
|
|
154
|
+
result = Legion::LLM.chat(message: "What is the capital of France?")
|
|
155
|
+
|
|
156
|
+
# Explicit multi-message request
|
|
157
|
+
result = Legion::LLM.chat(
|
|
158
|
+
messages: [
|
|
159
|
+
{ role: :user, content: "Summarize the meeting notes" },
|
|
160
|
+
{ role: :assistant, content: "Notes received." },
|
|
161
|
+
{ role: :user, content: "Now produce the summary" }
|
|
162
|
+
]
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
# Session creation with overrides still returns RubyLLM::Chat
|
|
166
|
+
chat = Legion::LLM.chat(model: 'gpt-4o', provider: :openai)
|
|
151
167
|
```
|
|
152
168
|
|
|
153
169
|
### Embeddings
|
|
@@ -282,14 +298,14 @@ response = session.ask("Review this PR: #{diff}")
|
|
|
282
298
|
|
|
283
299
|
### Unified Pipeline
|
|
284
300
|
|
|
285
|
-
|
|
301
|
+
`Legion::LLM.chat` calls that include `message:` or `messages:` flow through a multi-step request/response pipeline when `pipeline_enabled` is `true` (the default). Session-construction calls such as `Legion::LLM.chat(model: ..., provider: ...)` return a raw `RubyLLM::Chat` and do not enter the pipeline. The pipeline handles RBAC, classification, RAG context retrieval, MCP tool discovery, metering, billing, audit, and GAIA advisory in a consistent sequence. Steps are skipped based on the caller profile (`:external`, `:gaia`, `:system`).
|
|
286
302
|
|
|
287
303
|
```ruby
|
|
288
|
-
#
|
|
304
|
+
# Request-shaped calls enter the pipeline by default
|
|
289
305
|
result = Legion::LLM.chat(message: "hello")
|
|
290
306
|
|
|
291
|
-
#
|
|
292
|
-
|
|
307
|
+
# Session creation does not
|
|
308
|
+
session = Legion::LLM.chat(model: "gpt-4o")
|
|
293
309
|
```
|
|
294
310
|
|
|
295
311
|
The pipeline accepts a `caller:` hash describing the request origin:
|
data/lib/legion/llm/arbitrage.rb
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
module Legion
|
|
4
5
|
module LLM
|
|
5
6
|
module Arbitrage
|
|
7
|
+
extend Legion::Logging::Helper
|
|
8
|
+
|
|
6
9
|
# Default cost table: per-1M-token input/output prices in USD.
|
|
7
10
|
# Overridable via settings: llm.arbitrage.cost_table
|
|
8
11
|
DEFAULT_COST_TABLE = {
|
|
@@ -57,7 +60,7 @@ module Legion
|
|
|
57
60
|
return nil if scored.empty?
|
|
58
61
|
|
|
59
62
|
selected = scored.min_by { |_model, cost| cost }&.first
|
|
60
|
-
|
|
63
|
+
log.debug("Arbitrage selected model=#{selected} capability=#{capability}")
|
|
61
64
|
selected
|
|
62
65
|
end
|
|
63
66
|
|
|
@@ -83,7 +86,7 @@ module Legion
|
|
|
83
86
|
arb = llm[:arbitrage] || llm['arbitrage'] || {}
|
|
84
87
|
arb.is_a?(Hash) ? arb.transform_keys(&:to_sym) : {}
|
|
85
88
|
rescue StandardError => e
|
|
86
|
-
|
|
89
|
+
handle_exception(e, level: :warn)
|
|
87
90
|
{}
|
|
88
91
|
end
|
|
89
92
|
|
data/lib/legion/llm/batch.rb
CHANGED
|
@@ -2,9 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
require 'securerandom'
|
|
4
4
|
|
|
5
|
+
require 'legion/logging/helper'
|
|
5
6
|
module Legion
|
|
6
7
|
module LLM
|
|
7
8
|
module Batch
|
|
9
|
+
extend Legion::Logging::Helper
|
|
10
|
+
|
|
8
11
|
@mutex = Mutex.new
|
|
9
12
|
@flush_timer = nil
|
|
10
13
|
|
|
@@ -30,7 +33,7 @@ module Legion
|
|
|
30
33
|
|
|
31
34
|
@mutex.synchronize { queue << entry }
|
|
32
35
|
ensure_flush_timer
|
|
33
|
-
|
|
36
|
+
log.debug "Legion::LLM::Batch enqueued #{request_id} (queue size: #{queue_size})"
|
|
34
37
|
request_id
|
|
35
38
|
end
|
|
36
39
|
|
|
@@ -49,7 +52,7 @@ module Legion
|
|
|
49
52
|
|
|
50
53
|
return [] if to_flush.empty?
|
|
51
54
|
|
|
52
|
-
|
|
55
|
+
log.debug "Legion::LLM::Batch flushing #{to_flush.size} request(s)"
|
|
53
56
|
|
|
54
57
|
groups = to_flush.group_by { |e| [e[:provider], e[:model]] }
|
|
55
58
|
results = []
|
|
@@ -117,7 +120,7 @@ module Legion
|
|
|
117
120
|
@flush_timer = Concurrent::TimerTask.new(execution_interval: interval) do
|
|
118
121
|
flush(max_wait: 0)
|
|
119
122
|
rescue StandardError => e
|
|
120
|
-
|
|
123
|
+
handle_exception(e, level: :warn)
|
|
121
124
|
end
|
|
122
125
|
@flush_timer.execute
|
|
123
126
|
end
|
|
@@ -129,19 +132,28 @@ module Legion
|
|
|
129
132
|
b = llm[:batch] || llm['batch'] || {}
|
|
130
133
|
b.is_a?(Hash) ? b.transform_keys(&:to_sym) : {}
|
|
131
134
|
rescue StandardError => e
|
|
132
|
-
|
|
135
|
+
handle_exception(e, level: :warn)
|
|
133
136
|
{}
|
|
134
137
|
end
|
|
135
138
|
|
|
136
139
|
def submit_single(entry, provider:, model:)
|
|
140
|
+
msgs = entry[:messages]
|
|
141
|
+
prompt = if msgs.is_a?(Array)
|
|
142
|
+
last_user = msgs.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
|
|
143
|
+
(last_user || {}).fetch(:content, nil) || (last_user || {}).fetch('content', nil) || ''
|
|
144
|
+
else
|
|
145
|
+
msgs.to_s
|
|
146
|
+
end
|
|
137
147
|
response = Legion::LLM.chat_direct(
|
|
138
|
-
|
|
148
|
+
**entry[:opts],
|
|
149
|
+
provider: provider,
|
|
139
150
|
model: model,
|
|
140
|
-
|
|
151
|
+
message: prompt,
|
|
152
|
+
urgency: :immediate
|
|
141
153
|
)
|
|
142
154
|
|
|
143
155
|
{
|
|
144
|
-
status: :completed,
|
|
156
|
+
status: response.is_a?(Hash) && response[:deferred] ? :deferred : :completed,
|
|
145
157
|
model: model,
|
|
146
158
|
provider: provider,
|
|
147
159
|
id: entry[:id],
|
|
@@ -149,7 +161,7 @@ module Legion
|
|
|
149
161
|
meta: { batched: true, queued_at: entry[:queued_at], completed_at: Time.now.utc }
|
|
150
162
|
}
|
|
151
163
|
rescue StandardError => e
|
|
152
|
-
|
|
164
|
+
handle_exception(e, level: :warn)
|
|
153
165
|
{
|
|
154
166
|
status: :failed,
|
|
155
167
|
model: model,
|
data/lib/legion/llm/cache.rb
CHANGED
|
@@ -2,9 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
require 'digest'
|
|
4
4
|
|
|
5
|
+
require 'legion/logging/helper'
|
|
5
6
|
module Legion
|
|
6
7
|
module LLM
|
|
7
8
|
module Cache
|
|
9
|
+
extend Legion::Logging::Helper
|
|
10
|
+
|
|
8
11
|
DEFAULT_TTL = 300
|
|
9
12
|
|
|
10
13
|
module_function
|
|
@@ -28,13 +31,13 @@ module Legion
|
|
|
28
31
|
|
|
29
32
|
raw = Legion::Cache.get(cache_key)
|
|
30
33
|
if raw.nil?
|
|
31
|
-
|
|
34
|
+
log.debug("LLM cache miss key=#{cache_key}")
|
|
32
35
|
return nil
|
|
33
36
|
end
|
|
34
37
|
|
|
35
38
|
::JSON.parse(raw, symbolize_names: true)
|
|
36
39
|
rescue StandardError => e
|
|
37
|
-
|
|
40
|
+
handle_exception(e, level: :warn)
|
|
38
41
|
nil
|
|
39
42
|
end
|
|
40
43
|
|
|
@@ -43,10 +46,10 @@ module Legion
|
|
|
43
46
|
return false unless available?
|
|
44
47
|
|
|
45
48
|
Legion::Cache.set(cache_key, ::JSON.dump(response), ttl)
|
|
46
|
-
|
|
49
|
+
log.debug("LLM cache write key=#{cache_key} ttl=#{ttl}")
|
|
47
50
|
true
|
|
48
51
|
rescue StandardError => e
|
|
49
|
-
|
|
52
|
+
handle_exception(e, level: :warn)
|
|
50
53
|
false
|
|
51
54
|
end
|
|
52
55
|
|
|
@@ -69,7 +72,7 @@ module Legion
|
|
|
69
72
|
Legion::LLM::Settings.default
|
|
70
73
|
end
|
|
71
74
|
rescue StandardError => e
|
|
72
|
-
|
|
75
|
+
handle_exception(e, level: :warn)
|
|
73
76
|
{}
|
|
74
77
|
end
|
|
75
78
|
end
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
module Legion
|
|
4
5
|
module LLM
|
|
5
6
|
module ClaudeConfigLoader
|
|
7
|
+
extend Legion::Logging::Helper
|
|
8
|
+
|
|
6
9
|
CLAUDE_SETTINGS = File.expand_path('~/.claude/settings.json')
|
|
7
10
|
CLAUDE_CONFIG = File.expand_path('~/.claude.json')
|
|
8
11
|
|
|
@@ -21,7 +24,7 @@ module Legion
|
|
|
21
24
|
require 'json'
|
|
22
25
|
::JSON.parse(File.read(path), symbolize_names: true)
|
|
23
26
|
rescue StandardError => e
|
|
24
|
-
|
|
27
|
+
handle_exception(e, level: :debug)
|
|
25
28
|
{}
|
|
26
29
|
end
|
|
27
30
|
|
|
@@ -36,13 +39,13 @@ module Legion
|
|
|
36
39
|
|
|
37
40
|
if config[:anthropicApiKey] && providers.dig(:anthropic, :api_key).nil?
|
|
38
41
|
providers[:anthropic][:api_key] = config[:anthropicApiKey]
|
|
39
|
-
|
|
42
|
+
log.debug 'Imported Anthropic API key from Claude CLI config'
|
|
40
43
|
end
|
|
41
44
|
|
|
42
45
|
return unless config[:openaiApiKey] && providers.dig(:openai, :api_key).nil?
|
|
43
46
|
|
|
44
47
|
providers[:openai][:api_key] = config[:openaiApiKey]
|
|
45
|
-
|
|
48
|
+
log.debug 'Imported OpenAI API key from Claude CLI config'
|
|
46
49
|
end
|
|
47
50
|
|
|
48
51
|
def apply_model_preference(config)
|
|
@@ -53,7 +56,7 @@ module Legion
|
|
|
53
56
|
return if llm[:default_model]
|
|
54
57
|
|
|
55
58
|
llm[:default_model] = model
|
|
56
|
-
|
|
59
|
+
log.debug "Imported model preference from Claude CLI config: #{model}"
|
|
57
60
|
end
|
|
58
61
|
end
|
|
59
62
|
end
|
|
@@ -3,9 +3,12 @@
|
|
|
3
3
|
require 'base64'
|
|
4
4
|
require 'json'
|
|
5
5
|
|
|
6
|
+
require 'legion/logging/helper'
|
|
6
7
|
module Legion
|
|
7
8
|
module LLM
|
|
8
9
|
module CodexConfigLoader
|
|
10
|
+
extend Legion::Logging::Helper
|
|
11
|
+
|
|
9
12
|
CODEX_AUTH = File.expand_path('~/.codex/auth.json')
|
|
10
13
|
|
|
11
14
|
module_function
|
|
@@ -36,7 +39,7 @@ module Legion
|
|
|
36
39
|
def read_json(path)
|
|
37
40
|
::JSON.parse(File.read(path), symbolize_names: true)
|
|
38
41
|
rescue StandardError => e
|
|
39
|
-
|
|
42
|
+
handle_exception(e, level: :debug)
|
|
40
43
|
{}
|
|
41
44
|
end
|
|
42
45
|
|
|
@@ -47,7 +50,7 @@ module Legion
|
|
|
47
50
|
return unless token.is_a?(String) && !token.empty?
|
|
48
51
|
|
|
49
52
|
unless token_valid?(token)
|
|
50
|
-
|
|
53
|
+
log.debug 'CodexConfigLoader: access token is expired, skipping'
|
|
51
54
|
return
|
|
52
55
|
end
|
|
53
56
|
|
|
@@ -57,7 +60,7 @@ module Legion
|
|
|
57
60
|
return unless resolved_existing.nil? || (resolved_existing.respond_to?(:empty?) && resolved_existing.empty?)
|
|
58
61
|
|
|
59
62
|
providers[:openai][:api_key] = token
|
|
60
|
-
|
|
63
|
+
log.debug 'Imported OpenAI API key from Codex auth config'
|
|
61
64
|
end
|
|
62
65
|
|
|
63
66
|
def resolve_env_api_key(value)
|
|
@@ -99,7 +102,8 @@ module Legion
|
|
|
99
102
|
|
|
100
103
|
exp > Time.now.to_i
|
|
101
104
|
rescue StandardError => e
|
|
102
|
-
|
|
105
|
+
log.debug("CodexConfigLoader.token_valid? failed to parse access token: #{e.message}")
|
|
106
|
+
handle_exception(e, level: :debug)
|
|
103
107
|
true
|
|
104
108
|
end
|
|
105
109
|
end
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
module Legion
|
|
4
5
|
module LLM
|
|
5
6
|
module Compressor
|
|
7
|
+
extend Legion::Logging::Helper
|
|
8
|
+
|
|
6
9
|
NONE = 0
|
|
7
10
|
LIGHT = 1
|
|
8
11
|
MODERATE = 2
|
|
@@ -35,7 +38,7 @@ module Legion
|
|
|
35
38
|
result = segments.map { |seg| seg[:protected] ? seg[:text] : compress_prose(seg[:text], level) }.join
|
|
36
39
|
|
|
37
40
|
result = collapse_whitespace(result) if level >= AGGRESSIVE
|
|
38
|
-
|
|
41
|
+
log.debug("Compressor applied level=#{level} original=#{original_length} compressed=#{result.length}")
|
|
39
42
|
result
|
|
40
43
|
end
|
|
41
44
|
|
|
@@ -47,10 +50,14 @@ module Legion
|
|
|
47
50
|
|
|
48
51
|
summary = llm_summarize(text, max_tokens)
|
|
49
52
|
if summary
|
|
50
|
-
|
|
53
|
+
log.info("[llm][compressor] summarized messages=#{messages.size} summary_chars=#{summary.length}")
|
|
51
54
|
{ summary: summary, original_count: messages.size, compressed: true }
|
|
52
55
|
else
|
|
53
56
|
fallback = compress(text, level: AGGRESSIVE)
|
|
57
|
+
log.info(
|
|
58
|
+
"[llm][compressor] fallback_compress messages=#{messages.size} " \
|
|
59
|
+
"input_chars=#{text.length} summary_chars=#{fallback.length}"
|
|
60
|
+
)
|
|
54
61
|
{ summary: fallback, original_count: messages.size, compressed: true, method: :stopword }
|
|
55
62
|
end
|
|
56
63
|
end
|
|
@@ -172,7 +179,8 @@ module Legion
|
|
|
172
179
|
response = session.ask("#{SUMMARIZE_PROMPT}\n\n#{text[0, max_tokens * 8]}")
|
|
173
180
|
response.content
|
|
174
181
|
rescue StandardError => e
|
|
175
|
-
|
|
182
|
+
handle_exception(e, level: :debug, operation: 'llm.compressor.llm_summarize')
|
|
183
|
+
log.debug("[llm][compressor] summarize_failed error=#{e.message}")
|
|
176
184
|
nil
|
|
177
185
|
end
|
|
178
186
|
|
|
@@ -189,10 +197,6 @@ module Legion
|
|
|
189
197
|
union = (words_a | words_b).size.to_f
|
|
190
198
|
union.zero? ? 0.0 : intersection / union
|
|
191
199
|
end
|
|
192
|
-
|
|
193
|
-
def log_debug(msg)
|
|
194
|
-
Legion::Logging.debug("Compressor: #{msg}") if defined?(Legion::Logging)
|
|
195
|
-
end
|
|
196
200
|
end
|
|
197
201
|
end
|
|
198
202
|
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
4
|
+
|
|
3
5
|
module Legion
|
|
4
6
|
module LLM
|
|
5
7
|
# Computes a ConfidenceScore for an LLM response using available signals.
|
|
@@ -13,6 +15,8 @@ module Legion
|
|
|
13
15
|
# Legion::Settings is available, otherwise the DEFAULT_BANDS constants are used.
|
|
14
16
|
# Per-call overrides can be passed as options[:confidence_bands].
|
|
15
17
|
module ConfidenceScorer
|
|
18
|
+
extend Legion::Logging::Helper
|
|
19
|
+
|
|
16
20
|
# Default band boundaries. Keys are the *lower* boundary of that band name:
|
|
17
21
|
# score < :low -> :very_low
|
|
18
22
|
# score < :medium -> :low
|
|
@@ -119,7 +123,8 @@ module Legion
|
|
|
119
123
|
# avg_lp is in (-inf, 0]; e^0 = 1.0 (perfect), e^(-5) ≈ 0.007 (very uncertain).
|
|
120
124
|
# We clamp at -5 so very negative values still map to > 0.
|
|
121
125
|
Math.exp([avg_lp, -5.0].max)
|
|
122
|
-
rescue StandardError
|
|
126
|
+
rescue StandardError => e
|
|
127
|
+
handle_exception(e, level: :debug, operation: 'llm.confidence_scorer.extract_logprobs')
|
|
123
128
|
nil
|
|
124
129
|
end
|
|
125
130
|
|
|
@@ -131,7 +136,8 @@ module Legion
|
|
|
131
136
|
lp = raw_response.logprobs if klass.method_defined?(:logprobs)
|
|
132
137
|
lp ||= raw_response.metadata&.dig(:logprobs) if klass.method_defined?(:metadata)
|
|
133
138
|
lp
|
|
134
|
-
rescue StandardError
|
|
139
|
+
rescue StandardError => e
|
|
140
|
+
handle_exception(e, level: :debug, operation: 'llm.confidence_scorer.probe_logprobs')
|
|
135
141
|
nil
|
|
136
142
|
end
|
|
137
143
|
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/logging/helper'
|
|
3
4
|
module Legion
|
|
4
5
|
module LLM
|
|
5
6
|
class ContextCurator
|
|
7
|
+
include Legion::Logging::Helper
|
|
8
|
+
|
|
6
9
|
CURATED_KEY = :__curated__
|
|
7
10
|
|
|
8
11
|
def initialize(conversation_id:)
|
|
@@ -19,7 +22,7 @@ module Legion
|
|
|
19
22
|
store_curated(@conversation_id, curated)
|
|
20
23
|
@curated_cache = nil
|
|
21
24
|
rescue StandardError => e
|
|
22
|
-
|
|
25
|
+
handle_exception(e, level: :warn)
|
|
23
26
|
end
|
|
24
27
|
end
|
|
25
28
|
|
|
@@ -123,7 +126,7 @@ module Legion
|
|
|
123
126
|
distill_tool_result(msg, assistant_response)
|
|
124
127
|
end
|
|
125
128
|
rescue StandardError => e
|
|
126
|
-
|
|
129
|
+
handle_exception(e, level: :warn)
|
|
127
130
|
distill_tool_result(msg, assistant_response)
|
|
128
131
|
end
|
|
129
132
|
|
|
@@ -141,7 +144,8 @@ module Legion
|
|
|
141
144
|
|
|
142
145
|
def curation_settings
|
|
143
146
|
Legion::Settings.dig(:llm, :context_curation) || {}
|
|
144
|
-
rescue StandardError
|
|
147
|
+
rescue StandardError => e
|
|
148
|
+
handle_exception(e, level: :debug, operation: 'llm.context_curator.curation_settings')
|
|
145
149
|
{}
|
|
146
150
|
end
|
|
147
151
|
|
|
@@ -174,7 +178,7 @@ module Legion
|
|
|
174
178
|
)
|
|
175
179
|
end
|
|
176
180
|
rescue StandardError => e
|
|
177
|
-
|
|
181
|
+
handle_exception(e, level: :warn)
|
|
178
182
|
end
|
|
179
183
|
|
|
180
184
|
def load_curated(conversation_id)
|
|
@@ -187,7 +191,7 @@ module Legion
|
|
|
187
191
|
regular = raw.reject { |m| m[:role] == CURATED_KEY }
|
|
188
192
|
apply_curation_pipeline(regular)
|
|
189
193
|
rescue StandardError => e
|
|
190
|
-
|
|
194
|
+
handle_exception(e, level: :warn)
|
|
191
195
|
nil
|
|
192
196
|
end
|
|
193
197
|
|
|
@@ -201,7 +205,7 @@ module Legion
|
|
|
201
205
|
result = evict_superseded(result)
|
|
202
206
|
dedup_similar(result)
|
|
203
207
|
rescue StandardError => e
|
|
204
|
-
|
|
208
|
+
handle_exception(e, level: :warn)
|
|
205
209
|
messages
|
|
206
210
|
end
|
|
207
211
|
|
|
@@ -279,7 +283,7 @@ module Legion
|
|
|
279
283
|
response = Legion::LLM.chat_direct(model: model, message: prompt)
|
|
280
284
|
response.respond_to?(:content) ? response.content : nil
|
|
281
285
|
rescue StandardError => e
|
|
282
|
-
|
|
286
|
+
handle_exception(e, level: :warn)
|
|
283
287
|
nil
|
|
284
288
|
end
|
|
285
289
|
|
|
@@ -300,7 +304,8 @@ module Legion
|
|
|
300
304
|
return config[:default_model] if config.is_a?(Hash) && config[:enabled] && config[:default_model]
|
|
301
305
|
end
|
|
302
306
|
nil
|
|
303
|
-
rescue StandardError
|
|
307
|
+
rescue StandardError => e
|
|
308
|
+
handle_exception(e, level: :debug, operation: 'llm.context_curator.detect_small_model')
|
|
304
309
|
nil
|
|
305
310
|
end
|
|
306
311
|
end
|