legion-llm 0.8.2 → 0.8.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +91 -0
- data/CLAUDE.md +24 -15
- data/lib/legion/llm/cache/response.rb +17 -23
- data/lib/legion/llm/call/claude_config_loader.rb +116 -9
- data/lib/legion/llm/call/codex_config_loader.rb +25 -6
- data/lib/legion/llm/call/embeddings.rb +16 -36
- data/lib/legion/llm/call/providers.rb +195 -32
- data/lib/legion/llm/compat.rb +0 -2
- data/lib/legion/llm/config.rb +0 -1
- data/lib/legion/llm/fleet/dispatcher.rb +5 -35
- data/lib/legion/llm/inference/executor.rb +60 -47
- data/lib/legion/llm/inference/steps/classification.rb +8 -14
- data/lib/legion/llm/inference/steps/tier_assigner.rb +3 -3
- data/lib/legion/llm/inference/steps/trigger_match.rb +1 -1
- data/lib/legion/llm/inference.rb +27 -15
- data/lib/legion/llm/router/escalation/chain.rb +1 -0
- data/lib/legion/llm/router/gateway_interceptor.rb +2 -2
- data/lib/legion/llm/router/resolution.rb +12 -0
- data/lib/legion/llm/router.rb +95 -15
- data/lib/legion/llm/settings.rb +385 -0
- data/lib/legion/llm/skills.rb +0 -2
- data/lib/legion/llm/transport/messages/escalation_event.rb +3 -1
- data/lib/legion/llm/transport/messages/prompt_event.rb +1 -1
- data/lib/legion/llm/transport/messages/skill_event.rb +1 -1
- data/lib/legion/llm/transport/messages/tool_event.rb +1 -1
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +2 -6
- metadata +2 -3
- data/lib/legion/llm/config/settings.rb +0 -341
- data/lib/legion/llm/skills/settings.rb +0 -35
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9d36acc38aded21eee645009a3bf41e8a0214aff728ee3179becbede3bc70528
|
|
4
|
+
data.tar.gz: 301984dc9000a53f88036fb5672faf616b0cc1d45c898462999133e976555d51
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 49c479b427f77ee4aa0b1c59f9fca8fa3d9947f68ebc05de298158766744b1c2877696603c97baf82be7144d2f97c6d23cd3d0856358316ac4fddf908a7561c2
|
|
7
|
+
data.tar.gz: 84e6095b475be225de975deac022a7587af8f9a4cae13950a85be9149803bed8dc689cf2cacb0f6d5abc5114edacc8eb8f94df66e23f76f09cd98ebc3a014d51
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,96 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.8.13] - 2026-04-22
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Escalation loop now feeds `Router.health_tracker` with an `:error` signal on every failure so the circuit breaker trips when a provider is consistently down — auth failures, rate limits, and general errors all count.
|
|
7
|
+
- `AuthError` and `PrivacyModeError` in escalation are logged with `handled: true` so they appear in logs as gracefully-handled failures rather than uncaught exceptions.
|
|
8
|
+
- `RateLimitError` in escalation is handled the same way (was previously re-raised, aborting the entire chain).
|
|
9
|
+
- Extracted `attempt_escalation` and `record_escalation_failure` from `run_provider_call_with_escalation` to keep the method within Rubocop length limits and make each responsibility clear.
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
- `CodexConfigLoader`: refactored to extract `read_config` helper (eliminates duplicate file-exist checks in `load` and `read_token`); added `read_openai_api_key` and `read_openai_credential` accessors for the multi-source credential probing chain.
|
|
13
|
+
|
|
14
|
+
## [0.8.12] - 2026-04-22
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
- `ClaudeConfigLoader`: `settings_path` and `config_path` are now read from `Legion::LLM.settings.dig(:claude_cli, ...)` instead of hardcoded constants, making both paths configurable. `SECRET_URI_PATTERN` remains a constant — it's a protocol definition, not a runtime knob. Corresponding settings keys added to `Legion::LLM::Settings.claude_cli_defaults`.
|
|
18
|
+
|
|
19
|
+
## [0.8.11] - 2026-04-22
|
|
20
|
+
|
|
21
|
+
### Added
|
|
22
|
+
- Multi-source credential detection for all providers: `credential_available_for?` checks resolved env vars, not raw `env://` URI strings, so providers aren't falsely auto-enabled when the env var is unset.
|
|
23
|
+
- `probe_provider_credentials`: when multiple API keys exist for a provider (e.g. both `OPENAI_API_KEY` and `CODEX_API_KEY`), each candidate is tested in order and the first working key is committed; provider is disabled if all fail.
|
|
24
|
+
- `probe_via_model_list`: uses `RubyLLM::Provider.list_models` (a cheap GET with no token cost) to validate credentials before falling back to a lightweight chat probe.
|
|
25
|
+
- `recover_openai_with_codex`: automatically attempts Codex bearer-token fallback when all direct OpenAI keys fail.
|
|
26
|
+
|
|
27
|
+
### Fixed
|
|
28
|
+
- `configure_bedrock`/`configure_anthropic`/`configure_openai`: use `resolve_setting_reference` to unwrap `env://` placeholders before passing to RubyLLM config, preventing "key not found" errors when env var is absent.
|
|
29
|
+
- `ClaudeConfigLoader.apply_api_keys`: removed early-return pattern that prevented Bedrock bearer token import from running when no OpenAI key was found.
|
|
30
|
+
|
|
31
|
+
## [0.8.10] - 2026-04-22
|
|
32
|
+
|
|
33
|
+
### Changed
|
|
34
|
+
- `compliance_defaults`: `classification_scan` and `encrypt_audit` now default to `false`; both classification scanning and audit encryption are opt-in.
|
|
35
|
+
- `tool_trigger_defaults`: `scan_depth` raised to `10` (was `2`), `tool_limit` raised to `50` (was `10`).
|
|
36
|
+
- `trigger_match.rb`: hardcoded `|| 2` fallback updated to `|| 10` to match new setting default.
|
|
37
|
+
|
|
38
|
+
## [0.8.9] - 2026-04-22
|
|
39
|
+
|
|
40
|
+
### Fixed
|
|
41
|
+
- Classification spec: wholesale `Legion::Settings[:llm] = {...}` replacements converted to key-level writes (`[:llm][:default_provider] = :x`) to prevent wiping sibling settings.
|
|
42
|
+
- Audit `encrypt?` specs: updated to test toggle behavior (`false` by default, `true` when `encrypt_audit` setting is enabled) instead of expecting always-on.
|
|
43
|
+
- Trigger match spec: updated scan_depth expectation and before-block to match new defaults.
|
|
44
|
+
|
|
45
|
+
## [0.8.8] - 2026-04-22
|
|
46
|
+
|
|
47
|
+
### Changed
|
|
48
|
+
- `Legion::LLM.settings` now calls `Legion::Settings[:llm]` directly — dead `const_defined?('Settings')` branch and `Settings.default` fallback removed. No explicit `require 'legion/settings'` is needed in `llm.rb` because `legion-settings` is a gemspec dependency and is always activated by Bundler before `legion-llm` is required.
|
|
49
|
+
- `settings.rb` bootstrap call simplified from a guarded `begin/rescue` block to a direct `Legion::Settings.merge_settings(...)` call for the same reason.
|
|
50
|
+
|
|
51
|
+
## [0.8.7] - 2026-04-22
|
|
52
|
+
|
|
53
|
+
### Changed
|
|
54
|
+
- Eliminated scattered constants and duplicate settings files across the codebase:
|
|
55
|
+
- `Skills::Settings` module deleted — defaults moved into `Legion::LLM::Settings.skills_defaults`; `Skills.start` no longer calls `Settings.apply` (merge happens at load time via the standard settings bootstrap)
|
|
56
|
+
- `Fleet::Dispatcher` `DEFAULT_TIMEOUT`/`TIMEOUTS` constants removed — `resolve_timeout` now reads directly from `Legion::LLM.settings.dig(:routing, :tiers, :fleet, :timeouts, ...)`; dead `defined?(Legion::Settings)` guard removed
|
|
57
|
+
- `Call::Embeddings` `PROVIDER_EMBEDDING_MODELS`, `TARGET_DIMENSION`, `OLLAMA_CONTEXT_CHARS`, `OLLAMA_DEFAULT_CONTEXT_CHARS`, `PREFIX_REGISTRY` constants removed — replaced with `target_dimension`/`embedding_settings` helpers reading from `settings[:embedding]`; `embedding_settings` corrected to use `Legion::LLM.settings` instead of bare `Legion::Settings.dig(:llm, :embedding)`
|
|
58
|
+
- `Cache::Response` `DEFAULT_TTL`, `SPOOL_THRESHOLD`, `SPOOL_DIR` constants removed — replaced with private `default_ttl`/`spool_threshold`/`spool_dir` helpers reading from `settings[:prompt_caching][:response_cache]`
|
|
59
|
+
- `Settings.embedding_defaults` expanded: added `anthropic`/`gemini` to `provider_models`, added `ollama_context_chars`, `ollama_default_context_chars`, `prefix_registry`
|
|
60
|
+
- `Settings.prompt_caching_defaults.response_cache` gains `spool_threshold_bytes` (default 8 MB, i.e. `8 * 1024 * 1024` bytes)
|
|
61
|
+
|
|
62
|
+
## [0.8.6] - 2026-04-22
|
|
63
|
+
|
|
64
|
+
### Changed
|
|
65
|
+
- `Legion::LLM::Settings` is now the canonical module — content moved from `Legion::LLM::Config::Settings` directly into `lib/legion/llm/settings.rb`. The `Config::Settings` indirection and `lib/legion/llm/config/settings.rb` are removed. `service.rb` and any external callers using `Legion::LLM::Settings.default` continue to work unchanged.
|
|
66
|
+
|
|
67
|
+
## [0.8.5] - 2026-04-22
|
|
68
|
+
|
|
69
|
+
### Fixed
|
|
70
|
+
- All compliance settings now have explicit defaults defined in `Config::Settings.compliance_defaults` (merged under `llm.compliance`): `classification_scan`, `encrypt_audit`, `phi_block_cloud`, `cloud_providers`, `redact_pii`, `redaction_placeholder`, `strict_hipaa`, `default_level`. Previously these keys were read via `dig` with no guaranteed defaults.
|
|
71
|
+
- `Steps::Classification` now reads compliance settings via `Legion::LLM.settings.dig(:compliance, ...)` (consistent with all other llm settings) instead of bare `Legion::Settings.dig(:compliance, ...)` which targeted the wrong path.
|
|
72
|
+
- Removed dead `defined?(Legion::Settings)` guards in `Steps::Classification` — `legion-settings` is a hard dependency and is always present.
|
|
73
|
+
|
|
74
|
+
## [0.8.4] - 2026-04-22
|
|
75
|
+
|
|
76
|
+
### Fixed
|
|
77
|
+
- `Inference::Executor` now normalizes content-blocks arrays (`[{type: "text", text: "..."}]`) to a plain string before passing to `session.ask`. Previously the raw array was forwarded to RubyLLM, which serialized it as `{ type: 'text', text: [{...}] }` — an invalid Anthropic API payload causing HTTP 400 on every request when the Interlink sends structured content blocks.
|
|
78
|
+
|
|
79
|
+
### Added
|
|
80
|
+
- Audit encryption is now configurable: set `llm.compliance.encrypt_audit: true` in settings to encrypt payloads on the `llm.audit` exchange. Defaults to `false` (plaintext). Applies to `PromptEvent`, `ToolEvent`, and `SkillEvent`.
|
|
81
|
+
|
|
82
|
+
## [0.8.3] - 2026-04-22
|
|
83
|
+
|
|
84
|
+
### Fixed
|
|
85
|
+
- `EscalationEvent#routing_key` was called as a class-level DSL on load, raising `NoMethodError` and preventing `LLM::Transport.load_all` from completing. Converted to instance method to match all other message classes.
|
|
86
|
+
- Spec stub for `Legion::Transport::Message` also used the class-level DSL pattern, masking this bug. Updated to instance method.
|
|
87
|
+
|
|
88
|
+
## [0.8.2] - 2026-04-22
|
|
89
|
+
|
|
90
|
+
### Fixed
|
|
91
|
+
- `Escalation` exchange used class-level `exchange_name`/`exchange_type` DSL which doesn't exist on `Legion::Transport::Exchange`, raising `NoMethodError` at require time. Converted to instance methods `exchange_name` / `default_type`.
|
|
92
|
+
- `Legion::Settings[:llm][:connected]` writes were hitting an ephemeral overlay-merged hash copy; switched to `Legion::Settings.loader.settings[:llm][:connected]` to persist through restarts.
|
|
93
|
+
|
|
3
94
|
## [0.8.0] - 2026-04-21
|
|
4
95
|
|
|
5
96
|
### Changed
|
data/CLAUDE.md
CHANGED
|
@@ -186,21 +186,27 @@ Note: Backward-compat aliases live in lib/legion/llm/compat.rb (const_missing-ba
|
|
|
186
186
|
|
|
187
187
|
### Routing Architecture
|
|
188
188
|
|
|
189
|
-
|
|
189
|
+
Five-tier dispatch model. Local-first avoids unnecessary network hops; fleet offloads to shared hardware via Transport; openai_compat routes to user-configured gateways; cloud handles managed cloud providers; frontier is the fallback for direct frontier model providers.
|
|
190
190
|
|
|
191
191
|
```
|
|
192
|
-
|
|
193
|
-
│
|
|
194
|
-
│
|
|
195
|
-
│ Tier 1: LOCAL
|
|
196
|
-
│ Zero network overhead, no Transport
|
|
197
|
-
│
|
|
198
|
-
│ Tier 2: FLEET
|
|
199
|
-
│ Via Fleet::Dispatcher RPC over AMQP
|
|
200
|
-
│
|
|
201
|
-
│ Tier 3:
|
|
202
|
-
│
|
|
203
|
-
|
|
192
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
193
|
+
│ Legion::LLM Router (per-node) │
|
|
194
|
+
│ │
|
|
195
|
+
│ Tier 1: LOCAL → Ollama on this machine (direct HTTP) │
|
|
196
|
+
│ Zero network overhead, no Transport │
|
|
197
|
+
│ │
|
|
198
|
+
│ Tier 2: FLEET → Ollama on Mac Studios / GPU servers │
|
|
199
|
+
│ Via Fleet::Dispatcher RPC over AMQP │
|
|
200
|
+
│ │
|
|
201
|
+
│ Tier 3: OPENAI_COMPAT → User-configured OpenAI-spec gateways│
|
|
202
|
+
│ UAIS, Kong AI, custom endpoints │
|
|
203
|
+
│ │
|
|
204
|
+
│ Tier 4: CLOUD → Bedrock, Azure, Gemini/Vertex AI │
|
|
205
|
+
│ Managed cloud provider API calls │
|
|
206
|
+
│ │
|
|
207
|
+
│ Tier 5: FRONTIER → Anthropic, OpenAI direct │
|
|
208
|
+
│ Direct API calls to frontier model providers │
|
|
209
|
+
└──────────────────────────────────────────────────────────────┘
|
|
204
210
|
```
|
|
205
211
|
|
|
206
212
|
### Routing Resolution Flow
|
|
@@ -392,9 +398,12 @@ Nested under `Legion::Settings[:llm][:routing]`:
|
|
|
392
398
|
|-----|------|---------|-------------|
|
|
393
399
|
| `enabled` | Boolean | `false` | Enable routing (opt-in) |
|
|
394
400
|
| `default_intent` | Hash | `{ privacy: 'normal', capability: 'moderate', cost: 'normal' }` | Defaults merged into every intent |
|
|
401
|
+
| `tier_priority` | Array | `%w[local fleet openai_compat cloud frontier]` | Ordered tier preference for routing |
|
|
395
402
|
| `tiers.local` | Hash | `{ provider: 'ollama' }` | Local tier config |
|
|
396
403
|
| `tiers.fleet` | Hash | `{ queue: 'llm.inference', timeout_seconds: 30 }` | Fleet tier config |
|
|
397
|
-
| `tiers.
|
|
404
|
+
| `tiers.openai_compat` | Hash | `{ gateways: [] }` | User-configured OpenAI-compatible gateways |
|
|
405
|
+
| `tiers.cloud` | Hash | `{ providers: ['bedrock', 'azure', 'gemini'] }` | Managed cloud provider API calls |
|
|
406
|
+
| `tiers.frontier` | Hash | `{ providers: ['anthropic', 'openai'] }` | Direct API frontier providers |
|
|
398
407
|
| `health.window_seconds` | Integer | `300` | Rolling window for latency tracking |
|
|
399
408
|
| `health.circuit_breaker.failure_threshold` | Integer | `3` | Consecutive failures before circuit opens |
|
|
400
409
|
| `health.circuit_breaker.cooldown_seconds` | Integer | `60` | Seconds before circuit transitions to half_open |
|
|
@@ -426,7 +435,7 @@ Each rule is a hash with:
|
|
|
426
435
|
|
|
427
436
|
| Dimension | Values | Default | Effect |
|
|
428
437
|
|-----------|--------|---------|--------|
|
|
429
|
-
| `privacy` | `:strict`, `:normal` | `:normal` | `:strict` -> never
|
|
438
|
+
| `privacy` | `:strict`, `:normal` | `:normal` | `:strict` -> never external (via `never_external` constraint rules, blocks cloud + frontier + openai_compat) |
|
|
430
439
|
| `capability` | `:basic`, `:moderate`, `:reasoning` | `:moderate` | Higher prefers larger/cloud models |
|
|
431
440
|
| `cost` | `:minimize`, `:normal` | `:normal` | `:minimize` prefers local/fleet |
|
|
432
441
|
|
|
@@ -10,28 +10,21 @@ module Legion
|
|
|
10
10
|
module Response
|
|
11
11
|
extend Legion::Logging::Helper
|
|
12
12
|
|
|
13
|
-
DEFAULT_TTL = 300
|
|
14
|
-
SPOOL_THRESHOLD = 8 * 1024 * 1024 # 8 MB
|
|
15
|
-
SPOOL_DIR = File.expand_path('~/.legionio/data/spool/llm_responses').freeze
|
|
16
|
-
|
|
17
13
|
module_function
|
|
18
14
|
|
|
19
|
-
|
|
20
|
-
def init_request(request_id, ttl: DEFAULT_TTL)
|
|
15
|
+
def init_request(request_id, ttl: default_ttl)
|
|
21
16
|
cache_set(status_key(request_id), 'pending', ttl)
|
|
22
17
|
end
|
|
23
18
|
|
|
24
|
-
|
|
25
|
-
def complete(request_id, response:, meta:, ttl: DEFAULT_TTL)
|
|
19
|
+
def complete(request_id, response:, meta:, ttl: default_ttl)
|
|
26
20
|
write_response(request_id, response, ttl)
|
|
27
|
-
cache_set(meta_key(request_id), ::JSON.dump(meta), ttl)
|
|
21
|
+
cache_set(meta_key(request_id), Legion::JSON.dump(meta), ttl)
|
|
28
22
|
cache_set(status_key(request_id), 'done', ttl)
|
|
29
23
|
end
|
|
30
24
|
|
|
31
|
-
|
|
32
|
-
def fail_request(request_id, code:, message:, ttl: DEFAULT_TTL)
|
|
25
|
+
def fail_request(request_id, code:, message:, ttl: default_ttl)
|
|
33
26
|
log.warn("ResponseCache fail_request request_id=#{request_id} code=#{code} message=#{message}")
|
|
34
|
-
payload = ::JSON.dump({ code: code, message: message })
|
|
27
|
+
payload = Legion::JSON.dump({ code: code, message: message })
|
|
35
28
|
cache_set(error_key(request_id), payload, ttl)
|
|
36
29
|
cache_set(status_key(request_id), 'error', ttl)
|
|
37
30
|
end
|
|
@@ -67,9 +60,7 @@ module Legion
|
|
|
67
60
|
::JSON.parse(raw, symbolize_names: true)
|
|
68
61
|
end
|
|
69
62
|
|
|
70
|
-
|
|
71
|
-
# { status: :error, error: }, or { status: :timeout }.
|
|
72
|
-
def poll(request_id, timeout: DEFAULT_TTL, interval: 0.1)
|
|
63
|
+
def poll(request_id, timeout: default_ttl, interval: 0.1)
|
|
73
64
|
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + timeout
|
|
74
65
|
|
|
75
66
|
loop do
|
|
@@ -124,18 +115,21 @@ module Legion
|
|
|
124
115
|
Legion::Cache.set(key, value, ttl)
|
|
125
116
|
end
|
|
126
117
|
|
|
127
|
-
private_class_method def self.
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
end
|
|
131
|
-
configured = configured.to_s.strip
|
|
132
|
-
return SPOOL_DIR if configured.empty?
|
|
118
|
+
private_class_method def self.default_ttl
|
|
119
|
+
Legion::LLM.settings.dig(:prompt_caching, :response_cache, :ttl_seconds) || 300
|
|
120
|
+
end
|
|
133
121
|
|
|
134
|
-
|
|
122
|
+
private_class_method def self.spool_threshold
|
|
123
|
+
Legion::LLM.settings.dig(:prompt_caching, :response_cache, :spool_threshold_bytes) || (8 * 1024 * 1024)
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
private_class_method def self.spool_dir
|
|
127
|
+
configured = Legion::LLM.settings.dig(:prompt_caching, :response_cache, :spool_dir).to_s.strip
|
|
128
|
+
configured.empty? ? File.expand_path('~/.legionio/data/spool/llm_responses') : File.expand_path(configured)
|
|
135
129
|
end
|
|
136
130
|
|
|
137
131
|
private_class_method def self.write_response(request_id, response_text, ttl)
|
|
138
|
-
if response_text.bytesize >
|
|
132
|
+
if response_text.bytesize > spool_threshold
|
|
139
133
|
log.warn("ResponseCache spool overflow request_id=#{request_id} bytes=#{response_text.bytesize}")
|
|
140
134
|
FileUtils.mkdir_p(spool_dir)
|
|
141
135
|
path = File.join(spool_dir, "#{request_id}.txt")
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
3
5
|
require 'legion/logging/helper'
|
|
4
6
|
module Legion
|
|
5
7
|
module LLM
|
|
@@ -7,28 +9,79 @@ module Legion
|
|
|
7
9
|
module ClaudeConfigLoader
|
|
8
10
|
extend Legion::Logging::Helper
|
|
9
11
|
|
|
10
|
-
|
|
11
|
-
CLAUDE_CONFIG = File.expand_path('~/.claude.json')
|
|
12
|
+
SECRET_URI_PATTERN = %r{\A(?:env|vault|lease)://}
|
|
12
13
|
|
|
13
14
|
module_function
|
|
14
15
|
|
|
16
|
+
def claude_settings_path
|
|
17
|
+
File.expand_path(Legion::LLM.settings.dig(:claude_cli, :settings_path) || '~/.claude/settings.json')
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def claude_config_path
|
|
21
|
+
File.expand_path(Legion::LLM.settings.dig(:claude_cli, :config_path) || '~/.claude.json')
|
|
22
|
+
end
|
|
23
|
+
|
|
15
24
|
def load
|
|
16
|
-
config =
|
|
25
|
+
config = merged_config
|
|
17
26
|
return if config.empty?
|
|
18
27
|
|
|
19
28
|
apply_claude_config(config)
|
|
20
29
|
end
|
|
21
30
|
|
|
31
|
+
def merged_config
|
|
32
|
+
read_json(claude_settings_path).merge(read_json(claude_config_path))
|
|
33
|
+
end
|
|
34
|
+
|
|
22
35
|
def read_json(path)
|
|
23
36
|
return {} unless File.exist?(path)
|
|
24
37
|
|
|
25
|
-
require 'json'
|
|
26
38
|
::JSON.parse(File.read(path), symbolize_names: true)
|
|
27
39
|
rescue StandardError => e
|
|
28
40
|
handle_exception(e, level: :debug)
|
|
29
41
|
{}
|
|
30
42
|
end
|
|
31
43
|
|
|
44
|
+
def anthropic_api_key
|
|
45
|
+
config = merged_config
|
|
46
|
+
first_present(
|
|
47
|
+
config[:anthropicApiKey],
|
|
48
|
+
config.dig(:env, :ANTHROPIC_API_KEY)
|
|
49
|
+
)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def openai_api_key
|
|
53
|
+
config = merged_config
|
|
54
|
+
first_present(
|
|
55
|
+
config[:openaiApiKey],
|
|
56
|
+
config.dig(:env, :OPENAI_API_KEY),
|
|
57
|
+
config.dig(:env, :CODEX_API_KEY)
|
|
58
|
+
)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def bedrock_bearer_token
|
|
62
|
+
env = read_json(claude_settings_path)[:env]
|
|
63
|
+
return nil unless env.is_a?(Hash)
|
|
64
|
+
|
|
65
|
+
direct = first_present(env[:AWS_BEARER_TOKEN_BEDROCK], env['AWS_BEARER_TOKEN_BEDROCK'])
|
|
66
|
+
return direct if direct
|
|
67
|
+
|
|
68
|
+
match = env.find do |key, value|
|
|
69
|
+
name = key.to_s.upcase
|
|
70
|
+
next false unless name.include?('AWS')
|
|
71
|
+
next false unless name.include?('BEARER')
|
|
72
|
+
next false unless name.include?('TOKEN')
|
|
73
|
+
next false unless name.include?('BEDROCK')
|
|
74
|
+
|
|
75
|
+
!normalize_secret(value).nil?
|
|
76
|
+
end
|
|
77
|
+
normalize_secret(match&.last)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def oauth_account_available?
|
|
81
|
+
oauth = read_json(claude_config_path)[:oauthAccount]
|
|
82
|
+
oauth.is_a?(Hash) && oauth.any? { |_k, value| !normalize_secret(value).nil? }
|
|
83
|
+
end
|
|
84
|
+
|
|
32
85
|
def apply_claude_config(config)
|
|
33
86
|
apply_api_keys(config)
|
|
34
87
|
apply_model_preference(config)
|
|
@@ -38,15 +91,23 @@ module Legion
|
|
|
38
91
|
llm = Legion::LLM.settings
|
|
39
92
|
providers = llm[:providers]
|
|
40
93
|
|
|
41
|
-
|
|
42
|
-
|
|
94
|
+
anthropic_key = first_present(config[:anthropicApiKey], config.dig(:env, :ANTHROPIC_API_KEY))
|
|
95
|
+
if anthropic_key && !setting_has_usable_credential?(providers.dig(:anthropic, :api_key))
|
|
96
|
+
providers[:anthropic][:api_key] = anthropic_key
|
|
43
97
|
log.debug 'Imported Anthropic API key from Claude CLI config'
|
|
44
98
|
end
|
|
45
99
|
|
|
46
|
-
|
|
100
|
+
openai_key = first_present(config[:openaiApiKey], config.dig(:env, :OPENAI_API_KEY), config.dig(:env, :CODEX_API_KEY))
|
|
101
|
+
if openai_key && !setting_has_usable_credential?(providers.dig(:openai, :api_key))
|
|
102
|
+
providers[:openai][:api_key] = openai_key
|
|
103
|
+
log.debug 'Imported OpenAI API key from Claude CLI config'
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
bedrock_token = bedrock_bearer_token
|
|
107
|
+
return unless bedrock_token && !setting_has_usable_credential?(providers.dig(:bedrock, :bearer_token))
|
|
47
108
|
|
|
48
|
-
providers[:
|
|
49
|
-
log.debug 'Imported
|
|
109
|
+
providers[:bedrock][:bearer_token] = bedrock_token
|
|
110
|
+
log.debug 'Imported Bedrock bearer token from Claude settings.json env section'
|
|
50
111
|
end
|
|
51
112
|
|
|
52
113
|
def apply_model_preference(config)
|
|
@@ -59,6 +120,52 @@ module Legion
|
|
|
59
120
|
llm[:default_model] = model
|
|
60
121
|
log.debug "Imported model preference from Claude CLI config: #{model}"
|
|
61
122
|
end
|
|
123
|
+
|
|
124
|
+
def setting_has_usable_credential?(value)
|
|
125
|
+
!resolve_setting_reference(value).nil?
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def resolve_setting_reference(value)
|
|
129
|
+
case value
|
|
130
|
+
when Array
|
|
131
|
+
value.each do |entry|
|
|
132
|
+
resolved = resolve_setting_reference(entry)
|
|
133
|
+
return resolved unless resolved.nil?
|
|
134
|
+
end
|
|
135
|
+
nil
|
|
136
|
+
when String
|
|
137
|
+
resolved = normalize_secret(value)
|
|
138
|
+
return nil if resolved.nil?
|
|
139
|
+
|
|
140
|
+
if resolved.start_with?('env://')
|
|
141
|
+
env_name = resolved.sub('env://', '')
|
|
142
|
+
return normalize_secret(ENV.fetch(env_name, nil))
|
|
143
|
+
end
|
|
144
|
+
return nil if resolved.match?(SECRET_URI_PATTERN)
|
|
145
|
+
|
|
146
|
+
resolved
|
|
147
|
+
else
|
|
148
|
+
normalize_secret(value)
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def first_present(*values)
|
|
153
|
+
values.each do |value|
|
|
154
|
+
normalized = normalize_secret(value)
|
|
155
|
+
return normalized unless normalized.nil?
|
|
156
|
+
end
|
|
157
|
+
nil
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def normalize_secret(value)
|
|
161
|
+
return nil if value.nil?
|
|
162
|
+
return value unless value.is_a?(String)
|
|
163
|
+
|
|
164
|
+
normalized = value.strip
|
|
165
|
+
return nil if normalized.empty?
|
|
166
|
+
|
|
167
|
+
normalized
|
|
168
|
+
end
|
|
62
169
|
end
|
|
63
170
|
end
|
|
64
171
|
end
|
|
@@ -15,18 +15,14 @@ module Legion
|
|
|
15
15
|
module_function
|
|
16
16
|
|
|
17
17
|
def load
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
config = read_json(CODEX_AUTH)
|
|
18
|
+
config = read_config
|
|
21
19
|
return if config.empty?
|
|
22
20
|
|
|
23
21
|
apply_codex_config(config)
|
|
24
22
|
end
|
|
25
23
|
|
|
26
24
|
def read_token
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
config = read_json(CODEX_AUTH)
|
|
25
|
+
config = read_config
|
|
30
26
|
return nil if config.empty?
|
|
31
27
|
return nil unless config[:auth_mode] == 'chatgpt'
|
|
32
28
|
|
|
@@ -37,6 +33,29 @@ module Legion
|
|
|
37
33
|
token
|
|
38
34
|
end
|
|
39
35
|
|
|
36
|
+
def read_openai_api_key
|
|
37
|
+
config = read_config
|
|
38
|
+
return nil if config.empty?
|
|
39
|
+
|
|
40
|
+
key = config[:OPENAI_API_KEY] || config[:openai_api_key]
|
|
41
|
+
return nil unless key.is_a?(String)
|
|
42
|
+
|
|
43
|
+
key = key.strip
|
|
44
|
+
return nil if key.empty?
|
|
45
|
+
|
|
46
|
+
key
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def read_openai_credential
|
|
50
|
+
read_token || read_openai_api_key
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def read_config
|
|
54
|
+
return {} unless File.exist?(CODEX_AUTH)
|
|
55
|
+
|
|
56
|
+
read_json(CODEX_AUTH)
|
|
57
|
+
end
|
|
58
|
+
|
|
40
59
|
def read_json(path)
|
|
41
60
|
::JSON.parse(File.read(path), symbolize_names: true)
|
|
42
61
|
rescue StandardError => e
|
|
@@ -9,30 +9,6 @@ module Legion
|
|
|
9
9
|
module Embeddings
|
|
10
10
|
extend Legion::Logging::Helper
|
|
11
11
|
|
|
12
|
-
PROVIDER_EMBEDDING_MODELS = {
|
|
13
|
-
bedrock: 'amazon.titan-embed-text-v2:0',
|
|
14
|
-
anthropic: nil,
|
|
15
|
-
openai: 'text-embedding-3-small',
|
|
16
|
-
gemini: 'text-embedding-004',
|
|
17
|
-
azure: 'text-embedding-3-small',
|
|
18
|
-
ollama: 'mxbai-embed-large'
|
|
19
|
-
}.freeze
|
|
20
|
-
|
|
21
|
-
TARGET_DIMENSION = 1024
|
|
22
|
-
|
|
23
|
-
OLLAMA_CONTEXT_CHARS = {
|
|
24
|
-
'mxbai-embed-large' => 1400,
|
|
25
|
-
'bge-large' => 1400,
|
|
26
|
-
'snowflake-arctic-embed' => 1400,
|
|
27
|
-
'nomic-embed-text' => 24_000
|
|
28
|
-
}.freeze
|
|
29
|
-
OLLAMA_DEFAULT_CONTEXT_CHARS = 1400
|
|
30
|
-
|
|
31
|
-
PREFIX_REGISTRY = {
|
|
32
|
-
'nomic-embed-text' => { document: 'search_document: ', query: 'search_query: ' },
|
|
33
|
-
'mxbai-embed-large' => { query: 'Represent this sentence for searching relevant passages: ' }
|
|
34
|
-
}.freeze
|
|
35
|
-
|
|
36
12
|
class << self
|
|
37
13
|
def generate(text:, model: nil, provider: nil, dimensions: nil, task: :document)
|
|
38
14
|
return { vector: nil, model: model, provider: provider, error: 'LLM not started' } unless LLM.started?
|
|
@@ -117,7 +93,7 @@ module Legion
|
|
|
117
93
|
end
|
|
118
94
|
|
|
119
95
|
def build_opts(model, provider, dimensions)
|
|
120
|
-
target_dim = enforce_dimension? ?
|
|
96
|
+
target_dim = enforce_dimension? ? target_dimension : dimensions
|
|
121
97
|
opts = { model: model }
|
|
122
98
|
opts[:provider] = provider if provider
|
|
123
99
|
opts[:dimensions] = target_dim if target_dim && provider&.to_sym == :openai
|
|
@@ -145,10 +121,11 @@ module Legion
|
|
|
145
121
|
end
|
|
146
122
|
|
|
147
123
|
def enforce_dimensions(vector, _provider)
|
|
148
|
-
|
|
149
|
-
return vector
|
|
124
|
+
dim = target_dimension
|
|
125
|
+
return vector if vector.size == dim
|
|
126
|
+
return vector.first(dim) if vector.size > dim
|
|
150
127
|
|
|
151
|
-
"got #{vector.size}, need #{
|
|
128
|
+
"got #{vector.size}, need #{dim} (provider cannot upscale)"
|
|
152
129
|
end
|
|
153
130
|
|
|
154
131
|
def handle_embed_failure(error, text:, failed_provider:, failed_model:)
|
|
@@ -210,9 +187,6 @@ module Legion
|
|
|
210
187
|
pm = models[provider&.to_sym] || models[provider.to_s]
|
|
211
188
|
return pm.to_s if pm
|
|
212
189
|
|
|
213
|
-
provider_default = PROVIDER_EMBEDDING_MODELS[provider&.to_sym] if provider
|
|
214
|
-
return provider_default if provider_default
|
|
215
|
-
|
|
216
190
|
'text-embedding-3-small'
|
|
217
191
|
end
|
|
218
192
|
|
|
@@ -220,7 +194,8 @@ module Legion
|
|
|
220
194
|
return text unless prefix_injection_enabled?
|
|
221
195
|
|
|
222
196
|
base_model = model.to_s.split(':').first
|
|
223
|
-
|
|
197
|
+
registry = embedding_settings[:prefix_registry] || {}
|
|
198
|
+
prefixes = registry[base_model]
|
|
224
199
|
return text unless prefixes
|
|
225
200
|
|
|
226
201
|
prefix = prefixes[task.to_sym]
|
|
@@ -279,12 +254,16 @@ module Legion
|
|
|
279
254
|
end
|
|
280
255
|
|
|
281
256
|
def embedding_settings
|
|
282
|
-
Legion::
|
|
257
|
+
Legion::LLM.settings[:embedding] || {}
|
|
283
258
|
rescue StandardError => e
|
|
284
259
|
handle_exception(e, level: :debug, operation: 'llm.embeddings.embedding_settings')
|
|
285
260
|
{}
|
|
286
261
|
end
|
|
287
262
|
|
|
263
|
+
def target_dimension
|
|
264
|
+
embedding_settings[:dimension] || 1024
|
|
265
|
+
end
|
|
266
|
+
|
|
288
267
|
def generate_ollama(text:, model:)
|
|
289
268
|
ctx_max = nil
|
|
290
269
|
ctx_max = ollama_context_chars(model)
|
|
@@ -359,8 +338,9 @@ module Legion
|
|
|
359
338
|
end
|
|
360
339
|
|
|
361
340
|
def ollama_context_chars(model)
|
|
362
|
-
base
|
|
363
|
-
|
|
341
|
+
base = model.to_s.split(':').first
|
|
342
|
+
context = embedding_settings[:ollama_context_chars] || {}
|
|
343
|
+
context[base] || embedding_settings[:ollama_default_context_chars] || 1400
|
|
364
344
|
end
|
|
365
345
|
|
|
366
346
|
# ── Azure OpenAI (direct HTTP with SNI, bypasses ruby_llm) ──
|
|
@@ -416,7 +396,7 @@ module Legion
|
|
|
416
396
|
req['Content-Type'] = 'application/json'
|
|
417
397
|
req['api-key'] = api_key
|
|
418
398
|
body = { input: input }
|
|
419
|
-
body[:dimensions] = dimensions ||
|
|
399
|
+
body[:dimensions] = dimensions || target_dimension
|
|
420
400
|
req.body = ::JSON.dump(body)
|
|
421
401
|
|
|
422
402
|
response = http.request(req)
|