legion-llm 0.8.47 → 0.8.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +18 -0
- data/CLAUDE.md +20 -17
- data/Gemfile +3 -0
- data/README.md +46 -48
- data/legion-llm.gemspec +1 -1
- data/lib/legion/llm/api/native/helpers.rb +5 -3
- data/lib/legion/llm/api/native/instances.rb +32 -35
- data/lib/legion/llm/api/native/providers.rb +51 -75
- data/lib/legion/llm/api/native/routing.rb +55 -0
- data/lib/legion/llm/api.rb +2 -0
- data/lib/legion/llm/cache/response.rb +2 -1
- data/lib/legion/llm/cache.rb +4 -2
- data/lib/legion/llm/call/daemon_client.rb +2 -2
- data/lib/legion/llm/call/dispatch.rb +88 -49
- data/lib/legion/llm/call/embeddings.rb +113 -503
- data/lib/legion/llm/call/lex_llm_adapter.rb +43 -15
- data/lib/legion/llm/call/providers.rb +22 -694
- data/lib/legion/llm/call/registry.rb +94 -10
- data/lib/legion/llm/call/structured_output.rb +1 -1
- data/lib/legion/llm/call.rb +0 -2
- data/lib/legion/llm/compat.rb +2 -7
- data/lib/legion/llm/config.rb +11 -1
- data/lib/legion/llm/context/curator.rb +2 -1
- data/lib/legion/llm/discovery/memory_gate.rb +53 -0
- data/lib/legion/llm/discovery/rule_generator.rb +147 -0
- data/lib/legion/llm/discovery.rb +201 -50
- data/lib/legion/llm/fleet/dispatcher.rb +4 -1
- data/lib/legion/llm/fleet/handler.rb +6 -2
- data/lib/legion/llm/fleet/lane.rb +1 -1
- data/lib/legion/llm/fleet/reply_dispatcher.rb +1 -6
- data/lib/legion/llm/hooks.rb +2 -2
- data/lib/legion/llm/inference/conversation.rb +3 -2
- data/lib/legion/llm/inference/executor.rb +172 -113
- data/lib/legion/llm/inference/prompt.rb +0 -12
- data/lib/legion/llm/inference/steps/classification.rb +4 -13
- data/lib/legion/llm/inference/steps/debate.rb +19 -22
- data/lib/legion/llm/inference/steps/gaia_advisory.rb +2 -2
- data/lib/legion/llm/inference/steps/prompt_cache.rb +1 -13
- data/lib/legion/llm/inference/steps/rag_context.rb +8 -20
- data/lib/legion/llm/inference/steps/skill_injector.rb +1 -13
- data/lib/legion/llm/inference/steps/sticky_helpers.rb +1 -13
- data/lib/legion/llm/inference/steps/sticky_persist.rb +29 -8
- data/lib/legion/llm/inference/steps/sticky_runners.rb +6 -6
- data/lib/legion/llm/inference/steps/tool_discovery.rb +27 -13
- data/lib/legion/llm/inference/steps/trigger_match.rb +3 -16
- data/lib/legion/llm/inference.rb +6 -22
- data/lib/legion/llm/inventory.rb +32 -38
- data/lib/legion/llm/quality/checker.rb +2 -2
- data/lib/legion/llm/quality/confidence/scorer.rb +3 -2
- data/lib/legion/llm/router/health_tracker.rb +144 -57
- data/lib/legion/llm/router/resolution.rb +9 -3
- data/lib/legion/llm/router/rule.rb +6 -3
- data/lib/legion/llm/router.rb +188 -57
- data/lib/legion/llm/scheduling.rb +2 -2
- data/lib/legion/llm/settings.rb +11 -66
- data/lib/legion/llm/skills/base.rb +4 -1
- data/lib/legion/llm/tools/confidence.rb +8 -6
- data/lib/legion/llm/tools/dispatcher.rb +27 -43
- data/lib/legion/llm/transport/message.rb +30 -1
- data/lib/legion/llm/types/tool_definition.rb +17 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +6 -4
- metadata +6 -7
- data/lib/legion/llm/call/claude_config_loader.rb +0 -182
- data/lib/legion/llm/call/codex_config_loader.rb +0 -137
- data/lib/legion/llm/discovery/ollama.rb +0 -116
- data/lib/legion/llm/discovery/vllm.rb +0 -134
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 88524bf330ed22e6ac366d35ddccdbad4557b115c447d87d238ab2670ca8da7e
|
|
4
|
+
data.tar.gz: 68c0470f4c10ee885bd28d0486f487777ae3c9dc35ab0fea3cc4ab860c68a0c6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ba9ad2293c9e65db838aca3921ba291ef3bfd71798fc044bfdcdb90f3a04e0536fa5c562f00ea3e06eda88c41ee1659d23ef46382c69424940b4ff058f5cd978
|
|
7
|
+
data.tar.gz: c7feaf54a2a618eb5d45741269ba65d855bb478142a3b90702d29de9d23f3bbceadca26b5eb765bedf6c2476b97901dc59cc597242051d9500df9733174532f4
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.8.49] - 2026-04-29
|
|
4
|
+
|
|
5
|
+
### Changed
|
|
6
|
+
- `Settings.register_defaults!` now calls `Legion::Settings.register_library` instead of `merge_settings`, using the idempotent legion-settings 1.4.0 API that prevents double-registration.
|
|
7
|
+
- Bumped `legion-settings` dependency floor to `>= 1.4.0`.
|
|
8
|
+
- Test stub `Legion::Settings` now exposes `register_library` matching the real 1.4.0 API.
|
|
9
|
+
|
|
10
|
+
## [0.8.48] - 2026-04-29
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `ToolDefinition.from_registry_entry` builds tool definitions from `Legion::Settings::Extensions` registry entries.
|
|
14
|
+
- `Dispatcher` checks `Settings::Extensions` for tool override resolution; when no matching entry is found it falls back to settings-based MCP overrides (no `Tools::Registry` or `Catalog::Registry` fallback).
|
|
15
|
+
- `Executor#add_registry_tool_definitions` reads from `Settings::Extensions` when available, falling back to `Legion::Tools::Registry` for backward compatibility.
|
|
16
|
+
- `Steps::ToolDiscovery` discovers tools from `Settings::Extensions` when available, falling back to `Legion::Tools::Registry`.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
- Bumped `legion-settings` dependency floor to `>= 1.4.0` (requires `Settings::Extensions` module).
|
|
20
|
+
|
|
3
21
|
## [0.8.47] - 2026-04-29
|
|
4
22
|
|
|
5
23
|
### Fixed
|
data/CLAUDE.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
Core LegionIO gem providing LLM capabilities to all extensions through Legion-native provider dispatch. Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
|
|
9
9
|
|
|
10
10
|
**GitHub**: https://github.com/LegionIO/legion-llm
|
|
11
|
-
**Version**: 0.8.
|
|
11
|
+
**Version**: 0.8.49
|
|
12
12
|
**License**: Apache-2.0
|
|
13
13
|
|
|
14
14
|
## Architecture
|
|
@@ -61,7 +61,10 @@ Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inferenc
|
|
|
61
61
|
│ ├── Compressor # Deterministic prompt compression (3 levels, code-block-aware)
|
|
62
62
|
│ └── Curator # Async conversation curation: strip thinking, distill tools, fold resolved exchanges
|
|
63
63
|
├── Discovery # Runtime introspection
|
|
64
|
-
│ ├── Ollama #
|
|
64
|
+
│ ├── Ollama # Multi-instance Ollama /api/tags + /api/show discovery (TTL-cached)
|
|
65
|
+
│ ├── Vllm # Multi-instance vLLM /v1/models + /health discovery (TTL-cached)
|
|
66
|
+
│ ├── RuleGenerator # Auto-generates routing rules from discovered instances/models
|
|
67
|
+
│ ├── MemoryGate # Checks available RAM before routing to local models
|
|
65
68
|
│ └── System # Queries OS memory: macOS (vm_stat/sysctl), Linux (/proc/meminfo)
|
|
66
69
|
├── Quality # Response quality evaluation
|
|
67
70
|
│ ├── Checker # Quality heuristics (empty, too_short, repetition, json_parse) + pluggable (was QualityChecker)
|
|
@@ -364,24 +367,21 @@ Settings read from `Legion::Settings[:llm]`:
|
|
|
364
367
|
|
|
365
368
|
### Provider Settings
|
|
366
369
|
|
|
367
|
-
Each provider has: `enabled`, `api_key`,
|
|
370
|
+
Provider defaults now live in each `lex-llm-*` provider extension's `default_settings`. The `providers:` key in `Settings.default` ships as an empty hash; settings files and extension registrations populate it at runtime. Each provider has: `enabled`, `api_key`, plus provider-specific keys.
|
|
368
371
|
|
|
369
|
-
|
|
372
|
+
Local/fleet providers (Ollama, vLLM, MLX) support multi-instance configs via an `instances:` hash. Discovery scans all instances in parallel, enriches models with real capability metadata, and generates per-instance routing rules.
|
|
370
373
|
|
|
371
|
-
|
|
372
|
-
- **SigV4** (default): `api_key` + `secret_key` (+ optional `session_token`)
|
|
373
|
-
- **Bearer token**: `bearer_token` for AWS Identity Center/SSO. Native Bedrock providers consume it through lex-llm configuration.
|
|
374
|
+
### Capability-Aware Routing
|
|
374
375
|
|
|
375
|
-
|
|
376
|
+
Routing rules carry `model_capabilities`, `context_length`, and `parameter_count` from provider-supplied `Model::Info`. The `RuleGenerator` creates rules from discovered instances without a static capability map -- each provider supplies real metadata.
|
|
377
|
+
|
|
378
|
+
### Memory Gate
|
|
376
379
|
|
|
377
|
-
|
|
380
|
+
`Discovery::MemoryGate` checks available system memory before routing to local models. A model is silently skipped when its size exceeds available RAM minus `discovery.memory_floor_mb`.
|
|
381
|
+
|
|
382
|
+
### Auto-Detection Priority
|
|
378
383
|
|
|
379
|
-
|
|
380
|
-
2. Anthropic -> `claude-sonnet-4-6`
|
|
381
|
-
3. OpenAI -> `gpt-4o`
|
|
382
|
-
4. Gemini -> `gemini-2.0-flash`
|
|
383
|
-
5. Azure -> (endpoint-specific, from `api_base`)
|
|
384
|
-
6. Ollama -> `llama3`
|
|
384
|
+
When no defaults are configured, the first enabled provider is used. Detection order and default models are defined by each `lex-llm-*` provider extension.
|
|
385
385
|
|
|
386
386
|
### Routing Settings
|
|
387
387
|
|
|
@@ -501,7 +501,10 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
501
501
|
| `lib/legion/llm/context/compressor.rb` | Deterministic prompt compression: 3 levels, code-block-aware, stopword removal |
|
|
502
502
|
| `lib/legion/llm/context/curator.rb` | Async heuristic conversation curation (was ContextCurator) |
|
|
503
503
|
| `lib/legion/llm/discovery.rb` | Discovery entry point: run, detect_embedding_capability, can_embed? |
|
|
504
|
-
| `lib/legion/llm/discovery/ollama.rb` | Ollama /api/tags discovery with TTL cache |
|
|
504
|
+
| `lib/legion/llm/discovery/ollama.rb` | Multi-instance Ollama /api/tags + /api/show discovery with TTL cache |
|
|
505
|
+
| `lib/legion/llm/discovery/vllm.rb` | Multi-instance vLLM /v1/models + /health discovery with TTL cache |
|
|
506
|
+
| `lib/legion/llm/discovery/rule_generator.rb` | Auto-generates routing rules from discovered instances/models |
|
|
507
|
+
| `lib/legion/llm/discovery/memory_gate.rb` | Checks available RAM vs model size before routing to local models |
|
|
505
508
|
| `lib/legion/llm/discovery/system.rb` | OS memory introspection (macOS + Linux) with TTL cache |
|
|
506
509
|
| `lib/legion/llm/quality.rb` | Quality entry point |
|
|
507
510
|
| `lib/legion/llm/quality/checker.rb` | Quality heuristics + pluggable callable (was QualityChecker) |
|
|
@@ -715,7 +718,7 @@ The legacy `vault_path` per-provider setting was removed in v0.3.1.
|
|
|
715
718
|
Tests run without the full LegionIO stack. `spec/spec_helper.rb` uses real `Legion::Logging` and `Legion::Settings` (no stubs — hard dependencies are always present). Each test resets settings to defaults via `before(:each)`.
|
|
716
719
|
|
|
717
720
|
```bash
|
|
718
|
-
bundle exec rspec #
|
|
721
|
+
bundle exec rspec # 2379 examples, 0 failures
|
|
719
722
|
bundle exec rubocop # 0 offenses
|
|
720
723
|
```
|
|
721
724
|
|
data/Gemfile
CHANGED
|
@@ -4,6 +4,9 @@ source 'https://rubygems.org'
|
|
|
4
4
|
|
|
5
5
|
gemspec
|
|
6
6
|
|
|
7
|
+
legion_settings_path = File.expand_path('../legion-settings', __dir__)
|
|
8
|
+
gem 'legion-settings', path: legion_settings_path if Dir.exist?(legion_settings_path)
|
|
9
|
+
|
|
7
10
|
group :test do
|
|
8
11
|
lex_llm_path = File.expand_path('../extensions-ai/lex-llm', __dir__)
|
|
9
12
|
if Dir.exist?(lex_llm_path)
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
LLM routing and provider orchestration for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Routes chat, embeddings, tool use, fleet dispatch, auditing, and provider metadata through Legion-native `lex-llm-*` provider extensions.
|
|
4
4
|
|
|
5
|
-
**Version**: 0.8.
|
|
5
|
+
**Version**: 0.8.49
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -90,6 +90,8 @@ When enabled, validates `Authorization: Bearer <token>` or `x-api-key` headers a
|
|
|
90
90
|
|
|
91
91
|
## Configuration
|
|
92
92
|
|
|
93
|
+
Provider defaults now live in each `lex-llm-*` provider extension. `legion-llm` ships an empty `providers: {}` hash; settings files and extension registrations populate it at runtime.
|
|
94
|
+
|
|
93
95
|
Add to your LegionIO settings directory (e.g. `~/.legionio/settings/llm.json`):
|
|
94
96
|
|
|
95
97
|
```json
|
|
@@ -103,53 +105,60 @@ Add to your LegionIO settings directory (e.g. `~/.legionio/settings/llm.json`):
|
|
|
103
105
|
"region": "us-east-2",
|
|
104
106
|
"bearer_token": ["vault://secret/data/llm/bedrock#bearer_token", "env://AWS_BEARER_TOKEN"]
|
|
105
107
|
},
|
|
106
|
-
"anthropic": {
|
|
107
|
-
"enabled": false,
|
|
108
|
-
"api_key": "env://ANTHROPIC_API_KEY"
|
|
109
|
-
},
|
|
110
|
-
"openai": {
|
|
111
|
-
"enabled": false,
|
|
112
|
-
"api_key": "env://OPENAI_API_KEY"
|
|
113
|
-
},
|
|
114
108
|
"ollama": {
|
|
115
|
-
"enabled":
|
|
116
|
-
"base_url": "http://localhost:11434"
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
"default_model": "qwen3.6-27b",
|
|
122
|
-
"enable_thinking": true
|
|
123
|
-
},
|
|
124
|
-
"mlx": {
|
|
125
|
-
"enabled": false,
|
|
126
|
-
"base_url": "http://localhost:8000"
|
|
109
|
+
"enabled": true,
|
|
110
|
+
"base_url": "http://localhost:11434",
|
|
111
|
+
"instances": {
|
|
112
|
+
"default": { "base_url": "http://localhost:11434" },
|
|
113
|
+
"gpu_server": { "base_url": "http://gpu-server:11434" }
|
|
114
|
+
}
|
|
127
115
|
}
|
|
128
116
|
}
|
|
129
117
|
}
|
|
130
118
|
}
|
|
131
119
|
```
|
|
132
120
|
|
|
133
|
-
Credentials are resolved automatically by the universal secret resolver in `legion-settings` (v1.3.0+). Use `vault://` URIs for Vault secrets, `env://` for environment variables, or plain strings for static values. Array values act as fallback chains
|
|
121
|
+
Credentials are resolved automatically by the universal secret resolver in `legion-settings` (v1.3.0+). Use `vault://` URIs for Vault secrets, `env://` for environment variables, or plain strings for static values. Array values act as fallback chains -- the first non-nil result wins.
|
|
122
|
+
|
|
123
|
+
### Provider Extensions (lex-llm-*)
|
|
124
|
+
|
|
125
|
+
Each provider is a standalone `lex-llm-*` gem that ships its own `default_settings`, model catalog, and capability declarations. The provider registers itself with `legion-llm` at load time. Provider gems implement:
|
|
126
|
+
|
|
127
|
+
- **`default_settings`** -- Connection defaults (base_url, region, API key env vars)
|
|
128
|
+
- **`model_allowed?(model_name)`** -- Provider-level model filtering
|
|
129
|
+
- **`Model::Info`** -- Real capabilities, context lengths, and parameter counts for each model
|
|
130
|
+
|
|
131
|
+
The routing layer only sees models the provider has already filtered and annotated.
|
|
132
|
+
|
|
133
|
+
### Multi-Instance Providers
|
|
134
|
+
|
|
135
|
+
Local and fleet providers (Ollama, vLLM, MLX) support multiple named instances:
|
|
136
|
+
|
|
137
|
+
```json
|
|
138
|
+
{
|
|
139
|
+
"ollama": {
|
|
140
|
+
"enabled": true,
|
|
141
|
+
"instances": {
|
|
142
|
+
"macbook": { "base_url": "http://localhost:11434" },
|
|
143
|
+
"gpu_server": { "base_url": "http://gpu-server:11434" }
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Discovery scans all instances in parallel, enriches models with `/api/show` metadata, and generates per-instance routing rules. Each instance appears independently in the routing table so the router can target the exact hardware.
|
|
150
|
+
|
|
151
|
+
### Capability-Aware Routing
|
|
134
152
|
|
|
135
|
-
|
|
153
|
+
Both manually configured and auto-generated routing rules carry `model_capabilities`, `context_length`, and `parameter_count` from provider-supplied `Model::Info`. The router uses these to match capability requirements (e.g., `thinking`, `vision`, `tools`) without a static lookup table.
|
|
136
154
|
|
|
137
|
-
|
|
155
|
+
### Generic Dispatch
|
|
138
156
|
|
|
139
|
-
|
|
140
|
-
|-------|------|-------------|
|
|
141
|
-
| `enabled` | Boolean | Enable this provider (default: `false`) |
|
|
142
|
-
| `api_key` | String | API key (supports `vault://`, `env://`, or plain string) |
|
|
157
|
+
`Call::Dispatch.call` accepts a `capability:` parameter (`:chat`, `:stream`, `:embed`) and routes to the registered `lex-llm-*` adapter. This replaces the old provider-specific dispatch paths.
|
|
143
158
|
|
|
144
|
-
|
|
159
|
+
### Memory Gate
|
|
145
160
|
|
|
146
|
-
|
|
147
|
-
|----------|------------------|
|
|
148
|
-
| **Bedrock** | `secret_key`, `session_token`, `region` (default: `us-east-2`), `bearer_token` (alternative to SigV4 — for AWS Identity Center/SSO) |
|
|
149
|
-
| **Azure** | `api_base` (Azure OpenAI endpoint URL, required), `auth_token` (bearer token alternative to `api_key`) |
|
|
150
|
-
| **Ollama** | `base_url` (default: `http://localhost:11434`) |
|
|
151
|
-
| **vLLM** | `base_url` (default: `http://localhost:8000/v1`), `api_key`, `enable_thinking` |
|
|
152
|
-
| **MLX** | `base_url` (default: `http://localhost:8000`), `api_key` |
|
|
161
|
+
Discovery checks available system memory (macOS `vm_stat`/`sysctl`, Linux `/proc/meminfo`) before routing to local models. A model is silently skipped when its size exceeds available RAM minus `discovery.memory_floor_mb`.
|
|
153
162
|
|
|
154
163
|
### Credential Resolution
|
|
155
164
|
|
|
@@ -171,18 +180,7 @@ By the time `Legion::LLM.start` runs, all `vault://` and `env://` references hav
|
|
|
171
180
|
|
|
172
181
|
### Auto-Detection
|
|
173
182
|
|
|
174
|
-
If no `default_model` or `default_provider` is set, legion-llm auto-detects from the first enabled provider
|
|
175
|
-
|
|
176
|
-
| Priority | Provider | Default Model |
|
|
177
|
-
|----------|----------|---------------|
|
|
178
|
-
| 1 | Bedrock | `us.anthropic.claude-sonnet-4-6-v1` |
|
|
179
|
-
| 2 | Anthropic | `claude-sonnet-4-6` |
|
|
180
|
-
| 3 | OpenAI | `gpt-4o` |
|
|
181
|
-
| 4 | Gemini | `gemini-2.0-flash` |
|
|
182
|
-
| 5 | Azure | (endpoint-specific) |
|
|
183
|
-
| 6 | Ollama | `qwen3.5:latest` |
|
|
184
|
-
| 7 | vLLM | `qwen3.6-27b` |
|
|
185
|
-
| 8 | MLX | (configured model) |
|
|
183
|
+
If no `default_model` or `default_provider` is set, legion-llm auto-detects from the first enabled provider. The detection order and default models are defined by each `lex-llm-*` provider extension's `default_settings`.
|
|
186
184
|
|
|
187
185
|
## Core API
|
|
188
186
|
|
data/legion-llm.gemspec
CHANGED
|
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
|
|
|
30
30
|
spec.add_dependency 'legion-cache', '>= 1.4.2'
|
|
31
31
|
spec.add_dependency 'legion-json', '>= 1.2.0'
|
|
32
32
|
spec.add_dependency 'legion-logging', '>= 1.2.8'
|
|
33
|
-
spec.add_dependency 'legion-settings', '>= 1.
|
|
33
|
+
spec.add_dependency 'legion-settings', '>= 1.4.0'
|
|
34
34
|
spec.add_dependency 'lex-knowledge'
|
|
35
35
|
spec.add_dependency 'lex-llm', '>= 0.1.6'
|
|
36
36
|
spec.add_dependency 'pdf-reader'
|
|
@@ -10,8 +10,10 @@ require 'legion/llm/types'
|
|
|
10
10
|
begin
|
|
11
11
|
require 'legion/identity/request'
|
|
12
12
|
require 'legion/identity/process'
|
|
13
|
-
rescue LoadError
|
|
14
|
-
|
|
13
|
+
rescue LoadError => e
|
|
14
|
+
Object.new.extend(Legion::Logging::Helper).handle_exception(
|
|
15
|
+
e, level: :debug, handled: true, operation: 'llm.api.native.helpers.optional_identity_require'
|
|
16
|
+
)
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
module Legion
|
|
@@ -334,7 +336,7 @@ module Legion
|
|
|
334
336
|
text = content.key?(:text) || content.key?('text') ? (content[:text] || content['text']) : (content[:content] || content['content'])
|
|
335
337
|
extract_text_content(text)
|
|
336
338
|
else
|
|
337
|
-
content.to_s
|
|
339
|
+
content.respond_to?(:text) ? content.text.to_s : content.to_s
|
|
338
340
|
end
|
|
339
341
|
end
|
|
340
342
|
|
|
@@ -16,14 +16,13 @@ module Legion
|
|
|
16
16
|
log.debug('[llm][api][instances] action=list_instances')
|
|
17
17
|
require_llm!
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
instances = Legion::LLM::API::Native::Instances.instances_from_offerings(offerings)
|
|
19
|
+
instances = Legion::LLM::API::Native::Instances.registry_instances
|
|
21
20
|
|
|
22
21
|
json_response({
|
|
23
22
|
instances: instances,
|
|
24
23
|
summary: {
|
|
25
24
|
total: instances.size,
|
|
26
|
-
providers: instances.map { |
|
|
25
|
+
providers: instances.map { |inst| inst[:provider] }.uniq.size
|
|
27
26
|
}
|
|
28
27
|
})
|
|
29
28
|
rescue StandardError => e
|
|
@@ -36,11 +35,16 @@ module Legion
|
|
|
36
35
|
log.debug("[llm][api][instances] action=get_instance id=#{instance_id}")
|
|
37
36
|
require_llm!
|
|
38
37
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
38
|
+
result = Legion::LLM::API::Native::Instances.find_registry_instance(instance_id)
|
|
39
|
+
if result == :ambiguous
|
|
40
|
+
halt json_error('ambiguous_instance_id',
|
|
41
|
+
"Instance id '#{instance_id}' matches multiple providers; " \
|
|
42
|
+
'use composite id (provider/instance) to disambiguate',
|
|
43
|
+
status_code: 400)
|
|
44
|
+
end
|
|
45
|
+
halt json_error('instance_not_found', "Instance '#{instance_id}' not found", status_code: 404) unless result
|
|
42
46
|
|
|
43
|
-
json_response({ instance:
|
|
47
|
+
json_response({ instance: result })
|
|
44
48
|
rescue StandardError => e
|
|
45
49
|
handle_exception(e, level: :error, handled: true, operation: 'llm.api.instances.get')
|
|
46
50
|
json_error('instance_inventory_error', e.message, status_code: 500)
|
|
@@ -49,39 +53,32 @@ module Legion
|
|
|
49
53
|
log.debug('[llm][api][instances] provider instance inventory routes registered')
|
|
50
54
|
end
|
|
51
55
|
|
|
52
|
-
def self.
|
|
53
|
-
instances =
|
|
54
|
-
|
|
56
|
+
def self.registry_instances
|
|
57
|
+
instances = []
|
|
58
|
+
Legion::LLM::Call::Registry.available.each do |provider_name|
|
|
59
|
+
Legion::LLM::Call::Registry.instances_for(provider_name).each_key do |inst_id|
|
|
60
|
+
instances << {
|
|
61
|
+
id: "#{provider_name}/#{inst_id}",
|
|
62
|
+
provider: provider_name.to_s,
|
|
63
|
+
instance: inst_id.to_s
|
|
64
|
+
}
|
|
65
|
+
end
|
|
55
66
|
end
|
|
56
|
-
instances.sort_by { |
|
|
67
|
+
instances.sort_by { |inst| inst[:id] }
|
|
57
68
|
end
|
|
58
69
|
|
|
59
|
-
def self.
|
|
60
|
-
|
|
61
|
-
|
|
70
|
+
def self.find_registry_instance(instance_id)
|
|
71
|
+
# Try exact composite id match first (e.g. "ollama/local")
|
|
72
|
+
exact = registry_instances.find { |inst| inst[:id] == instance_id }
|
|
73
|
+
return exact if exact
|
|
62
74
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
transports: rows.map { |offering| offering[:transport].to_s }.uniq.sort,
|
|
68
|
-
health: aggregate_health(rows),
|
|
69
|
-
capacity: aggregate_capacity(rows),
|
|
70
|
-
offerings: rows.sort_by { |offering| offering[:offering_id].to_s }
|
|
71
|
-
}
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
def self.aggregate_health(offerings)
|
|
75
|
-
states = offerings.filter_map { |offering| offering.dig(:health, :circuit_state) }.uniq.sort
|
|
76
|
-
{ circuit_states: states }
|
|
77
|
-
end
|
|
75
|
+
# Fall back to bare instance name, but guard against ambiguity
|
|
76
|
+
matches = registry_instances.select { |inst| inst[:instance] == instance_id }
|
|
77
|
+
return matches.first if matches.size == 1
|
|
78
|
+
return :ambiguous if matches.size > 1
|
|
78
79
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
max_context_window: offerings.filter_map { |offering| offering.dig(:limits, :context_window) }.max,
|
|
82
|
-
max_output_tokens: offerings.filter_map { |offering| offering.dig(:limits, :max_output_tokens) }.max,
|
|
83
|
-
offering_count: offerings.size
|
|
84
|
-
}.compact
|
|
80
|
+
# No match found
|
|
81
|
+
nil
|
|
85
82
|
end
|
|
86
83
|
end
|
|
87
84
|
end
|
|
@@ -9,10 +9,6 @@ module Legion
|
|
|
9
9
|
module Providers
|
|
10
10
|
extend Legion::Logging::Helper
|
|
11
11
|
|
|
12
|
-
SECRET_KEYS = %w[
|
|
13
|
-
api_key secret_key bearer_token session_token auth_token authorization password
|
|
14
|
-
].freeze
|
|
15
|
-
|
|
16
12
|
def self.registered(app)
|
|
17
13
|
log.debug('[llm][api][providers] registering provider routes')
|
|
18
14
|
|
|
@@ -20,19 +16,15 @@ module Legion
|
|
|
20
16
|
log.debug('[llm][api][providers] action=list_providers')
|
|
21
17
|
require_llm!
|
|
22
18
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
19
|
+
instances = begin
|
|
20
|
+
Legion::LLM::Call::Registry.all_instances
|
|
21
|
+
rescue StandardError => e
|
|
22
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.api.providers.registry_read')
|
|
23
|
+
[]
|
|
24
|
+
end
|
|
28
25
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
enabled: true,
|
|
32
|
-
default_model: Legion::LLM::API::Native::Providers.config_value(config, :default_model),
|
|
33
|
-
health: Legion::LLM::API::Native::Providers.provider_health(provider_name),
|
|
34
|
-
native: Legion::LLM::Call::Registry.registered?(provider_name.to_sym)
|
|
35
|
-
}
|
|
26
|
+
provider_list = instances.map do |entry|
|
|
27
|
+
Legion::LLM::API::Native::Providers.instance_to_hash(entry)
|
|
36
28
|
end
|
|
37
29
|
|
|
38
30
|
summary = {
|
|
@@ -53,22 +45,30 @@ module Legion
|
|
|
53
45
|
require_llm!
|
|
54
46
|
|
|
55
47
|
provider_name = params[:name].to_s
|
|
56
|
-
|
|
48
|
+
provider_sym = provider_name.to_sym
|
|
49
|
+
|
|
50
|
+
instances = begin
|
|
51
|
+
Legion::LLM::Call::Registry.all_instances
|
|
52
|
+
rescue StandardError => e
|
|
53
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.api.providers.registry_read')
|
|
54
|
+
[]
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
family = instances.select { |entry| entry[:provider].to_sym == provider_sym }
|
|
57
58
|
|
|
58
|
-
unless
|
|
59
|
+
unless family.any?
|
|
59
60
|
log.debug("[llm][api][providers] action=not_found name=#{params[:name]}")
|
|
60
|
-
halt json_error('provider_not_found',
|
|
61
|
+
halt json_error('provider_not_found',
|
|
62
|
+
"Provider '#{params[:name]}' not found",
|
|
63
|
+
status_code: 404)
|
|
61
64
|
end
|
|
62
65
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
native: Legion::LLM::Call::Registry.registered?(provider_name.to_sym),
|
|
70
|
-
config: Legion::LLM::API::Native::Providers.safe_config(provider_config)
|
|
71
|
-
})
|
|
66
|
+
provider_list = family.map do |entry|
|
|
67
|
+
Legion::LLM::API::Native::Providers.instance_to_hash(entry)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
log.debug("[llm][api][providers] action=found name=#{params[:name]} instances=#{provider_list.size}")
|
|
71
|
+
json_response({ provider: provider_name, instances: provider_list })
|
|
72
72
|
rescue StandardError => e
|
|
73
73
|
handle_exception(e, level: :error, handled: true, operation: 'llm.api.providers.get')
|
|
74
74
|
json_error('provider_error', e.message, status_code: 500)
|
|
@@ -77,54 +77,30 @@ module Legion
|
|
|
77
77
|
log.debug('[llm][api][providers] provider routes registered')
|
|
78
78
|
end
|
|
79
79
|
|
|
80
|
-
def self.
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def self.enabled_provider_config?(config)
|
|
86
|
-
config.is_a?(Hash) && config_value(config, :enabled, true) != false
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
def self.config_value(config, key, default = nil)
|
|
90
|
-
return default unless config.respond_to?(:key?)
|
|
91
|
-
|
|
92
|
-
string_key = key.to_s
|
|
93
|
-
return config[string_key] if config.key?(string_key)
|
|
94
|
-
|
|
95
|
-
config.key?(key) ? config[key] : default
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def self.settings_value(key, default = nil)
|
|
99
|
-
Legion::LLM::Settings.value(key, default: default)
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def self.safe_config(config)
|
|
103
|
-
config.each_with_object({}) do |(key, value), safe|
|
|
104
|
-
safe[key] = redact_value(value) unless SECRET_KEYS.include?(key.to_s)
|
|
105
|
-
end
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
def self.redact_value(value)
|
|
109
|
-
case value
|
|
110
|
-
when Hash
|
|
111
|
-
safe_config(value)
|
|
112
|
-
when Array
|
|
113
|
-
value.map { |entry| redact_value(entry) }
|
|
114
|
-
else
|
|
115
|
-
value
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
def self.provider_health(name)
|
|
120
|
-
if Legion::LLM::Router.routing_enabled?
|
|
121
|
-
tracker = Legion::LLM::Router.health_tracker
|
|
122
|
-
provider_key = name.to_sym
|
|
123
|
-
{ circuit_state: tracker.circuit_state(provider_key).to_s,
|
|
124
|
-
adjustment: tracker.adjustment(provider_key) }
|
|
125
|
-
else
|
|
126
|
-
{ circuit_state: 'unknown' }
|
|
80
|
+
def self.instance_to_hash(entry)
|
|
81
|
+
health = begin
|
|
82
|
+
Legion::LLM::Router.health_tracker
|
|
83
|
+
rescue StandardError
|
|
84
|
+
nil
|
|
127
85
|
end
|
|
86
|
+
provider_key = entry[:provider].to_sym
|
|
87
|
+
instance_key = entry[:instance].to_sym
|
|
88
|
+
|
|
89
|
+
{
|
|
90
|
+
provider: entry[:provider].to_s,
|
|
91
|
+
instance: entry[:instance].to_s,
|
|
92
|
+
tier: entry.dig(:metadata, :tier)&.to_s,
|
|
93
|
+
capabilities: Array(entry.dig(:metadata, :capabilities)).map(&:to_s),
|
|
94
|
+
health: if health
|
|
95
|
+
{
|
|
96
|
+
circuit_state: health.circuit_state(provider_key, instance: instance_key).to_s,
|
|
97
|
+
adjustment: health.adjustment(provider_key, instance: instance_key)
|
|
98
|
+
}
|
|
99
|
+
else
|
|
100
|
+
{}
|
|
101
|
+
end,
|
|
102
|
+
native: true
|
|
103
|
+
}
|
|
128
104
|
end
|
|
129
105
|
end
|
|
130
106
|
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/logging/helper'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module LLM
|
|
7
|
+
module API
|
|
8
|
+
module Native
|
|
9
|
+
module Routing
|
|
10
|
+
extend Legion::Logging::Helper
|
|
11
|
+
|
|
12
|
+
def self.registered(app)
|
|
13
|
+
log.debug('[llm][api][routing] registering routing routes')
|
|
14
|
+
|
|
15
|
+
app.get '/api/llm/routing' do
|
|
16
|
+
log.debug('[llm][api][routing] action=list_rules')
|
|
17
|
+
require_llm!
|
|
18
|
+
|
|
19
|
+
rules = Legion::LLM::Router.send(:load_rules)
|
|
20
|
+
auto_count = rules.count { |r| r.name.to_s.start_with?('auto:') }
|
|
21
|
+
manual_count = rules.size - auto_count
|
|
22
|
+
|
|
23
|
+
rule_list = rules.map do |rule|
|
|
24
|
+
{
|
|
25
|
+
name: rule.name,
|
|
26
|
+
priority: rule.priority,
|
|
27
|
+
conditions: rule.conditions,
|
|
28
|
+
target: rule.target,
|
|
29
|
+
constraint: rule.constraint,
|
|
30
|
+
auto: rule.name.to_s.start_with?('auto:')
|
|
31
|
+
}.compact
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
json_response({
|
|
35
|
+
routing_enabled: Legion::LLM::Router.routing_enabled?,
|
|
36
|
+
auto_rules_populated: Legion::LLM::Router.auto_rules_populated?,
|
|
37
|
+
rules: rule_list,
|
|
38
|
+
summary: {
|
|
39
|
+
total: rules.size,
|
|
40
|
+
auto: auto_count,
|
|
41
|
+
manual: manual_count
|
|
42
|
+
}
|
|
43
|
+
})
|
|
44
|
+
rescue StandardError => e
|
|
45
|
+
handle_exception(e, level: :error, handled: true, operation: 'llm.api.routing.list')
|
|
46
|
+
json_error('routing_error', e.message, status_code: 500)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
log.debug('[llm][api][routing] routing routes registered')
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
data/lib/legion/llm/api.rb
CHANGED
|
@@ -8,6 +8,7 @@ require_relative 'api/native/providers'
|
|
|
8
8
|
require_relative 'api/native/models'
|
|
9
9
|
require_relative 'api/native/offerings'
|
|
10
10
|
require_relative 'api/native/instances'
|
|
11
|
+
require_relative 'api/native/routing'
|
|
11
12
|
require_relative 'api/translators/openai_request'
|
|
12
13
|
require_relative 'api/translators/openai_response'
|
|
13
14
|
require_relative 'api/openai/chat_completions'
|
|
@@ -34,6 +35,7 @@ module Legion
|
|
|
34
35
|
Native::Models.registered(app)
|
|
35
36
|
Native::Offerings.registered(app)
|
|
36
37
|
Native::Instances.registered(app)
|
|
38
|
+
Native::Routing.registered(app)
|
|
37
39
|
OpenAI::ChatCompletions.registered(app)
|
|
38
40
|
OpenAI::Models.registered(app)
|
|
39
41
|
OpenAI::Embeddings.registered(app)
|
|
@@ -135,7 +135,8 @@ module Legion
|
|
|
135
135
|
|
|
136
136
|
private_class_method def self.local_cache_backend?
|
|
137
137
|
respond_to?(:local_cache_connected?) && local_cache_connected?
|
|
138
|
-
rescue StandardError
|
|
138
|
+
rescue StandardError => e
|
|
139
|
+
log.debug("[llm][cache][response] action=local_cache_backend error=#{e.class} message=#{e.message}")
|
|
139
140
|
false
|
|
140
141
|
end
|
|
141
142
|
|