legion-llm 0.8.47 → 0.8.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/CHANGELOG.md +18 -0
  4. data/CLAUDE.md +20 -17
  5. data/Gemfile +3 -0
  6. data/README.md +46 -48
  7. data/legion-llm.gemspec +1 -1
  8. data/lib/legion/llm/api/native/helpers.rb +5 -3
  9. data/lib/legion/llm/api/native/instances.rb +32 -35
  10. data/lib/legion/llm/api/native/providers.rb +51 -75
  11. data/lib/legion/llm/api/native/routing.rb +55 -0
  12. data/lib/legion/llm/api.rb +2 -0
  13. data/lib/legion/llm/cache/response.rb +2 -1
  14. data/lib/legion/llm/cache.rb +4 -2
  15. data/lib/legion/llm/call/daemon_client.rb +2 -2
  16. data/lib/legion/llm/call/dispatch.rb +88 -49
  17. data/lib/legion/llm/call/embeddings.rb +113 -503
  18. data/lib/legion/llm/call/lex_llm_adapter.rb +43 -15
  19. data/lib/legion/llm/call/providers.rb +22 -694
  20. data/lib/legion/llm/call/registry.rb +94 -10
  21. data/lib/legion/llm/call/structured_output.rb +1 -1
  22. data/lib/legion/llm/call.rb +0 -2
  23. data/lib/legion/llm/compat.rb +2 -7
  24. data/lib/legion/llm/config.rb +11 -1
  25. data/lib/legion/llm/context/curator.rb +2 -1
  26. data/lib/legion/llm/discovery/memory_gate.rb +53 -0
  27. data/lib/legion/llm/discovery/rule_generator.rb +147 -0
  28. data/lib/legion/llm/discovery.rb +201 -50
  29. data/lib/legion/llm/fleet/dispatcher.rb +4 -1
  30. data/lib/legion/llm/fleet/handler.rb +6 -2
  31. data/lib/legion/llm/fleet/lane.rb +1 -1
  32. data/lib/legion/llm/fleet/reply_dispatcher.rb +1 -6
  33. data/lib/legion/llm/hooks.rb +2 -2
  34. data/lib/legion/llm/inference/conversation.rb +3 -2
  35. data/lib/legion/llm/inference/executor.rb +172 -113
  36. data/lib/legion/llm/inference/prompt.rb +0 -12
  37. data/lib/legion/llm/inference/steps/classification.rb +4 -13
  38. data/lib/legion/llm/inference/steps/debate.rb +19 -22
  39. data/lib/legion/llm/inference/steps/gaia_advisory.rb +2 -2
  40. data/lib/legion/llm/inference/steps/prompt_cache.rb +1 -13
  41. data/lib/legion/llm/inference/steps/rag_context.rb +8 -20
  42. data/lib/legion/llm/inference/steps/skill_injector.rb +1 -13
  43. data/lib/legion/llm/inference/steps/sticky_helpers.rb +1 -13
  44. data/lib/legion/llm/inference/steps/sticky_persist.rb +29 -8
  45. data/lib/legion/llm/inference/steps/sticky_runners.rb +6 -6
  46. data/lib/legion/llm/inference/steps/tool_discovery.rb +27 -13
  47. data/lib/legion/llm/inference/steps/trigger_match.rb +3 -16
  48. data/lib/legion/llm/inference.rb +6 -22
  49. data/lib/legion/llm/inventory.rb +32 -38
  50. data/lib/legion/llm/quality/checker.rb +2 -2
  51. data/lib/legion/llm/quality/confidence/scorer.rb +3 -2
  52. data/lib/legion/llm/router/health_tracker.rb +144 -57
  53. data/lib/legion/llm/router/resolution.rb +9 -3
  54. data/lib/legion/llm/router/rule.rb +6 -3
  55. data/lib/legion/llm/router.rb +188 -57
  56. data/lib/legion/llm/scheduling.rb +2 -2
  57. data/lib/legion/llm/settings.rb +11 -66
  58. data/lib/legion/llm/skills/base.rb +4 -1
  59. data/lib/legion/llm/tools/confidence.rb +8 -6
  60. data/lib/legion/llm/tools/dispatcher.rb +27 -43
  61. data/lib/legion/llm/transport/message.rb +30 -1
  62. data/lib/legion/llm/types/tool_definition.rb +17 -0
  63. data/lib/legion/llm/version.rb +1 -1
  64. data/lib/legion/llm.rb +6 -4
  65. metadata +6 -7
  66. data/lib/legion/llm/call/claude_config_loader.rb +0 -182
  67. data/lib/legion/llm/call/codex_config_loader.rb +0 -137
  68. data/lib/legion/llm/discovery/ollama.rb +0 -116
  69. data/lib/legion/llm/discovery/vllm.rb +0 -134
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 758dae0c2bb2fba09c683bbcbf29459301ffdfe0239ff37214952e8c334422b0
4
- data.tar.gz: 841c7d0ae47d825431601edb635bde28f1f7b60f938a008abe1c8d6f2ac5772d
3
+ metadata.gz: 88524bf330ed22e6ac366d35ddccdbad4557b115c447d87d238ab2670ca8da7e
4
+ data.tar.gz: 68c0470f4c10ee885bd28d0486f487777ae3c9dc35ab0fea3cc4ab860c68a0c6
5
5
  SHA512:
6
- metadata.gz: 7a14e57a3a2bd88ac05cb83608e64461b710aed5370eb0686830dc705a80019eb7de351279204eb96429e06da167420c744d8ae79518dbeec6d7a69fcec14c7a
7
- data.tar.gz: d442eeee484806be5f01dd13e212b155d8cb8d839cb50a0ddd9517d013a49675320936927ded888aa4c617ee8d282624dc93b69b814599c097ee6078a3e4409c
6
+ metadata.gz: ba9ad2293c9e65db838aca3921ba291ef3bfd71798fc044bfdcdb90f3a04e0536fa5c562f00ea3e06eda88c41ee1659d23ef46382c69424940b4ff058f5cd978
7
+ data.tar.gz: c7feaf54a2a618eb5d45741269ba65d855bb478142a3b90702d29de9d23f3bbceadca26b5eb765bedf6c2476b97901dc59cc597242051d9500df9733174532f4
data/.gitignore CHANGED
@@ -21,3 +21,6 @@ legion.log
21
21
  .worktrees/
22
22
  .claude/
23
23
  docs/
24
+ bin/apollo-setup-postreboot.sh
25
+ bin/apollo-setup-prereboot.sh
26
+ legionio-bootstrap-uhg-v3.json
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.8.49] - 2026-04-29
4
+
5
+ ### Changed
6
+ - `Settings.register_defaults!` now calls `Legion::Settings.register_library` instead of `merge_settings`, using the idempotent legion-settings 1.4.0 API that prevents double-registration.
7
+ - Bumped `legion-settings` dependency floor to `>= 1.4.0`.
8
+ - Test stub `Legion::Settings` now exposes `register_library` matching the real 1.4.0 API.
9
+
10
+ ## [0.8.48] - 2026-04-29
11
+
12
+ ### Added
13
+ - `ToolDefinition.from_registry_entry` builds tool definitions from `Legion::Settings::Extensions` registry entries.
14
+ - `Dispatcher` checks `Settings::Extensions` for tool override resolution; when no matching entry is found it falls back to settings-based MCP overrides (no `Tools::Registry` or `Catalog::Registry` fallback).
15
+ - `Executor#add_registry_tool_definitions` reads from `Settings::Extensions` when available, falling back to `Legion::Tools::Registry` for backward compatibility.
16
+ - `Steps::ToolDiscovery` discovers tools from `Settings::Extensions` when available, falling back to `Legion::Tools::Registry`.
17
+
18
+ ### Changed
19
+ - Bumped `legion-settings` dependency floor to `>= 1.4.0` (requires `Settings::Extensions` module).
20
+
3
21
  ## [0.8.47] - 2026-04-29
4
22
 
5
23
  ### Fixed
data/CLAUDE.md CHANGED
@@ -8,7 +8,7 @@
8
8
  Core LegionIO gem providing LLM capabilities to all extensions through Legion-native provider dispatch. Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
9
9
 
10
10
  **GitHub**: https://github.com/LegionIO/legion-llm
11
- **Version**: 0.8.0
11
+ **Version**: 0.8.49
12
12
  **License**: Apache-2.0
13
13
 
14
14
  ## Architecture
@@ -61,7 +61,10 @@ Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inferenc
61
61
  │ ├── Compressor # Deterministic prompt compression (3 levels, code-block-aware)
62
62
  │ └── Curator # Async conversation curation: strip thinking, distill tools, fold resolved exchanges
63
63
  ├── Discovery # Runtime introspection
64
- │ ├── Ollama # Queries Ollama /api/tags for pulled models (TTL-cached)
64
+ │ ├── Ollama # Multi-instance Ollama /api/tags + /api/show discovery (TTL-cached)
65
+ │ ├── Vllm # Multi-instance vLLM /v1/models + /health discovery (TTL-cached)
66
+ │ ├── RuleGenerator # Auto-generates routing rules from discovered instances/models
67
+ │ ├── MemoryGate # Checks available RAM before routing to local models
65
68
  │ └── System # Queries OS memory: macOS (vm_stat/sysctl), Linux (/proc/meminfo)
66
69
  ├── Quality # Response quality evaluation
67
70
  │ ├── Checker # Quality heuristics (empty, too_short, repetition, json_parse) + pluggable (was QualityChecker)
@@ -364,24 +367,21 @@ Settings read from `Legion::Settings[:llm]`:
364
367
 
365
368
  ### Provider Settings
366
369
 
367
- Each provider has: `enabled`, `api_key`, `vault_path`, plus provider-specific keys.
370
+ Provider defaults now live in each `lex-llm-*` provider extension's `default_settings`. The `providers:` key in `Settings.default` ships as an empty hash; settings files and extension registrations populate it at runtime. Each provider has: `enabled`, `api_key`, plus provider-specific keys.
368
371
 
369
- Vault credential resolution: When `vault_path` is set and Legion::Crypt::Vault is connected, credentials are fetched from Vault at startup. Keys map to provider-specific fields automatically.
372
+ Local/fleet providers (Ollama, vLLM, MLX) support multi-instance configs via an `instances:` hash. Discovery scans all instances in parallel, enriches models with real capability metadata, and generates per-instance routing rules.
370
373
 
371
- Bedrock supports two auth modes:
372
- - **SigV4** (default): `api_key` + `secret_key` (+ optional `session_token`)
373
- - **Bearer token**: `bearer_token` for AWS Identity Center/SSO. Native Bedrock providers consume it through lex-llm configuration.
374
+ ### Capability-Aware Routing
374
375
 
375
- ### Auto-Detection Priority
376
+ Routing rules carry `model_capabilities`, `context_length`, and `parameter_count` from provider-supplied `Model::Info`. The `RuleGenerator` creates rules from discovered instances without a static capability map -- each provider supplies real metadata.
377
+
378
+ ### Memory Gate
376
379
 
377
- When no defaults are configured, the first enabled provider is used:
380
+ `Discovery::MemoryGate` checks available system memory before routing to local models. Models that exceed available RAM minus `discovery.memory_floor_mb` are silently skipped.
381
+
382
+ ### Auto-Detection Priority
378
383
 
379
- 1. Bedrock -> `us.anthropic.claude-sonnet-4-6-v1`
380
- 2. Anthropic -> `claude-sonnet-4-6`
381
- 3. OpenAI -> `gpt-4o`
382
- 4. Gemini -> `gemini-2.0-flash`
383
- 5. Azure -> (endpoint-specific, from `api_base`)
384
- 6. Ollama -> `llama3`
384
+ When no defaults are configured, the first enabled provider is used. Detection order and default models are defined by each `lex-llm-*` provider extension.
385
385
 
386
386
  ### Routing Settings
387
387
 
@@ -501,7 +501,10 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
501
501
  | `lib/legion/llm/context/compressor.rb` | Deterministic prompt compression: 3 levels, code-block-aware, stopword removal |
502
502
  | `lib/legion/llm/context/curator.rb` | Async heuristic conversation curation (was ContextCurator) |
503
503
  | `lib/legion/llm/discovery.rb` | Discovery entry point: run, detect_embedding_capability, can_embed? |
504
- | `lib/legion/llm/discovery/ollama.rb` | Ollama /api/tags discovery with TTL cache |
504
+ | `lib/legion/llm/discovery/ollama.rb` | Multi-instance Ollama /api/tags + /api/show discovery with TTL cache |
505
+ | `lib/legion/llm/discovery/vllm.rb` | Multi-instance vLLM /v1/models + /health discovery with TTL cache |
506
+ | `lib/legion/llm/discovery/rule_generator.rb` | Auto-generates routing rules from discovered instances/models |
507
+ | `lib/legion/llm/discovery/memory_gate.rb` | Checks available RAM vs model size before routing to local models |
505
508
  | `lib/legion/llm/discovery/system.rb` | OS memory introspection (macOS + Linux) with TTL cache |
506
509
  | `lib/legion/llm/quality.rb` | Quality entry point |
507
510
  | `lib/legion/llm/quality/checker.rb` | Quality heuristics + pluggable callable (was QualityChecker) |
@@ -715,7 +718,7 @@ The legacy `vault_path` per-provider setting was removed in v0.3.1.
715
718
  Tests run without the full LegionIO stack. `spec/spec_helper.rb` uses real `Legion::Logging` and `Legion::Settings` (no stubs — hard dependencies are always present). Each test resets settings to defaults via `before(:each)`.
716
719
 
717
720
  ```bash
718
- bundle exec rspec # 1661 examples, 0 failures
721
+ bundle exec rspec # 2379 examples, 0 failures
719
722
  bundle exec rubocop # 0 offenses
720
723
  ```
721
724
 
data/Gemfile CHANGED
@@ -4,6 +4,9 @@ source 'https://rubygems.org'
4
4
 
5
5
  gemspec
6
6
 
7
+ legion_settings_path = File.expand_path('../legion-settings', __dir__)
8
+ gem 'legion-settings', path: legion_settings_path if Dir.exist?(legion_settings_path)
9
+
7
10
  group :test do
8
11
  lex_llm_path = File.expand_path('../extensions-ai/lex-llm', __dir__)
9
12
  if Dir.exist?(lex_llm_path)
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  LLM routing and provider orchestration for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Routes chat, embeddings, tool use, fleet dispatch, auditing, and provider metadata through Legion-native `lex-llm-*` provider extensions.
4
4
 
5
- **Version**: 0.8.47
5
+ **Version**: 0.8.49
6
6
 
7
7
  ## Installation
8
8
 
@@ -90,6 +90,8 @@ When enabled, validates `Authorization: Bearer <token>` or `x-api-key` headers a
90
90
 
91
91
  ## Configuration
92
92
 
93
+ Provider defaults now live in each `lex-llm-*` provider extension. `legion-llm` ships an empty `providers: {}` hash; settings files and extension registrations populate it at runtime.
94
+
93
95
  Add to your LegionIO settings directory (e.g. `~/.legionio/settings/llm.json`):
94
96
 
95
97
  ```json
@@ -103,53 +105,60 @@ Add to your LegionIO settings directory (e.g. `~/.legionio/settings/llm.json`):
103
105
  "region": "us-east-2",
104
106
  "bearer_token": ["vault://secret/data/llm/bedrock#bearer_token", "env://AWS_BEARER_TOKEN"]
105
107
  },
106
- "anthropic": {
107
- "enabled": false,
108
- "api_key": "env://ANTHROPIC_API_KEY"
109
- },
110
- "openai": {
111
- "enabled": false,
112
- "api_key": "env://OPENAI_API_KEY"
113
- },
114
108
  "ollama": {
115
- "enabled": false,
116
- "base_url": "http://localhost:11434"
117
- },
118
- "vllm": {
119
- "enabled": false,
120
- "base_url": "http://localhost:8000/v1",
121
- "default_model": "qwen3.6-27b",
122
- "enable_thinking": true
123
- },
124
- "mlx": {
125
- "enabled": false,
126
- "base_url": "http://localhost:8000"
109
+ "enabled": true,
110
+ "base_url": "http://localhost:11434",
111
+ "instances": {
112
+ "default": { "base_url": "http://localhost:11434" },
113
+ "gpu_server": { "base_url": "http://gpu-server:11434" }
114
+ }
127
115
  }
128
116
  }
129
117
  }
130
118
  }
131
119
  ```
132
120
 
133
- Credentials are resolved automatically by the universal secret resolver in `legion-settings` (v1.3.0+). Use `vault://` URIs for Vault secrets, `env://` for environment variables, or plain strings for static values. Array values act as fallback chains the first non-nil result wins.
121
+ Credentials are resolved automatically by the universal secret resolver in `legion-settings` (v1.3.0+). Use `vault://` URIs for Vault secrets, `env://` for environment variables, or plain strings for static values. Array values act as fallback chains -- the first non-nil result wins.
122
+
123
+ ### Provider Extensions (lex-llm-*)
124
+
125
+ Each provider is a standalone `lex-llm-*` gem that ships its own `default_settings`, model catalog, and capability declarations. The provider registers itself with `legion-llm` at load time. Provider gems implement:
126
+
127
+ - **`default_settings`** -- Connection defaults (base_url, region, API key env vars)
128
+ - **`model_allowed?(model_name)`** -- Provider-level model filtering
129
+ - **`Model::Info`** -- Real capabilities, context lengths, and parameter counts for each model
130
+
131
+ The routing layer only sees models the provider has already filtered and annotated.
132
+
133
+ ### Multi-Instance Providers
134
+
135
+ Local and fleet providers (Ollama, vLLM, MLX) support multiple named instances:
136
+
137
+ ```json
138
+ {
139
+ "ollama": {
140
+ "enabled": true,
141
+ "instances": {
142
+ "macbook": { "base_url": "http://localhost:11434" },
143
+ "gpu_server": { "base_url": "http://gpu-server:11434" }
144
+ }
145
+ }
146
+ }
147
+ ```
148
+
149
+ Discovery scans all instances in parallel, enriches models with `/api/show` metadata, and generates per-instance routing rules. Each instance appears independently in the routing table so the router can target the exact hardware.
150
+
151
+ ### Capability-Aware Routing
134
152
 
135
- ### Provider Configuration
153
+ Routing rules and auto-generated rules carry `model_capabilities`, `context_length`, and `parameter_count` from provider-supplied `Model::Info`. The router uses these to match capability requirements (e.g., `thinking`, `vision`, `tools`) without a static lookup table.
136
154
 
137
- Each provider supports these common fields:
155
+ ### Generic Dispatch
138
156
 
139
- | Field | Type | Description |
140
- |-------|------|-------------|
141
- | `enabled` | Boolean | Enable this provider (default: `false`) |
142
- | `api_key` | String | API key (supports `vault://`, `env://`, or plain string) |
157
+ `Call::Dispatch.call` accepts a `capability:` parameter (`:chat`, `:stream`, `:embed`) and routes to the registered `lex-llm-*` adapter. This replaces the old provider-specific dispatch paths.
143
158
 
144
- Provider-specific fields:
159
+ ### Memory Gate
145
160
 
146
- | Provider | Additional Fields |
147
- |----------|------------------|
148
- | **Bedrock** | `secret_key`, `session_token`, `region` (default: `us-east-2`), `bearer_token` (alternative to SigV4 — for AWS Identity Center/SSO) |
149
- | **Azure** | `api_base` (Azure OpenAI endpoint URL, required), `auth_token` (bearer token alternative to `api_key`) |
150
- | **Ollama** | `base_url` (default: `http://localhost:11434`) |
151
- | **vLLM** | `base_url` (default: `http://localhost:8000/v1`), `api_key`, `enable_thinking` |
152
- | **MLX** | `base_url` (default: `http://localhost:8000`), `api_key` |
161
+ Discovery checks available system memory (macOS `vm_stat`/`sysctl`, Linux `/proc/meminfo`) before routing to local models. Models that exceed available RAM minus `discovery.memory_floor_mb` are silently skipped.
153
162
 
154
163
  ### Credential Resolution
155
164
 
@@ -171,18 +180,7 @@ By the time `Legion::LLM.start` runs, all `vault://` and `env://` references hav
171
180
 
172
181
  ### Auto-Detection
173
182
 
174
- If no `default_model` or `default_provider` is set, legion-llm auto-detects from the first enabled provider in priority order:
175
-
176
- | Priority | Provider | Default Model |
177
- |----------|----------|---------------|
178
- | 1 | Bedrock | `us.anthropic.claude-sonnet-4-6-v1` |
179
- | 2 | Anthropic | `claude-sonnet-4-6` |
180
- | 3 | OpenAI | `gpt-4o` |
181
- | 4 | Gemini | `gemini-2.0-flash` |
182
- | 5 | Azure | (endpoint-specific) |
183
- | 6 | Ollama | `qwen3.5:latest` |
184
- | 7 | vLLM | `qwen3.6-27b` |
185
- | 8 | MLX | (configured model) |
183
+ If no `default_model` or `default_provider` is set, legion-llm auto-detects from the first enabled provider. The detection order and default models are defined by each `lex-llm-*` provider extension's `default_settings`.
186
184
 
187
185
  ## Core API
188
186
 
data/legion-llm.gemspec CHANGED
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.add_dependency 'legion-cache', '>= 1.4.2'
31
31
  spec.add_dependency 'legion-json', '>= 1.2.0'
32
32
  spec.add_dependency 'legion-logging', '>= 1.2.8'
33
- spec.add_dependency 'legion-settings', '>= 1.3.12'
33
+ spec.add_dependency 'legion-settings', '>= 1.4.0'
34
34
  spec.add_dependency 'lex-knowledge'
35
35
  spec.add_dependency 'lex-llm', '>= 0.1.6'
36
36
  spec.add_dependency 'pdf-reader'
data/lib/legion/llm/api/native/helpers.rb CHANGED
@@ -10,8 +10,10 @@ require 'legion/llm/types'
10
10
  begin
11
11
  require 'legion/identity/request'
12
12
  require 'legion/identity/process'
13
- rescue LoadError
14
- # legion-llm can still be loaded outside a full LegionIO runtime.
13
+ rescue LoadError => e
14
+ Object.new.extend(Legion::Logging::Helper).handle_exception(
15
+ e, level: :debug, handled: true, operation: 'llm.api.native.helpers.optional_identity_require'
16
+ )
15
17
  end
16
18
 
17
19
  module Legion
@@ -334,7 +336,7 @@ module Legion
334
336
  text = content.key?(:text) || content.key?('text') ? (content[:text] || content['text']) : (content[:content] || content['content'])
335
337
  extract_text_content(text)
336
338
  else
337
- content.to_s
339
+ content.respond_to?(:text) ? content.text.to_s : content.to_s
338
340
  end
339
341
  end
340
342
 
data/lib/legion/llm/api/native/instances.rb CHANGED
@@ -16,14 +16,13 @@ module Legion
16
16
  log.debug('[llm][api][instances] action=list_instances')
17
17
  require_llm!
18
18
 
19
- offerings = Legion::LLM::Inventory.offerings
20
- instances = Legion::LLM::API::Native::Instances.instances_from_offerings(offerings)
19
+ instances = Legion::LLM::API::Native::Instances.registry_instances
21
20
 
22
21
  json_response({
23
22
  instances: instances,
24
23
  summary: {
25
24
  total: instances.size,
26
- providers: instances.map { |instance| instance[:provider_family] }.uniq.size
25
+ providers: instances.map { |inst| inst[:provider] }.uniq.size
27
26
  }
28
27
  })
29
28
  rescue StandardError => e
@@ -36,11 +35,16 @@ module Legion
36
35
  log.debug("[llm][api][instances] action=get_instance id=#{instance_id}")
37
36
  require_llm!
38
37
 
39
- offerings = Legion::LLM::Inventory.offerings(instance_id: instance_id)
40
- instance = Legion::LLM::API::Native::Instances.instance_from_offerings(instance_id, offerings)
41
- halt json_error('instance_not_found', "Instance '#{instance_id}' not found", status_code: 404) unless instance
38
+ result = Legion::LLM::API::Native::Instances.find_registry_instance(instance_id)
39
+ if result == :ambiguous
40
+ halt json_error('ambiguous_instance_id',
41
+ "Instance id '#{instance_id}' matches multiple providers; " \
42
+ 'use composite id (provider/instance) to disambiguate',
43
+ status_code: 400)
44
+ end
45
+ halt json_error('instance_not_found', "Instance '#{instance_id}' not found", status_code: 404) unless result
42
46
 
43
- json_response({ instance: instance })
47
+ json_response({ instance: result })
44
48
  rescue StandardError => e
45
49
  handle_exception(e, level: :error, handled: true, operation: 'llm.api.instances.get')
46
50
  json_error('instance_inventory_error', e.message, status_code: 500)
@@ -49,39 +53,32 @@ module Legion
49
53
  log.debug('[llm][api][instances] provider instance inventory routes registered')
50
54
  end
51
55
 
52
- def self.instances_from_offerings(offerings)
53
- instances = offerings.group_by { |offering| offering[:instance_id] }.filter_map do |instance_id, rows|
54
- instance_from_offerings(instance_id, rows)
56
+ def self.registry_instances
57
+ instances = []
58
+ Legion::LLM::Call::Registry.available.each do |provider_name|
59
+ Legion::LLM::Call::Registry.instances_for(provider_name).each_key do |inst_id|
60
+ instances << {
61
+ id: "#{provider_name}/#{inst_id}",
62
+ provider: provider_name.to_s,
63
+ instance: inst_id.to_s
64
+ }
65
+ end
55
66
  end
56
- instances.sort_by { |instance| instance[:instance_id] }
67
+ instances.sort_by { |inst| inst[:id] }
57
68
  end
58
69
 
59
- def self.instance_from_offerings(instance_id, offerings)
60
- rows = Array(offerings)
61
- return nil if rows.empty?
70
+ def self.find_registry_instance(instance_id)
71
+ # Try exact composite id match first (e.g. "ollama/local")
72
+ exact = registry_instances.find { |inst| inst[:id] == instance_id }
73
+ return exact if exact
62
74
 
63
- {
64
- instance_id: instance_id.to_s,
65
- provider_family: rows.map { |offering| offering[:provider_family] }.uniq.sort.join(','),
66
- tiers: rows.map { |offering| offering[:tier].to_s }.uniq.sort,
67
- transports: rows.map { |offering| offering[:transport].to_s }.uniq.sort,
68
- health: aggregate_health(rows),
69
- capacity: aggregate_capacity(rows),
70
- offerings: rows.sort_by { |offering| offering[:offering_id].to_s }
71
- }
72
- end
73
-
74
- def self.aggregate_health(offerings)
75
- states = offerings.filter_map { |offering| offering.dig(:health, :circuit_state) }.uniq.sort
76
- { circuit_states: states }
77
- end
75
+ # Fall back to bare instance name, but guard against ambiguity
76
+ matches = registry_instances.select { |inst| inst[:instance] == instance_id }
77
+ return matches.first if matches.size == 1
78
+ return :ambiguous if matches.size > 1
78
79
 
79
- def self.aggregate_capacity(offerings)
80
- {
81
- max_context_window: offerings.filter_map { |offering| offering.dig(:limits, :context_window) }.max,
82
- max_output_tokens: offerings.filter_map { |offering| offering.dig(:limits, :max_output_tokens) }.max,
83
- offering_count: offerings.size
84
- }.compact
80
+ # No match found
81
+ nil
85
82
  end
86
83
  end
87
84
  end
data/lib/legion/llm/api/native/providers.rb CHANGED
@@ -9,10 +9,6 @@ module Legion
9
9
  module Providers
10
10
  extend Legion::Logging::Helper
11
11
 
12
- SECRET_KEYS = %w[
13
- api_key secret_key bearer_token session_token auth_token authorization password
14
- ].freeze
15
-
16
12
  def self.registered(app)
17
13
  log.debug('[llm][api][providers] registering provider routes')
18
14
 
@@ -20,19 +16,15 @@ module Legion
20
16
  log.debug('[llm][api][providers] action=list_providers')
21
17
  require_llm!
22
18
 
23
- providers_config = Legion::LLM::API::Native::Providers.settings_value(:providers, {})
24
- provider_list = providers_config.filter_map do |name, config|
25
- next unless Legion::LLM::API::Native::Providers.enabled_provider_config?(config)
26
-
27
- provider_name = name.to_s
19
+ instances = begin
20
+ Legion::LLM::Call::Registry.all_instances
21
+ rescue StandardError => e
22
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.api.providers.registry_read')
23
+ []
24
+ end
28
25
 
29
- {
30
- name: provider_name,
31
- enabled: true,
32
- default_model: Legion::LLM::API::Native::Providers.config_value(config, :default_model),
33
- health: Legion::LLM::API::Native::Providers.provider_health(provider_name),
34
- native: Legion::LLM::Call::Registry.registered?(provider_name.to_sym)
35
- }
26
+ provider_list = instances.map do |entry|
27
+ Legion::LLM::API::Native::Providers.instance_to_hash(entry)
36
28
  end
37
29
 
38
30
  summary = {
@@ -53,22 +45,30 @@ module Legion
53
45
  require_llm!
54
46
 
55
47
  provider_name = params[:name].to_s
56
- provider_config = Legion::LLM::API::Native::Providers.find_provider_config(provider_name)
48
+ provider_sym = provider_name.to_sym
49
+
50
+ instances = begin
51
+ Legion::LLM::Call::Registry.all_instances
52
+ rescue StandardError => e
53
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.api.providers.registry_read')
54
+ []
55
+ end
56
+
57
+ family = instances.select { |entry| entry[:provider].to_sym == provider_sym }
57
58
 
58
- unless Legion::LLM::API::Native::Providers.enabled_provider_config?(provider_config)
59
+ unless family.any?
59
60
  log.debug("[llm][api][providers] action=not_found name=#{params[:name]}")
60
- halt json_error('provider_not_found', "Provider '#{params[:name]}' not found or disabled", status_code: 404)
61
+ halt json_error('provider_not_found',
62
+ "Provider '#{params[:name]}' not found",
63
+ status_code: 404)
61
64
  end
62
65
 
63
- log.debug("[llm][api][providers] action=found name=#{params[:name]}")
64
- json_response({
65
- name: provider_name,
66
- enabled: true,
67
- default_model: Legion::LLM::API::Native::Providers.config_value(provider_config, :default_model),
68
- health: Legion::LLM::API::Native::Providers.provider_health(provider_name),
69
- native: Legion::LLM::Call::Registry.registered?(provider_name.to_sym),
70
- config: Legion::LLM::API::Native::Providers.safe_config(provider_config)
71
- })
66
+ provider_list = family.map do |entry|
67
+ Legion::LLM::API::Native::Providers.instance_to_hash(entry)
68
+ end
69
+
70
+ log.debug("[llm][api][providers] action=found name=#{params[:name]} instances=#{provider_list.size}")
71
+ json_response({ provider: provider_name, instances: provider_list })
72
72
  rescue StandardError => e
73
73
  handle_exception(e, level: :error, handled: true, operation: 'llm.api.providers.get')
74
74
  json_error('provider_error', e.message, status_code: 500)
@@ -77,54 +77,30 @@ module Legion
77
77
  log.debug('[llm][api][providers] provider routes registered')
78
78
  end
79
79
 
80
- def self.find_provider_config(name)
81
- providers = settings_value(:providers, {})
82
- providers[name.to_sym] || providers[name.to_s]
83
- end
84
-
85
- def self.enabled_provider_config?(config)
86
- config.is_a?(Hash) && config_value(config, :enabled, true) != false
87
- end
88
-
89
- def self.config_value(config, key, default = nil)
90
- return default unless config.respond_to?(:key?)
91
-
92
- string_key = key.to_s
93
- return config[string_key] if config.key?(string_key)
94
-
95
- config.key?(key) ? config[key] : default
96
- end
97
-
98
- def self.settings_value(key, default = nil)
99
- Legion::LLM::Settings.value(key, default: default)
100
- end
101
-
102
- def self.safe_config(config)
103
- config.each_with_object({}) do |(key, value), safe|
104
- safe[key] = redact_value(value) unless SECRET_KEYS.include?(key.to_s)
105
- end
106
- end
107
-
108
- def self.redact_value(value)
109
- case value
110
- when Hash
111
- safe_config(value)
112
- when Array
113
- value.map { |entry| redact_value(entry) }
114
- else
115
- value
116
- end
117
- end
118
-
119
- def self.provider_health(name)
120
- if Legion::LLM::Router.routing_enabled?
121
- tracker = Legion::LLM::Router.health_tracker
122
- provider_key = name.to_sym
123
- { circuit_state: tracker.circuit_state(provider_key).to_s,
124
- adjustment: tracker.adjustment(provider_key) }
125
- else
126
- { circuit_state: 'unknown' }
80
+ def self.instance_to_hash(entry)
81
+ health = begin
82
+ Legion::LLM::Router.health_tracker
83
+ rescue StandardError
84
+ nil
127
85
  end
86
+ provider_key = entry[:provider].to_sym
87
+ instance_key = entry[:instance].to_sym
88
+
89
+ {
90
+ provider: entry[:provider].to_s,
91
+ instance: entry[:instance].to_s,
92
+ tier: entry.dig(:metadata, :tier)&.to_s,
93
+ capabilities: Array(entry.dig(:metadata, :capabilities)).map(&:to_s),
94
+ health: if health
95
+ {
96
+ circuit_state: health.circuit_state(provider_key, instance: instance_key).to_s,
97
+ adjustment: health.adjustment(provider_key, instance: instance_key)
98
+ }
99
+ else
100
+ {}
101
+ end,
102
+ native: true
103
+ }
128
104
  end
129
105
  end
130
106
  end
data/lib/legion/llm/api/native/routing.rb ADDED
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/logging/helper'
4
+
5
+ module Legion
6
+ module LLM
7
+ module API
8
+ module Native
9
+ module Routing
10
+ extend Legion::Logging::Helper
11
+
12
+ def self.registered(app)
13
+ log.debug('[llm][api][routing] registering routing routes')
14
+
15
+ app.get '/api/llm/routing' do
16
+ log.debug('[llm][api][routing] action=list_rules')
17
+ require_llm!
18
+
19
+ rules = Legion::LLM::Router.send(:load_rules)
20
+ auto_count = rules.count { |r| r.name.to_s.start_with?('auto:') }
21
+ manual_count = rules.size - auto_count
22
+
23
+ rule_list = rules.map do |rule|
24
+ {
25
+ name: rule.name,
26
+ priority: rule.priority,
27
+ conditions: rule.conditions,
28
+ target: rule.target,
29
+ constraint: rule.constraint,
30
+ auto: rule.name.to_s.start_with?('auto:')
31
+ }.compact
32
+ end
33
+
34
+ json_response({
35
+ routing_enabled: Legion::LLM::Router.routing_enabled?,
36
+ auto_rules_populated: Legion::LLM::Router.auto_rules_populated?,
37
+ rules: rule_list,
38
+ summary: {
39
+ total: rules.size,
40
+ auto: auto_count,
41
+ manual: manual_count
42
+ }
43
+ })
44
+ rescue StandardError => e
45
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.routing.list')
46
+ json_error('routing_error', e.message, status_code: 500)
47
+ end
48
+
49
+ log.debug('[llm][api][routing] routing routes registered')
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
data/lib/legion/llm/api.rb CHANGED
@@ -8,6 +8,7 @@ require_relative 'api/native/providers'
8
8
  require_relative 'api/native/models'
9
9
  require_relative 'api/native/offerings'
10
10
  require_relative 'api/native/instances'
11
+ require_relative 'api/native/routing'
11
12
  require_relative 'api/translators/openai_request'
12
13
  require_relative 'api/translators/openai_response'
13
14
  require_relative 'api/openai/chat_completions'
@@ -34,6 +35,7 @@ module Legion
34
35
  Native::Models.registered(app)
35
36
  Native::Offerings.registered(app)
36
37
  Native::Instances.registered(app)
38
+ Native::Routing.registered(app)
37
39
  OpenAI::ChatCompletions.registered(app)
38
40
  OpenAI::Models.registered(app)
39
41
  OpenAI::Embeddings.registered(app)
data/lib/legion/llm/cache/response.rb CHANGED
@@ -135,7 +135,8 @@ module Legion
135
135
 
136
136
  private_class_method def self.local_cache_backend?
137
137
  respond_to?(:local_cache_connected?) && local_cache_connected?
138
- rescue StandardError
138
+ rescue StandardError => e
139
+ log.debug("[llm][cache][response] action=local_cache_backend error=#{e.class} message=#{e.message}")
139
140
  false
140
141
  end
141
142