RubyGems - legion-llm - Versions diffs - 0.8.47 → 0.8.49 - Mend

legion-llm 0.8.47 → 0.8.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)

checksums.yaml +4 -4
data/.gitignore +3 -0
data/CHANGELOG.md +18 -0
data/CLAUDE.md +20 -17
data/Gemfile +3 -0
data/README.md +46 -48
data/legion-llm.gemspec +1 -1
data/lib/legion/llm/api/native/helpers.rb +5 -3
data/lib/legion/llm/api/native/instances.rb +32 -35
data/lib/legion/llm/api/native/providers.rb +51 -75
data/lib/legion/llm/api/native/routing.rb +55 -0
data/lib/legion/llm/api.rb +2 -0
data/lib/legion/llm/cache/response.rb +2 -1
data/lib/legion/llm/cache.rb +4 -2
data/lib/legion/llm/call/daemon_client.rb +2 -2
data/lib/legion/llm/call/dispatch.rb +88 -49
data/lib/legion/llm/call/embeddings.rb +113 -503
data/lib/legion/llm/call/lex_llm_adapter.rb +43 -15
data/lib/legion/llm/call/providers.rb +22 -694
data/lib/legion/llm/call/registry.rb +94 -10
data/lib/legion/llm/call/structured_output.rb +1 -1
data/lib/legion/llm/call.rb +0 -2
data/lib/legion/llm/compat.rb +2 -7
data/lib/legion/llm/config.rb +11 -1
data/lib/legion/llm/context/curator.rb +2 -1
data/lib/legion/llm/discovery/memory_gate.rb +53 -0
data/lib/legion/llm/discovery/rule_generator.rb +147 -0
data/lib/legion/llm/discovery.rb +201 -50
data/lib/legion/llm/fleet/dispatcher.rb +4 -1
data/lib/legion/llm/fleet/handler.rb +6 -2
data/lib/legion/llm/fleet/lane.rb +1 -1
data/lib/legion/llm/fleet/reply_dispatcher.rb +1 -6
data/lib/legion/llm/hooks.rb +2 -2
data/lib/legion/llm/inference/conversation.rb +3 -2
data/lib/legion/llm/inference/executor.rb +172 -113
data/lib/legion/llm/inference/prompt.rb +0 -12
data/lib/legion/llm/inference/steps/classification.rb +4 -13
data/lib/legion/llm/inference/steps/debate.rb +19 -22
data/lib/legion/llm/inference/steps/gaia_advisory.rb +2 -2
data/lib/legion/llm/inference/steps/prompt_cache.rb +1 -13
data/lib/legion/llm/inference/steps/rag_context.rb +8 -20
data/lib/legion/llm/inference/steps/skill_injector.rb +1 -13
data/lib/legion/llm/inference/steps/sticky_helpers.rb +1 -13
data/lib/legion/llm/inference/steps/sticky_persist.rb +29 -8
data/lib/legion/llm/inference/steps/sticky_runners.rb +6 -6
data/lib/legion/llm/inference/steps/tool_discovery.rb +27 -13
data/lib/legion/llm/inference/steps/trigger_match.rb +3 -16
data/lib/legion/llm/inference.rb +6 -22
data/lib/legion/llm/inventory.rb +32 -38
data/lib/legion/llm/quality/checker.rb +2 -2
data/lib/legion/llm/quality/confidence/scorer.rb +3 -2
data/lib/legion/llm/router/health_tracker.rb +144 -57
data/lib/legion/llm/router/resolution.rb +9 -3
data/lib/legion/llm/router/rule.rb +6 -3
data/lib/legion/llm/router.rb +188 -57
data/lib/legion/llm/scheduling.rb +2 -2
data/lib/legion/llm/settings.rb +11 -66
data/lib/legion/llm/skills/base.rb +4 -1
data/lib/legion/llm/tools/confidence.rb +8 -6
data/lib/legion/llm/tools/dispatcher.rb +27 -43
data/lib/legion/llm/transport/message.rb +30 -1
data/lib/legion/llm/types/tool_definition.rb +17 -0
data/lib/legion/llm/version.rb +1 -1
data/lib/legion/llm.rb +6 -4
metadata +6 -7
data/lib/legion/llm/call/claude_config_loader.rb +0 -182
data/lib/legion/llm/call/codex_config_loader.rb +0 -137
data/lib/legion/llm/discovery/ollama.rb +0 -116
data/lib/legion/llm/discovery/vllm.rb +0 -134

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 758dae0c2bb2fba09c683bbcbf29459301ffdfe0239ff37214952e8c334422b0
-  data.tar.gz: 841c7d0ae47d825431601edb635bde28f1f7b60f938a008abe1c8d6f2ac5772d
+  metadata.gz: 88524bf330ed22e6ac366d35ddccdbad4557b115c447d87d238ab2670ca8da7e
+  data.tar.gz: 68c0470f4c10ee885bd28d0486f487777ae3c9dc35ab0fea3cc4ab860c68a0c6
 SHA512:
-  metadata.gz: 7a14e57a3a2bd88ac05cb83608e64461b710aed5370eb0686830dc705a80019eb7de351279204eb96429e06da167420c744d8ae79518dbeec6d7a69fcec14c7a
-  data.tar.gz: d442eeee484806be5f01dd13e212b155d8cb8d839cb50a0ddd9517d013a49675320936927ded888aa4c617ee8d282624dc93b69b814599c097ee6078a3e4409c
+  metadata.gz: ba9ad2293c9e65db838aca3921ba291ef3bfd71798fc044bfdcdb90f3a04e0536fa5c562f00ea3e06eda88c41ee1659d23ef46382c69424940b4ff058f5cd978
+  data.tar.gz: c7feaf54a2a618eb5d45741269ba65d855bb478142a3b90702d29de9d23f3bbceadca26b5eb765bedf6c2476b97901dc59cc597242051d9500df9733174532f4

data/.gitignore CHANGED Viewed

@@ -21,3 +21,6 @@ legion.log
 .worktrees/
 .claude/
 docs/
+bin/apollo-setup-postreboot.sh
+bin/apollo-setup-prereboot.sh
+legionio-bootstrap-uhg-v3.json

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,23 @@
 # Legion LLM Changelog
+## [0.8.49] - 2026-04-29
+### Changed
+- `Settings.register_defaults!` now calls `Legion::Settings.register_library` instead of `merge_settings`, using the idempotent legion-settings 1.4.0 API that prevents double-registration.
+- Bumped `legion-settings` dependency floor to `>= 1.4.0`.
+- Test stub `Legion::Settings` now exposes `register_library` matching the real 1.4.0 API.
+## [0.8.48] - 2026-04-29
+### Added
+- `ToolDefinition.from_registry_entry` builds tool definitions from `Legion::Settings::Extensions` registry entries.
+- `Dispatcher` checks `Settings::Extensions` for tool override resolution; when no matching entry is found it falls back to settings-based MCP overrides (no `Tools::Registry` or `Catalog::Registry` fallback).
+- `Executor#add_registry_tool_definitions` reads from `Settings::Extensions` when available, falling back to `Legion::Tools::Registry` for backward compatibility.
+- `Steps::ToolDiscovery` discovers tools from `Settings::Extensions` when available, falling back to `Legion::Tools::Registry`.
+### Changed
+- Bumped `legion-settings` dependency floor to `>= 1.4.0` (requires `Settings::Extensions` module).
 ## [0.8.47] - 2026-04-29
 ### Fixed

data/CLAUDE.md CHANGED Viewed

@@ -8,7 +8,7 @@
 Core LegionIO gem providing LLM capabilities to all extensions through Legion-native provider dispatch. Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
 **GitHub**: https://github.com/LegionIO/legion-llm
-**Version**: 0.8.0
+**Version**: 0.8.49
 **License**: Apache-2.0
 ## Architecture
@@ -61,7 +61,10 @@ Legion::LLM (lib/legion/llm.rb)          # Thin facade — delegates to Inferenc
 │   ├── Compressor   # Deterministic prompt compression (3 levels, code-block-aware)
 │   └── Curator      # Async conversation curation: strip thinking, distill tools, fold resolved exchanges
 ├── Discovery                            # Runtime introspection
-│   ├── Ollama       # Queries Ollama /api/tags for pulled models (TTL-cached)
+│   ├── Ollama       # Multi-instance Ollama /api/tags + /api/show discovery (TTL-cached)
+│   ├── Vllm         # Multi-instance vLLM /v1/models + /health discovery (TTL-cached)
+│   ├── RuleGenerator # Auto-generates routing rules from discovered instances/models
+│   ├── MemoryGate   # Checks available RAM before routing to local models
 │   └── System       # Queries OS memory: macOS (vm_stat/sysctl), Linux (/proc/meminfo)
 ├── Quality                              # Response quality evaluation
 │   ├── Checker      # Quality heuristics (empty, too_short, repetition, json_parse) + pluggable (was QualityChecker)
@@ -364,24 +367,21 @@ Settings read from `Legion::Settings[:llm]`:
 ### Provider Settings
-Each provider has: `enabled`, `api_key`, `vault_path`, plus provider-specific keys.
+Provider defaults now live in each `lex-llm-*` provider extension's `default_settings`. The `providers:` key in `Settings.default` ships as an empty hash; settings files and extension registrations populate it at runtime. Each provider has: `enabled`, `api_key`, plus provider-specific keys.
-Vault credential resolution: When `vault_path` is set and Legion::Crypt::Vault is connected, credentials are fetched from Vault at startup. Keys map to provider-specific fields automatically.
+Local/fleet providers (Ollama, vLLM, MLX) support multi-instance configs via an `instances:` hash. Discovery scans all instances in parallel, enriches models with real capability metadata, and generates per-instance routing rules.
-Bedrock supports two auth modes:
-- **SigV4** (default): `api_key` + `secret_key` (+ optional `session_token`)
-- **Bearer token**: `bearer_token` for AWS Identity Center/SSO. Native Bedrock providers consume it through lex-llm configuration.
+### Capability-Aware Routing
-### Auto-Detection Priority
+Routing rules carry `model_capabilities`, `context_length`, and `parameter_count` from provider-supplied `Model::Info`. The `RuleGenerator` creates rules from discovered instances without a static capability map -- each provider supplies real metadata.
+### Memory Gate
-When no defaults are configured, the first enabled provider is used:
+`Discovery::MemoryGate` checks available system memory before routing to local models. Models that exceed available RAM minus `discovery.memory_floor_mb` are silently skipped.
+### Auto-Detection Priority
-1. Bedrock -> `us.anthropic.claude-sonnet-4-6-v1`
-2. Anthropic -> `claude-sonnet-4-6`
-3. OpenAI -> `gpt-4o`
-4. Gemini -> `gemini-2.0-flash`
-5. Azure -> (endpoint-specific, from `api_base`)
-6. Ollama -> `llama3`
+When no defaults are configured, the first enabled provider is used. Detection order and default models are defined by each `lex-llm-*` provider extension.
 ### Routing Settings
@@ -501,7 +501,10 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
 | `lib/legion/llm/context/compressor.rb` | Deterministic prompt compression: 3 levels, code-block-aware, stopword removal |
 | `lib/legion/llm/context/curator.rb` | Async heuristic conversation curation (was ContextCurator) |
 | `lib/legion/llm/discovery.rb` | Discovery entry point: run, detect_embedding_capability, can_embed? |
-| `lib/legion/llm/discovery/ollama.rb` | Ollama /api/tags discovery with TTL cache |
+| `lib/legion/llm/discovery/ollama.rb` | Multi-instance Ollama /api/tags + /api/show discovery with TTL cache |
+| `lib/legion/llm/discovery/vllm.rb` | Multi-instance vLLM /v1/models + /health discovery with TTL cache |
+| `lib/legion/llm/discovery/rule_generator.rb` | Auto-generates routing rules from discovered instances/models |
+| `lib/legion/llm/discovery/memory_gate.rb` | Checks available RAM vs model size before routing to local models |
 | `lib/legion/llm/discovery/system.rb` | OS memory introspection (macOS + Linux) with TTL cache |
 | `lib/legion/llm/quality.rb` | Quality entry point |
 | `lib/legion/llm/quality/checker.rb` | Quality heuristics + pluggable callable (was QualityChecker) |
@@ -715,7 +718,7 @@ The legacy `vault_path` per-provider setting was removed in v0.3.1.
 Tests run without the full LegionIO stack. `spec/spec_helper.rb` uses real `Legion::Logging` and `Legion::Settings` (no stubs — hard dependencies are always present). Each test resets settings to defaults via `before(:each)`.
 ```bash
-bundle exec rspec    # 1661 examples, 0 failures
+bundle exec rspec    # 2379 examples, 0 failures
 bundle exec rubocop  # 0 offenses
 ```

data/Gemfile CHANGED Viewed

@@ -4,6 +4,9 @@ source 'https://rubygems.org'
 gemspec
+legion_settings_path = File.expand_path('../legion-settings', __dir__)
+gem 'legion-settings', path: legion_settings_path if Dir.exist?(legion_settings_path)
 group :test do
   lex_llm_path = File.expand_path('../extensions-ai/lex-llm', __dir__)
   if Dir.exist?(lex_llm_path)

data/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 LLM routing and provider orchestration for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Routes chat, embeddings, tool use, fleet dispatch, auditing, and provider metadata through Legion-native `lex-llm-*` provider extensions.
-**Version**: 0.8.47
+**Version**: 0.8.49
 ## Installation
@@ -90,6 +90,8 @@ When enabled, validates `Authorization: Bearer <token>` or `x-api-key` headers a
 ## Configuration
+Provider defaults now live in each `lex-llm-*` provider extension. `legion-llm` ships an empty `providers: {}` hash; settings files and extension registrations populate it at runtime.
 Add to your LegionIO settings directory (e.g. `~/.legionio/settings/llm.json`):
 ```json
@@ -103,53 +105,60 @@ Add to your LegionIO settings directory (e.g. `~/.legionio/settings/llm.json`):
         "region": "us-east-2",
         "bearer_token": ["vault://secret/data/llm/bedrock#bearer_token", "env://AWS_BEARER_TOKEN"]
       },
-      "anthropic": {
-        "enabled": false,
-        "api_key": "env://ANTHROPIC_API_KEY"
-      },
-      "openai": {
-        "enabled": false,
-        "api_key": "env://OPENAI_API_KEY"
-      },
       "ollama": {
-        "enabled": false,
-        "base_url": "http://localhost:11434"
-      },
-      "vllm": {
-        "enabled": false,
-        "base_url": "http://localhost:8000/v1",
-        "default_model": "qwen3.6-27b",
-        "enable_thinking": true
-      },
-      "mlx": {
-        "enabled": false,
-        "base_url": "http://localhost:8000"
+        "enabled": true,
+        "base_url": "http://localhost:11434",
+        "instances": {
+          "default": { "base_url": "http://localhost:11434" },
+          "gpu_server": { "base_url": "http://gpu-server:11434" }
+        }
       }
     }
   }
 }
 ```
-Credentials are resolved automatically by the universal secret resolver in `legion-settings` (v1.3.0+). Use `vault://` URIs for Vault secrets, `env://` for environment variables, or plain strings for static values. Array values act as fallback chains — the first non-nil result wins.
+Credentials are resolved automatically by the universal secret resolver in `legion-settings` (v1.3.0+). Use `vault://` URIs for Vault secrets, `env://` for environment variables, or plain strings for static values. Array values act as fallback chains -- the first non-nil result wins.
+### Provider Extensions (lex-llm-*)
+Each provider is a standalone `lex-llm-*` gem that ships its own `default_settings`, model catalog, and capability declarations. The provider registers itself with `legion-llm` at load time. Provider gems implement:
+- **`default_settings`** -- Connection defaults (base_url, region, API key env vars)
+- **`model_allowed?(model_name)`** -- Provider-level model filtering
+- **`Model::Info`** -- Real capabilities, context lengths, and parameter counts for each model
+The routing layer only sees models the provider has already filtered and annotated.
+### Multi-Instance Providers
+Local and fleet providers (Ollama, vLLM, MLX) support multiple named instances:
+```json
+{
+  "ollama": {
+    "enabled": true,
+    "instances": {
+      "macbook":    { "base_url": "http://localhost:11434" },
+      "gpu_server": { "base_url": "http://gpu-server:11434" }
+    }
+  }
+}
+```
+Discovery scans all instances in parallel, enriches models with `/api/show` metadata, and generates per-instance routing rules. Each instance appears independently in the routing table so the router can target the exact hardware.
+### Capability-Aware Routing
-### Provider Configuration
+Routing rules and auto-generated rules carry `model_capabilities`, `context_length`, and `parameter_count` from provider-supplied `Model::Info`. The router uses these to match capability requirements (e.g., `thinking`, `vision`, `tools`) without a static lookup table.
-Each provider supports these common fields:
+### Generic Dispatch
-| Field | Type | Description |
-|-------|------|-------------|
-| `enabled` | Boolean | Enable this provider (default: `false`) |
-| `api_key` | String | API key (supports `vault://`, `env://`, or plain string) |
+`Call::Dispatch.call` accepts a `capability:` parameter (`:chat`, `:stream`, `:embed`) and routes to the registered `lex-llm-*` adapter. This replaces the old provider-specific dispatch paths.
-Provider-specific fields:
+### Memory Gate
-| Provider | Additional Fields |
-|----------|------------------|
-| **Bedrock** | `secret_key`, `session_token`, `region` (default: `us-east-2`), `bearer_token` (alternative to SigV4 — for AWS Identity Center/SSO) |
-| **Azure** | `api_base` (Azure OpenAI endpoint URL, required), `auth_token` (bearer token alternative to `api_key`) |
-| **Ollama** | `base_url` (default: `http://localhost:11434`) |
-| **vLLM** | `base_url` (default: `http://localhost:8000/v1`), `api_key`, `enable_thinking` |
-| **MLX** | `base_url` (default: `http://localhost:8000`), `api_key` |
+Discovery checks available system memory (macOS `vm_stat`/`sysctl`, Linux `/proc/meminfo`) before routing to local models. Models that exceed available RAM minus `discovery.memory_floor_mb` are silently skipped.
 ### Credential Resolution
@@ -171,18 +180,7 @@ By the time `Legion::LLM.start` runs, all `vault://` and `env://` references hav
 ### Auto-Detection
-If no `default_model` or `default_provider` is set, legion-llm auto-detects from the first enabled provider in priority order:
-| Priority | Provider | Default Model |
-|----------|----------|---------------|
-| 1 | Bedrock | `us.anthropic.claude-sonnet-4-6-v1` |
-| 2 | Anthropic | `claude-sonnet-4-6` |
-| 3 | OpenAI | `gpt-4o` |
-| 4 | Gemini | `gemini-2.0-flash` |
-| 5 | Azure | (endpoint-specific) |
-| 6 | Ollama | `qwen3.5:latest` |
-| 7 | vLLM | `qwen3.6-27b` |
-| 8 | MLX | (configured model) |
+If no `default_model` or `default_provider` is set, legion-llm auto-detects from the first enabled provider. The detection order and default models are defined by each `lex-llm-*` provider extension's `default_settings`.
 ## Core API

data/legion-llm.gemspec CHANGED Viewed

@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'legion-cache', '>= 1.4.2'
   spec.add_dependency 'legion-json', '>= 1.2.0'
   spec.add_dependency 'legion-logging', '>= 1.2.8'
-  spec.add_dependency 'legion-settings', '>= 1.3.12'
+  spec.add_dependency 'legion-settings', '>= 1.4.0'
   spec.add_dependency 'lex-knowledge'
   spec.add_dependency 'lex-llm', '>= 0.1.6'
   spec.add_dependency 'pdf-reader'

data/lib/legion/llm/api/native/helpers.rb CHANGED Viewed

@@ -10,8 +10,10 @@ require 'legion/llm/types'
 begin
   require 'legion/identity/request'
   require 'legion/identity/process'
-rescue LoadError
-  # legion-llm can still be loaded outside a full LegionIO runtime.
+rescue LoadError => e
+  Object.new.extend(Legion::Logging::Helper).handle_exception(
+    e, level: :debug, handled: true, operation: 'llm.api.native.helpers.optional_identity_require'
+  )
 end
 module Legion
@@ -334,7 +336,7 @@ module Legion
                   text = content.key?(:text) || content.key?('text') ? (content[:text] || content['text']) : (content[:content] || content['content'])
                   extract_text_content(text)
                 else
-                  content.to_s
+                  content.respond_to?(:text) ? content.text.to_s : content.to_s
                 end
               end

data/lib/legion/llm/api/native/instances.rb CHANGED Viewed

@@ -16,14 +16,13 @@ module Legion
               log.debug('[llm][api][instances] action=list_instances')
               require_llm!
-              offerings = Legion::LLM::Inventory.offerings
-              instances = Legion::LLM::API::Native::Instances.instances_from_offerings(offerings)
+              instances = Legion::LLM::API::Native::Instances.registry_instances
               json_response({
                               instances: instances,
                               summary:   {
                                 total:     instances.size,
-                                providers: instances.map { |instance| instance[:provider_family] }.uniq.size
+                                providers: instances.map { |inst| inst[:provider] }.uniq.size
                               }
                             })
             rescue StandardError => e
@@ -36,11 +35,16 @@ module Legion
               log.debug("[llm][api][instances] action=get_instance id=#{instance_id}")
               require_llm!
-              offerings = Legion::LLM::Inventory.offerings(instance_id: instance_id)
-              instance = Legion::LLM::API::Native::Instances.instance_from_offerings(instance_id, offerings)
-              halt json_error('instance_not_found', "Instance '#{instance_id}' not found", status_code: 404) unless instance
+              result = Legion::LLM::API::Native::Instances.find_registry_instance(instance_id)
+              if result == :ambiguous
+                halt json_error('ambiguous_instance_id',
+                                "Instance id '#{instance_id}' matches multiple providers; " \
+                                'use composite id (provider/instance) to disambiguate',
+                                status_code: 400)
+              end
+              halt json_error('instance_not_found', "Instance '#{instance_id}' not found", status_code: 404) unless result
-              json_response({ instance: instance })
+              json_response({ instance: result })
             rescue StandardError => e
               handle_exception(e, level: :error, handled: true, operation: 'llm.api.instances.get')
               json_error('instance_inventory_error', e.message, status_code: 500)
@@ -49,39 +53,32 @@ module Legion
             log.debug('[llm][api][instances] provider instance inventory routes registered')
           end
-          def self.instances_from_offerings(offerings)
-            instances = offerings.group_by { |offering| offering[:instance_id] }.filter_map do |instance_id, rows|
-              instance_from_offerings(instance_id, rows)
+          def self.registry_instances
+            instances = []
+            Legion::LLM::Call::Registry.available.each do |provider_name|
+              Legion::LLM::Call::Registry.instances_for(provider_name).each_key do |inst_id|
+                instances << {
+                  id:       "#{provider_name}/#{inst_id}",
+                  provider: provider_name.to_s,
+                  instance: inst_id.to_s
+                }
+              end
             end
-            instances.sort_by { |instance| instance[:instance_id] }
+            instances.sort_by { |inst| inst[:id] }
           end
-          def self.instance_from_offerings(instance_id, offerings)
-            rows = Array(offerings)
-            return nil if rows.empty?
+          def self.find_registry_instance(instance_id)
+            # Try exact composite id match first (e.g. "ollama/local")
+            exact = registry_instances.find { |inst| inst[:id] == instance_id }
+            return exact if exact
-            {
-              instance_id:     instance_id.to_s,
-              provider_family: rows.map { |offering| offering[:provider_family] }.uniq.sort.join(','),
-              tiers:           rows.map { |offering| offering[:tier].to_s }.uniq.sort,
-              transports:      rows.map { |offering| offering[:transport].to_s }.uniq.sort,
-              health:          aggregate_health(rows),
-              capacity:        aggregate_capacity(rows),
-              offerings:       rows.sort_by { |offering| offering[:offering_id].to_s }
-            }
-          end
-          def self.aggregate_health(offerings)
-            states = offerings.filter_map { |offering| offering.dig(:health, :circuit_state) }.uniq.sort
-            { circuit_states: states }
-          end
+            # Fall back to bare instance name, but guard against ambiguity
+            matches = registry_instances.select { |inst| inst[:instance] == instance_id }
+            return matches.first if matches.size == 1
+            return :ambiguous if matches.size > 1
-          def self.aggregate_capacity(offerings)
-            {
-              max_context_window: offerings.filter_map { |offering| offering.dig(:limits, :context_window) }.max,
-              max_output_tokens:  offerings.filter_map { |offering| offering.dig(:limits, :max_output_tokens) }.max,
-              offering_count:     offerings.size
-            }.compact
+            # No match found
+            nil
           end
         end
       end

data/lib/legion/llm/api/native/providers.rb CHANGED Viewed

@@ -9,10 +9,6 @@ module Legion
         module Providers
           extend Legion::Logging::Helper
-          SECRET_KEYS = %w[
-            api_key secret_key bearer_token session_token auth_token authorization password
-          ].freeze
           def self.registered(app)
             log.debug('[llm][api][providers] registering provider routes')
@@ -20,19 +16,15 @@ module Legion
               log.debug('[llm][api][providers] action=list_providers')
               require_llm!
-              providers_config = Legion::LLM::API::Native::Providers.settings_value(:providers, {})
-              provider_list = providers_config.filter_map do |name, config|
-                next unless Legion::LLM::API::Native::Providers.enabled_provider_config?(config)
-                provider_name = name.to_s
+              instances = begin
+                Legion::LLM::Call::Registry.all_instances
+              rescue StandardError => e
+                handle_exception(e, level: :warn, handled: true, operation: 'llm.api.providers.registry_read')
+                []
+              end
-                {
-                  name:          provider_name,
-                  enabled:       true,
-                  default_model: Legion::LLM::API::Native::Providers.config_value(config, :default_model),
-                  health:        Legion::LLM::API::Native::Providers.provider_health(provider_name),
-                  native:        Legion::LLM::Call::Registry.registered?(provider_name.to_sym)
-                }
+              provider_list = instances.map do |entry|
+                Legion::LLM::API::Native::Providers.instance_to_hash(entry)
               end
               summary = {
@@ -53,22 +45,30 @@ module Legion
               require_llm!
               provider_name = params[:name].to_s
-              provider_config = Legion::LLM::API::Native::Providers.find_provider_config(provider_name)
+              provider_sym = provider_name.to_sym
+              instances = begin
+                Legion::LLM::Call::Registry.all_instances
+              rescue StandardError => e
+                handle_exception(e, level: :warn, handled: true, operation: 'llm.api.providers.registry_read')
+                []
+              end
+              family = instances.select { |entry| entry[:provider].to_sym == provider_sym }
-              unless Legion::LLM::API::Native::Providers.enabled_provider_config?(provider_config)
+              unless family.any?
                 log.debug("[llm][api][providers] action=not_found name=#{params[:name]}")
-                halt json_error('provider_not_found', "Provider '#{params[:name]}' not found or disabled", status_code: 404)
+                halt json_error('provider_not_found',
+                                "Provider '#{params[:name]}' not found",
+                                status_code: 404)
               end
-              log.debug("[llm][api][providers] action=found name=#{params[:name]}")
-              json_response({
-                              name:          provider_name,
-                              enabled:       true,
-                              default_model: Legion::LLM::API::Native::Providers.config_value(provider_config, :default_model),
-                              health:        Legion::LLM::API::Native::Providers.provider_health(provider_name),
-                              native:        Legion::LLM::Call::Registry.registered?(provider_name.to_sym),
-                              config:        Legion::LLM::API::Native::Providers.safe_config(provider_config)
-                            })
+              provider_list = family.map do |entry|
+                Legion::LLM::API::Native::Providers.instance_to_hash(entry)
+              end
+              log.debug("[llm][api][providers] action=found name=#{params[:name]} instances=#{provider_list.size}")
+              json_response({ provider: provider_name, instances: provider_list })
             rescue StandardError => e
               handle_exception(e, level: :error, handled: true, operation: 'llm.api.providers.get')
               json_error('provider_error', e.message, status_code: 500)
@@ -77,54 +77,30 @@ module Legion
             log.debug('[llm][api][providers] provider routes registered')
           end
-          def self.find_provider_config(name)
-            providers = settings_value(:providers, {})
-            providers[name.to_sym] || providers[name.to_s]
-          end
-          def self.enabled_provider_config?(config)
-            config.is_a?(Hash) && config_value(config, :enabled, true) != false
-          end
-          def self.config_value(config, key, default = nil)
-            return default unless config.respond_to?(:key?)
-            string_key = key.to_s
-            return config[string_key] if config.key?(string_key)
-            config.key?(key) ? config[key] : default
-          end
-          def self.settings_value(key, default = nil)
-            Legion::LLM::Settings.value(key, default: default)
-          end
-          def self.safe_config(config)
-            config.each_with_object({}) do |(key, value), safe|
-              safe[key] = redact_value(value) unless SECRET_KEYS.include?(key.to_s)
-            end
-          end
-          def self.redact_value(value)
-            case value
-            when Hash
-              safe_config(value)
-            when Array
-              value.map { |entry| redact_value(entry) }
-            else
-              value
-            end
-          end
-          def self.provider_health(name)
-            if Legion::LLM::Router.routing_enabled?
-              tracker = Legion::LLM::Router.health_tracker
-              provider_key = name.to_sym
-              { circuit_state: tracker.circuit_state(provider_key).to_s,
-                adjustment:    tracker.adjustment(provider_key) }
-            else
-              { circuit_state: 'unknown' }
+          def self.instance_to_hash(entry)
+            health = begin
+              Legion::LLM::Router.health_tracker
+            rescue StandardError
+              nil
             end
+            provider_key = entry[:provider].to_sym
+            instance_key = entry[:instance].to_sym
+            {
+              provider:     entry[:provider].to_s,
+              instance:     entry[:instance].to_s,
+              tier:         entry.dig(:metadata, :tier)&.to_s,
+              capabilities: Array(entry.dig(:metadata, :capabilities)).map(&:to_s),
+              health:       if health
+                              {
+                                circuit_state: health.circuit_state(provider_key, instance: instance_key).to_s,
+                                adjustment:    health.adjustment(provider_key, instance: instance_key)
+                              }
+                            else
+                              {}
+                            end,
+              native:       true
+            }
           end
         end
       end

data/lib/legion/llm/api/native/routing.rb ADDED Viewed

@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+require 'legion/logging/helper'
+module Legion
+  module LLM
+    module API
+      module Native
+        module Routing
+          extend Legion::Logging::Helper
+          def self.registered(app)
+            log.debug('[llm][api][routing] registering routing routes')
+            app.get '/api/llm/routing' do
+              log.debug('[llm][api][routing] action=list_rules')
+              require_llm!
+              rules = Legion::LLM::Router.send(:load_rules)
+              auto_count = rules.count { |r| r.name.to_s.start_with?('auto:') }
+              manual_count = rules.size - auto_count
+              rule_list = rules.map do |rule|
+                {
+                  name:       rule.name,
+                  priority:   rule.priority,
+                  conditions: rule.conditions,
+                  target:     rule.target,
+                  constraint: rule.constraint,
+                  auto:       rule.name.to_s.start_with?('auto:')
+                }.compact
+              end
+              json_response({
+                              routing_enabled:      Legion::LLM::Router.routing_enabled?,
+                              auto_rules_populated: Legion::LLM::Router.auto_rules_populated?,
+                              rules:                rule_list,
+                              summary:              {
+                                total:  rules.size,
+                                auto:   auto_count,
+                                manual: manual_count
+                              }
+                            })
+            rescue StandardError => e
+              handle_exception(e, level: :error, handled: true, operation: 'llm.api.routing.list')
+              json_error('routing_error', e.message, status_code: 500)
+            end
+            log.debug('[llm][api][routing] routing routes registered')
+          end
+        end
+      end
+    end
+  end
+end

data/lib/legion/llm/api.rb CHANGED Viewed

@@ -8,6 +8,7 @@ require_relative 'api/native/providers'
 require_relative 'api/native/models'
 require_relative 'api/native/offerings'
 require_relative 'api/native/instances'
+require_relative 'api/native/routing'
 require_relative 'api/translators/openai_request'
 require_relative 'api/translators/openai_response'
 require_relative 'api/openai/chat_completions'
@@ -34,6 +35,7 @@ module Legion
         Native::Models.registered(app)
         Native::Offerings.registered(app)
         Native::Instances.registered(app)
+        Native::Routing.registered(app)
         OpenAI::ChatCompletions.registered(app)
         OpenAI::Models.registered(app)
         OpenAI::Embeddings.registered(app)

data/lib/legion/llm/cache/response.rb CHANGED Viewed

@@ -135,7 +135,8 @@ module Legion
         private_class_method def self.local_cache_backend?
           respond_to?(:local_cache_connected?) && local_cache_connected?
-        rescue StandardError
+        rescue StandardError => e
+          log.debug("[llm][cache][response] action=local_cache_backend error=#{e.class} message=#{e.message}")
           false
         end