RubyGems - legion-llm - Versions diffs - 0.10.2 → 0.11.2 - Mend

legion-llm 0.10.2 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112)

checksums.yaml +4 -4
data/CHANGELOG.md +120 -0
data/Gemfile +3 -1
data/lib/legion/llm/api/auth.rb +2 -2
data/lib/legion/llm/api/namespaces/anthropic/files.rb +2 -1
data/lib/legion/llm/api/namespaces/anthropic/messages.rb +17 -4
data/lib/legion/llm/api/namespaces/helpers.rb +2 -1
data/lib/legion/llm/api/namespaces/native/chat.rb +9 -1
data/lib/legion/llm/api/namespaces/openai/audio/speech.rb +4 -2
data/lib/legion/llm/api/namespaces/openai/audio/transcriptions.rb +2 -1
data/lib/legion/llm/api/namespaces/openai/audio/translations.rb +2 -1
data/lib/legion/llm/api/namespaces/openai/batches.rb +4 -3
data/lib/legion/llm/api/namespaces/openai/chat/completions.rb +2 -3
data/lib/legion/llm/api/namespaces/openai/completions.rb +1 -1
data/lib/legion/llm/api/namespaces/openai/embeddings.rb +12 -2
data/lib/legion/llm/api/namespaces/openai/files.rb +4 -2
data/lib/legion/llm/api/namespaces/openai/images.rb +5 -3
data/lib/legion/llm/api/namespaces/openai/models.rb +13 -5
data/lib/legion/llm/api/namespaces/openai/responses.rb +1 -1
data/lib/legion/llm/api/native/helpers.rb +17 -4
data/lib/legion/llm/api/native/models.rb +3 -1
data/lib/legion/llm/api/native/tiers.rb +2 -2
data/lib/legion/llm/api/openai/chat_completions.rb +1 -1
data/lib/legion/llm/api/openai/embeddings.rb +1 -1
data/lib/legion/llm/api/openai/responses.rb +1 -1
data/lib/legion/llm/api/shared_helpers.rb +2 -1
data/lib/legion/llm/api/translators/anthropic_request.rb +17 -6
data/lib/legion/llm/api/translators/anthropic_response.rb +7 -5
data/lib/legion/llm/api/translators/openai_request.rb +20 -9
data/lib/legion/llm/api/translators/openai_response.rb +10 -3
data/lib/legion/llm/api.rb +1 -1
data/lib/legion/llm/audit.rb +2 -2
data/lib/legion/llm/cache/response.rb +3 -3
data/lib/legion/llm/cache.rb +2 -2
data/lib/legion/llm/call/daemon_client.rb +5 -7
data/lib/legion/llm/call/embeddings.rb +25 -13
data/lib/legion/llm/call/lex_llm_adapter.rb +24 -4
data/lib/legion/llm/call/providers.rb +4 -4
data/lib/legion/llm/call/structured_output.rb +3 -3
data/lib/legion/llm/config.rb +7 -7
data/lib/legion/llm/context/compressor.rb +17 -5
data/lib/legion/llm/context/curator.rb +56 -41
data/lib/legion/llm/discovery/memory_gate.rb +2 -2
data/lib/legion/llm/discovery/rule_generator.rb +3 -3
data/lib/legion/llm/discovery/system.rb +1 -1
data/lib/legion/llm/discovery.rb +151 -83
data/lib/legion/llm/fleet/dispatcher.rb +14 -20
data/lib/legion/llm/fleet/handler.rb +7 -6
data/lib/legion/llm/fleet/reply_dispatcher.rb +4 -3
data/lib/legion/llm/fleet/token_issuer.rb +2 -6
data/lib/legion/llm/helper.rb +3 -3
data/lib/legion/llm/hooks/budget_guard.rb +1 -5
data/lib/legion/llm/hooks/rag_guard.rb +2 -2
data/lib/legion/llm/hooks/reflection.rb +2 -5
data/lib/legion/llm/inference/audit_publisher.rb +40 -14
data/lib/legion/llm/inference/conversation.rb +3 -3
data/lib/legion/llm/inference/enrichment_injector.rb +2 -4
data/lib/legion/llm/inference/executor.rb +354 -106
data/lib/legion/llm/inference/native_tool_loop.rb +61 -12
data/lib/legion/llm/inference/prompt.rb +2 -9
data/lib/legion/llm/inference/request.rb +1 -4
data/lib/legion/llm/inference/route_attempts.rb +5 -5
data/lib/legion/llm/inference/steps/billing.rb +1 -1
data/lib/legion/llm/inference/steps/classification.rb +9 -5
data/lib/legion/llm/inference/steps/confidence_scoring.rb +10 -0
data/lib/legion/llm/inference/steps/debate.rb +23 -16
data/lib/legion/llm/inference/steps/gaia_advisory.rb +3 -1
data/lib/legion/llm/inference/steps/knowledge_capture.rb +9 -3
data/lib/legion/llm/inference/steps/logging.rb +2 -1
data/lib/legion/llm/inference/steps/mcp_discovery.rb +1 -0
data/lib/legion/llm/inference/steps/metering.rb +6 -1
data/lib/legion/llm/inference/steps/post_response.rb +6 -1
data/lib/legion/llm/inference/steps/prompt_cache.rb +4 -5
data/lib/legion/llm/inference/steps/rag_context.rb +27 -22
data/lib/legion/llm/inference/steps/rag_guard.rb +2 -2
data/lib/legion/llm/inference/steps/rbac.rb +1 -1
data/lib/legion/llm/inference/steps/skill_injector.rb +5 -6
data/lib/legion/llm/inference/steps/sticky_helpers.rb +4 -5
data/lib/legion/llm/inference/steps/tier_assigner.rb +7 -1
data/lib/legion/llm/inference/steps/token_budget.rb +4 -1
data/lib/legion/llm/inference/steps/tool_calls.rb +60 -21
data/lib/legion/llm/inference/steps/tool_discovery.rb +4 -1
data/lib/legion/llm/inference/steps/trigger_match.rb +7 -6
data/lib/legion/llm/inference.rb +97 -43
data/lib/legion/llm/inventory.rb +1 -1
data/lib/legion/llm/metering/tokens.rb +11 -3
data/lib/legion/llm/metering/tracker.rb +3 -3
data/lib/legion/llm/metering.rb +117 -12
data/lib/legion/llm/publisher_identity.rb +2 -1
data/lib/legion/llm/quality/checker.rb +35 -8
data/lib/legion/llm/quality/confidence/scorer.rb +31 -17
data/lib/legion/llm/quality/shadow_eval.rb +2 -1
data/lib/legion/llm/router/arbitrage.rb +3 -2
data/lib/legion/llm/router/escalation/chain.rb +5 -2
data/lib/legion/llm/router/health_tracker.rb +12 -27
data/lib/legion/llm/router.rb +36 -63
data/lib/legion/llm/scheduling/batch.rb +1 -1
data/lib/legion/llm/scheduling.rb +5 -13
data/lib/legion/llm/settings.rb +80 -179
data/lib/legion/llm/skills/external_discovery.rb +2 -2
data/lib/legion/llm/skills.rb +1 -4
data/lib/legion/llm/tools/dispatcher.rb +16 -4
data/lib/legion/llm/tools/interceptor.rb +10 -0
data/lib/legion/llm/transport/messages/metering_event.rb +6 -2
data/lib/legion/llm/transport/messages/prompt_event.rb +1 -1
data/lib/legion/llm/transport/messages/skill_event.rb +1 -1
data/lib/legion/llm/transport/messages/tool_event.rb +1 -1
data/lib/legion/llm/types/tool_call.rb +43 -25
data/lib/legion/llm/vector_store/storage.rb +2 -2
data/lib/legion/llm/version.rb +1 -1
data/lib/legion/llm.rb +6 -6
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f5c1d01be312fb6e070b3e75236a8ecd6c6841653e42eae391cb5b2519a668b6
-  data.tar.gz: f6703f5c706dd134e85993c11da4222fc68edced3ca88e09a841f04ca2fb2474
+  metadata.gz: 555bff51c05efea04f283dc4a0c005703fef2344d57b9692fdd70d6ab95d9646
+  data.tar.gz: 454c9cd0be750aec0d597c9e343a35fc8939c22a67af8d048d3887214c225cba
 SHA512:
-  metadata.gz: 06aef6de56965e58876fa98bfcdf8b411996d00ea3faff44f9aa0f9035d385d9da3591300ac2663a31a2820cf56610572fff3b801ec417af20f20fd377ad0e8a
-  data.tar.gz: 77b720ac764fbb899dbbea7d372edfb7a6a8c2671d2b8eef4f92680439d4572e9e8986519c68d4452232ede7c7c87ae74b3b38fbb6383bd9e3dcc1aa0f30670a
+  metadata.gz: c153ef24c678502b0bd9249c6ff8d39070e07b8e06eb950686b552bfdf8bcc5d03b060c333dff2c06418cfc4a1046c3a779274756fb784e821166b3605ee430d
+  data.tar.gz: b1ab367c71a7098292fecbcf2c67758de0734ecd7e70cb108d8ee08abc9fd3917e01adefe2a7bc2d0472a0d731e86b0bf73a9a2a7f566619502322ff0eb9d4ec

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,125 @@
 # Legion LLM Changelog
+## [0.11.2] - 2026-06-02
+### Removed
+- **Legion::LLM::Settings abstraction layer** — Removed `value`, `config_value`, `global_value`, `set_value`, `transport_connected?`, `enterprise_privacy?`, `current_settings`, and `namespace` methods. All settings reads now go directly through `Legion::Settings[:llm]` or `Legion::Settings.dig(:llm, ...)`. The module retains only `default`, `register_defaults!`, `validate!`, and the `*_defaults` class methods.
+- **Passthrough wrapper methods** — Removed `routing_settings`, `discovery_settings`, `default_settings_model`, `default_settings_provider`, `llm_settings` and similar indirection methods from Router, Discovery, and Inference modules.
+- **String-key dual-lookup** — `config_value` previously tried both symbol and string keys on any hash. All settings are now symbol-keyed; string-key fallback is gone.
+- **Deprecated "embeddings" (plural) settings key** — Use `embedding` (singular) only.
+### Added
+- Default values for `compliance.encrypt_metering`, `compliance.audit_max_messages`, `budget.session_usd`, `rag_guard.evaluators`, `discovery.memory_overhead_factor`, and `structured_output` settings so all accessed keys have registered defaults.
+## [0.12.1] - 2026-06-02
+### Fixed
+- **Provider-scoped discovery refresh** — `Discovery.refresh_discovered_models!` now accepts an optional `provider:` keyword argument. When filtered, only refreshes that provider's models and merges with the existing cache instead of re-querying all providers (discovery.rb)
+## [0.12.0] - 2026-06-01
+### Fixed
+- **cacheable? treated nil temperature as zero** — Added `default_temperature` setting (default 1.0). Requests without an explicit temperature now resolve against the default instead of being treated as `0.0`, preventing non-deterministic responses from being served from cache (inference.rb, settings.rb)
+- **ReDoS in infer_tool_name** — Possessive quantifier `\d++` replaced with `\d+` in search detection regex (context/curator.rb)
+- **ReDoS in strip_thinking regex** — `[^#\n][^\n]*` double-character-class pattern created exponential backtracking on long non-heading lines (10k-char lines). Replaced with anchored negative-lookahead variant that matches only lines starting with `#+ Thinking` headings. Benchmarked: pathological input drops from potential timeout to <5ms (context/curator.rb)
+- **Shell injection in dispatch_client_tool** — Added audit logging for all shell commands executed via native client tools (api/native/helpers.rb)
+- **Path traversal in file operations** — Added `validate_client_tool_path` that constrains file_read/file_write/file_edit to working directory, rejecting paths that escape via `..` (api/native/helpers.rb)
+- **Text block concatenation loss** — Anthropic translator now joins assistant text parts with `\n\n` separator instead of empty string (api/translators/anthropic_request.rb)
+- **Raw part leak in responses_content_part** — LexLLMAdapter no longer returns unnormalized/unsanitized parts; unknown types are converted to `input_text` with serialized content (call/lex_llm_adapter.rb)
+- **Metering failures silently dropped** — step_metering now attempts `Metering.spool_event` on publish failure so billing events are spooled to disk instead of lost (inference/executor.rb)
+- **Error category extraction always nil** — `extract_error_category_from_attempt` now handles string failures and hash `:error` keys in addition to `:category` (inference.rb)
+- **provider_scoped_instance false negatives** — Now checks `Registry.instances_for` before returning nil, only drops instance when provider has other registered instances (inference/executor.rb)
+- **build_fallback_resolutions double-exclusion** — Merged two separate exclusion checks into single predicate; `exclude_instance: nil` now only excludes the specified provider+instance combo (inference/executor.rb)
+- **find_fallback_provider hardcoded local exclusion** — ollama/vllm fallback exclusion is now configurable via `fallback.allow_local` setting instead of being permanently blocked (inference/executor.rb)
+- **extract_content double transform_keys** — Normalizes block keys once up front instead of calling transform_keys 2-3 times per block (api/translators/openai_request.rb)
+- **@pending_tool_history data race** — Tool history mutations in step_tool_calls now wrapped in `@pending_tool_history_mutex.synchronize` to match executor's async event emission (inference/steps/tool_calls.rb)
+- **Client passthrough tool events never emitted** — `client_passthrough_tool_loop_result` now emits both `emit_tool_call_event` and `emit_tool_result_event` for passthrough tools so they appear in `@pending_tool_history`, fire `@tool_event_handler` callbacks, and generate tool audit events (inference/steps/tool_calls.rb)
+- **Thinking tag pattern divergence** — Executor's `strip_thinking_from_history` only handled `<thinking>`/`<think(?:ing)?>` (Anthropic/long form) but not short `<think>` (DeepSeek, Qwen, Ollama, vLLM) or `<thought>` (various models). Added `THINKING_TAG_PATTERN_SHORT` and `THINKING_TAG_PATTERN_THOUGHT` constants, applied all three gsubs so all thinking block variants are stripped before dispatch on every turn (inference/executor.rb)
+- **Thinking tag stripping corrupted passthrough content** — Unanchored regex in `strip_thinking_from_history` and `strip_thinking_tags` treated backtick-quoted or mid-content `<think>`/`</think>` references as real thinking blocks, deleting content between them. Replaced regex with string-based `start_with?`/`index` approach that only strips tags at the beginning of a message where providers actually emit them (inference/executor.rb, context/curator.rb)
+- **ToolResultEvent unresolved constant** — `client_passthrough_tool_loop_result` referenced `ToolResultEvent` without namespace; Ruby's lexical constant lookup failed in the included module. Qualified as `Executor::ToolResultEvent` (inference/steps/tool_calls.rb)
+- **client_tool_methods_spec sandbox failure** — Tests created temp files in `/tmp` which `validate_client_tool_path` correctly rejects. Moved to project-relative `tmp/` directory (spec/api/native/client_tool_methods_spec.rb)
+### Changed
+- **Removed `llm_setting` abstraction** — All `llm_setting(:key)` calls replaced with direct `Legion::Settings[:llm][:key]` access. The indirection obscured that settings must flow through `Legion::Settings` to pick up dynamic user overrides. Affected: inference/executor.rb, inference.rb, inference/native_tool_loop.rb, inference/prompt.rb
+- **`fallback.allow_local` defaults to true** — Local providers (ollama/vllm) are now allowed as fallback targets by default instead of being permanently excluded (settings.rb)
+- **Text join separator** — OpenAI translator `extract_content` text blocks now join with `\n\n` instead of empty string for consistency
+## [0.11.1] - 2026-06-01
+### Fixed
+- **Embedding instance selection honored** — Discovery honors a configured `embedding.instance` pin over a higher-tier-ranked empty instance (and skips empty candidates whose resolved model is absent), and `Embeddings.generate`/`generate_batch` resolve that configured instance on the dispatch path instead of falling back to the provider default
+## [0.11.0] - 2026-05-31
+### Added
+- **Comprehensive diagnostic logging** — Added to 28 files across the executor, pipeline steps, tool loop, context, router, and quality checker, using the structured `[llm][component] action=verb key=value` format at appropriate severity levels (debug/info/warn)
+- **Context window enforcement** — Pre-dispatch compaction triggers at 90% of model's context window, preserving recent turns and aggressively compacting older history
+- **Tool result trimming** — Oversized tool results from prior turns trimmed to 4000 chars before dispatch (current turn preserved in full)
+- **Thinking block stripping** — Historical `<think>` blocks removed from prior assistant turns before dispatch
+- **Empty response guard** — Streaming responses with no text and no tool calls emit `overloaded_error` instead of a valid empty message, triggering client retry
+- **System prompt: no tool call limit** — Added instruction telling models there is no tool call limit per turn
+- **Conversation ID always generated** — API handler generates conv_id when client doesn't provide one; returned via `X-Legion-Conversation-Id` header
+- **Metering spool encryption** — Spool file encrypts via `Legion::Crypt` when `:compliance, :encrypt_spool` enabled
+- **Audit publisher improvements** — Preserves caller identity, includes agent_id/node_id, extracts provider metrics, hashes truncated conversations
+### Fixed
+- **Quality checker ignores tool-use responses** — No longer flags empty_response when model returns tool calls with no text content
+- **Confidence scoring skips tool-use** — Score=0.0 no longer reported for valid tool call responses
+- **Context overflow doesn't trip circuit breaker** — ContextLengthExceededError no longer reports `:error` signal to health tracker
+- **HealthTracker deadlock prevention** — `Mutex` replaced with `Monitor` (reentrant) to prevent deadlock when custom handlers call back into report/adjustment
+- **Thread pool fallback policy** — Chat/batch pools use `:caller_runs` instead of `:abort` (no silent request drops under load)
+- **Bare Thread.new eliminated** — All async work uses managed `ASYNC_THREAD_POOL` with `at_exit` shutdown hooks
+- **Conversation#replace preserves internal roles** — `__metadata__` and `__curated__` entries no longer wiped on replace
+- **EscalationChain method naming** — `padded_resolutions` renamed to `capped_resolutions` (it truncates rather than pads)
+- **trigger_tool_limit default mismatch** — Fallback default fixed from 50 to 25 to match settings.rb
+- **Debate extract_question string keys** — `m[:role] == :user` changed to `.to_s == 'user'` for mixed-key messages
+- **EnrichmentInjector nil safety** — `enrichments` param defaults to `{}` when nil
+- **Stop reason preserved from provider** — `message_response` and `chunk_response` extract actual `finish_reason` from raw provider response instead of discarding
+- **OpenAI streaming usage stats** — Always included in final chunk (was gated behind `include_reasoning`)
+- **Metering identity** — Uses caller identity from request, not process publisher identity
+- **Metering request_type** — Derived from request metadata (image/audio/chat), not hardcoded 'chat'
+- **Metering actual cost** — Prefers provider-reported cost over local estimate
+- **Metering encryption** — `encrypt?` respects `:compliance, :encrypt_metering` setting
+- **Audit identity clobbering** — `attributed_event` uses `||=` to preserve caller identity
+- **Audit step order** — `post_response` (audit) now runs before `metering` (financial records need supporting evidence)
+- **Audit tool spooling** — Failed tool audit events spool to disk instead of silent drop
+- **Audit timeline** — Preserves RBAC, classification, billing, confidence decisions
+- **Budget cap** — Pre-flight check estimates output tokens (assumes output ≈ input) instead of `output_tokens: 0`
+- **Embeddings audit** — POST /v1/embeddings now emits audit event
+- **Native chat audit** — Async chat path emits `Audit.emit_prompt` after completion
+- **Knowledge capture embedding** — Truncates content to 2000 chars before embedding to prevent ContextLengthExceededError
+- **Dedup performance** — O(n²) → O(n×20) via sliding window comparison
+- **22 silent rescue swallows** — All `rescue StandardError` without variable capture now log at debug level
+### Changed
+- **max_tool_calls_per_turn: 50** — New setting (was dead `MAX_TOOL_LOOPS = 10` constant); deferred tool calls get error result telling model to retry
+- **max_tool_rounds** — Removed `MAX_NATIVE_TOOL_ROUNDS` constant; reads directly from settings
+- **Settings-driven limits** — Redundant fallback defaults removed from `llm_setting` call sites
+## [0.10.4] - 2026-05-31
+### Fixed
+- **TRANSLATION-BUG-01**: Anthropic `tool_result` content blocks preserved as arrays — multimodal tool results (images) no longer flattened to string.
+- **TRANSLATION-BUG-03**: Anthropic `stop_reason` properly maps `content_filter`; distinguishes `stop` with/without `stop_sequence`.
+- **TRANSLATION-BUG-04**: OpenAI `map_finish_reason` returns `error` for unknown stop reasons instead of `stop` (errors no longer disguised as success).
+- **TRANSLATION-BUG-05**: OpenAI `extract_content` preserves `image_url` and non-text content parts — vision input no longer silently dropped.
+- **TRANSLATION-BUG-06**: Anthropic streaming `content_block_start` includes tool arguments in `input` field (was empty `{}`).
+- **TRANSLATION-BUG-09**: Anthropic system prompt `cache_control` metadata preserved when present — prompt caching no longer silently disabled.
+- **TRANSLATION-BUG-10**: Stable `tool_call_id` generated when OpenAI client sends nil — multi-turn tool chains no longer break.
+- **TRANSLATION-BUG-11**: OpenAI translator uses symbol roles (`:user`, `:assistant`) matching Anthropic — executor symbol comparisons now work.
+- **TRANSLATION-BUG-12**: Unsupported OpenAI tool types (`code_interpreter`, `file_search`) logged at debug instead of silent drop.
+## [0.10.3] - 2026-05-31
+### Fixed
+- **DaemonClient HTTPS support** — `http_get` and `http_post` now set `http.use_ssl = true` when the daemon URL scheme is `https://`. Previously, all daemon communication was plain HTTP, silently failing for HTTPS URLs or sending credentials in cleartext.
+- **Context compression guard against preserve_recent: 0** — `auto_compact` now enforces a minimum `preserve_recent` of 1. A value of 0 would compact the entire conversation including the latest messages, producing empty context.
+- **Context curator thread safety** — `curate_turn` and `curated_messages` now synchronize on a per-instance `@curation_mutex`. Concurrent turns could race on `@curated_messages`, causing stale or nil curation state.
+- **Recursive compaction guard** — `maybe_compact_history` now uses `Thread.current[:legion_compacting]` to prevent infinite recursion when `Context::Compressor.auto_compact` triggers its own LLM summarization call, which would recursively trigger compaction again.
+- **Metering::Tokens unbounded memory growth** — `TokenTracker#record` now evicts oldest entries when the store exceeds `MAX_ENTRIES` (10,000). Long-running high-throughput processes would leak memory.
+- **Tool timeline index per-call resolution** — `build_tool_timeline_index` now tracks per-tool-name call counts and produces keys like `"read_file:2"` for repeated calls. `build_response_tool_calls` matches each tool call to its corresponding timeline entry, fixing wrong duration/status when the same tool is called multiple times in a round.
+- **Streaming escalation quality bypass documented** — Added explicit comment noting that streaming escalation attempts always pass quality check because in-flight stream quality-checking is not supported.
 ## [0.10.2] - 2026-05-30
 ### Fixed

data/Gemfile CHANGED Viewed

@@ -12,7 +12,9 @@ group :test do
   if Dir.exist?(lex_llm_path)
     gem 'lex-llm', path: lex_llm_path
   else
-    gem 'lex-llm'
+    # TEMP (revert to `gem 'lex-llm'` once 0.4.16 is published): track lex-llm PR #16, which
+    # adds the fleet TokenValidator verify_issuer + WorkerExecution policy-warn behavior these specs require.
+    gem 'lex-llm', git: 'https://github.com/LegionIO/lex-llm.git', branch: 'fix/audit-fleet-security'
   end
   %w[

data/lib/legion/llm/api/auth.rb CHANGED Viewed

@@ -37,7 +37,7 @@ module Legion
           app.helpers do
             define_method(:auth_enabled?) do
-              Legion::LLM::Settings.value(:api, :auth, :enabled) == true
+              Legion::Settings.dig(:llm, :api, :auth, :enabled) == true
             end
             define_method(:extract_token) do |req|
@@ -56,7 +56,7 @@ module Legion
               return true unless auth_enabled?
               return false if token.nil? || token.empty?
-              keys = Legion::LLM::Settings.value(:api, :auth, :api_keys, default: [])
+              keys = Legion::Settings.dig(:llm, :api, :auth, :api_keys) || []
               keys.include?(token)
             end
           end

data/lib/legion/llm/api/namespaces/anthropic/files.rb CHANGED Viewed

@@ -201,7 +201,8 @@ module Legion
               def files_storage_path
                 configured = begin
                   Legion::Settings.dig(:llm, :files, :storage_path)
-                rescue StandardError
+                rescue StandardError => e
+                  log.debug "[llm][api][anthropic][files] action=files_storage_path_fallback error=#{e.class} message=#{e.message}"
                   nil
                 end
                 base = configured.to_s.empty? ? ::File.join(Dir.home, '.legionio', 'data', 'files') : configured

data/lib/legion/llm/api/namespaces/anthropic/messages.rb CHANGED Viewed

@@ -34,7 +34,7 @@ module Legion
               tool_defs = build_tool_definitions(normalized[:tools] || [], executable: false)
               modality = detect_modality(normalized[:messages])
-              conv_id = env['HTTP_X_LEGION_CONVERSATION_ID'] || body[:conversation_id]
+              conv_id = env['HTTP_X_LEGION_CONVERSATION_ID'] || body[:conversation_id] || "conv_#{SecureRandom.hex(8)}"
               ext_provider = env['HTTP_X_LEGION_PROVIDER'] || body[:provider]
               ext_tier = env['HTTP_X_LEGION_TIER'] || body[:tier]
               ext_instance = env['HTTP_X_LEGION_INSTANCE'] || body[:instance]
@@ -78,7 +78,8 @@ module Legion
               if streaming
                 content_type 'text/event-stream'
-                headers 'Cache-Control' => 'no-cache', 'Connection' => 'keep-alive', 'X-Accel-Buffering' => 'no'
+                headers 'Cache-Control' => 'no-cache', 'Connection' => 'keep-alive',
+                        'X-Accel-Buffering' => 'no', 'X-Legion-Conversation-Id' => conv_id
                 stream do |out|
                   full_text = +''
@@ -90,7 +91,7 @@ module Legion
                                                                               id: request_id, type: 'message', role: 'assistant',
                       content: [], model: model.to_s,
                       stop_reason: nil, stop_sequence: nil,
-                      usage: { input_tokens: 0, output_tokens: 0 }
+                      usage: { input_tokens: est_tokens, output_tokens: 0 }
                                                                             }
                                                                           })}\n\n"
@@ -137,6 +138,16 @@ module Legion
                            "tool_calls=#{tool_calls.size} stop_reason=#{stop_reason} " \
                            "text_block_opened=#{text_block_opened} full_text_length=#{full_text.length}"
+                  if tool_calls.empty? && full_text.empty?
+                    log.warn "[llm][api][anthropic] action=empty_response request_id=#{request_id} " \
+                             "model=#{model} text_block_opened=#{text_block_opened} — provider returned no content, signaling overloaded"
+                    out << "event: error\ndata: #{Legion::JSON.dump({
+                                                                      type: 'error', error: { type:    'overloaded_error',
+                                                                                              message: 'Model returned empty response. Please retry.' }
+                                                                    })}\n\n"
+                    next
+                  end
                   if text_block_opened
                     out << "event: content_block_stop\ndata: #{Legion::JSON.dump({ type: 'content_block_stop', index: 0 })}\n\n"
                     content_index = 1
@@ -158,7 +169,8 @@ module Legion
                   out << "event: message_delta\ndata: #{Legion::JSON.dump({
                                                                             type:  'message_delta',
                                                                             delta: { stop_reason: stop_reason, stop_sequence: nil },
-                                                                            usage: { output_tokens: translator.token_count(tokens, :output) }
+                                                                            usage: { input_tokens:  translator.token_count(tokens, :input),
+                                                                                     output_tokens: translator.token_count(tokens, :output) }
                                                                           })}\n\n"
                   out << "event: message_stop\ndata: #{Legion::JSON.dump({ type: 'message_stop' })}\n\n"
                   log.info "[llm][api][anthropic] action=stream_complete request_id=#{request_id} stop_reason=#{stop_reason}"
@@ -172,6 +184,7 @@ module Legion
                   pipeline_response, model: model, request_id: request_id
                 )
+                headers 'X-Legion-Conversation-Id' => conv_id
                 content_type :json
                 status 200
                 Legion::JSON.dump(formatted)

data/lib/legion/llm/api/namespaces/helpers.rb CHANGED Viewed

@@ -42,7 +42,8 @@ module Legion
           def data_subsystem_available?
             defined?(Legion::Data) && Legion::Data.respond_to?(:connected?) && Legion::Data.connected?
-          rescue StandardError
+          rescue StandardError => e
+            log.debug "[llm][api][namespaces][helpers] action=data_subsystem_check_fallback error=#{e.class} message=#{e.message}"
             false
           end
         end

data/lib/legion/llm/api/namespaces/native/chat.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module Legion
             ASYNC_POOL = Concurrent::FixedThreadPool.new(
               [4, (Concurrent.processor_count / 2)].max,
-              fallback_policy: :abort
+              fallback_policy: :caller_runs
             )
             # Ensure the thread pool is shut down cleanly when the process exits.
@@ -66,6 +66,14 @@ module Legion
                           tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
                         }
                       )
+                      Legion::LLM::Audit.emit_prompt(
+                        request_id: request_id,
+                        caller:     { requested_by: { identity: 'api:chat:async', type: :external } },
+                        routing:    { model: session.model.to_s, provider: provider },
+                        tokens:     { input_tokens:  response.respond_to?(:input_tokens) ? response.input_tokens : 0,
+                                      output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : 0 },
+                        timestamp:  Time.now
+                      )
                       log.debug("[llm][api][namespaces][chat] action=async_complete request_id=#{request_id}")
                     rescue StandardError => e
                       handle_exception(e, level: :error, handled: true, operation: 'llm.api.chat.async', request_id: request_id)

data/lib/legion/llm/api/namespaces/openai/audio/speech.rb CHANGED Viewed

@@ -36,7 +36,8 @@ module Legion
               def self.capable_provider_available?
                 instances = begin
                   Legion::LLM::Call::Registry.all_instances
-                rescue StandardError
+                rescue StandardError => e
+                  log.debug "[llm][api][openai][audio][speech] action=registry_fallback error=#{e.class} message=#{e.message}"
                   []
                 end
                 instances.any? do |entry|
@@ -94,7 +95,8 @@ module Legion
                        else
                          begin
                            Legion::JSON.load(raw)
-                         rescue StandardError
+                         rescue StandardError => e
+                           log.debug "[llm][api][openai][audio][speech] action=parse_body_fallback error=#{e.class} message=#{e.message}"
                            {}
                          end
                        end

data/lib/legion/llm/api/namespaces/openai/audio/transcriptions.rb CHANGED Viewed

@@ -25,7 +25,8 @@ module Legion
               def self.capable_provider_available?
                 instances = begin
                   Legion::LLM::Call::Registry.all_instances
-                rescue StandardError
+                rescue StandardError => e
+                  log.debug "[llm][api][openai][audio][transcriptions] action=registry_fallback error=#{e.class} message=#{e.message}"
                   []
                 end
                 instances.any? do |entry|

data/lib/legion/llm/api/namespaces/openai/audio/translations.rb CHANGED Viewed

@@ -25,7 +25,8 @@ module Legion
               def self.capable_provider_available?
                 instances = begin
                   Legion::LLM::Call::Registry.all_instances
-                rescue StandardError
+                rescue StandardError => e
+                  log.debug "[llm][api][openai][audio][translations] action=registry_fallback error=#{e.class} message=#{e.message}"
                   []
                 end
                 instances.any? do |entry|

data/lib/legion/llm/api/namespaces/openai/batches.rb CHANGED Viewed

@@ -23,8 +23,8 @@ module Legion
               BATCH_POOL_MUTEX.synchronize do
                 @batch_pool ||= begin
-                  pool_size = Legion::LLM::Settings.value(:api, :batch_pool_size, default: 4)
-                  Concurrent::FixedThreadPool.new(pool_size, fallback_policy: :abort)
+                  pool_size = Legion::Settings[:llm][:api][:batch_pool_size] || 4
+                  Concurrent::FixedThreadPool.new(pool_size, fallback_policy: :caller_runs)
                 end
               end
             end
@@ -237,7 +237,8 @@ module Legion
               ::File.readlines(file_path).filter_map do |line|
                 Legion::JSON.load(line.strip)
-              rescue StandardError
+              rescue StandardError => e
+                log.debug "[llm][api][openai][batches] action=load_batch_line_fallback file=#{file_id} error=#{e.class} message=#{e.message}"
                 nil
               end
             end

data/lib/legion/llm/api/namespaces/openai/chat/completions.rb CHANGED Viewed

@@ -31,7 +31,7 @@ module Legion
                   request_id = body[:request_id] || SecureRandom.uuid
                   normalized = Legion::LLM::API::Translators::OpenAIRequest.normalize(body)
-                  model      = normalized[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
+                  model      = normalized[:model] || Legion::Settings[:llm][:default_model] || 'default'
                   streaming  = normalized[:stream] == true
                   include_reasoning = body[:include_reasoning] == true || body[:include_thinking] == true
                   tool_decls = Completions.build_tool_declarations(normalized[:tools])
@@ -231,8 +231,7 @@ module Legion
               end
               def self.append_usage_stats(done_chunk, pipeline_response, include_reasoning)
-                return unless include_reasoning
+                _ = include_reasoning
                 tokens = pipeline_response.tokens || {}
                 oai = Legion::LLM::API::Translators::OpenAIResponse
                 input_count = oai.extract_token_count(tokens, :input).to_i

data/lib/legion/llm/api/namespaces/openai/completions.rb CHANGED Viewed

@@ -27,7 +27,7 @@ module Legion
                 end
                 request_id = SecureRandom.uuid
-                model      = body[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
+                model      = body[:model] || Legion::Settings[:llm][:default_model] || 'default'
                 messages   = [{ role: 'user', content: prompt.to_s }]
                 log.info("[llm][api][namespaces][openai][completions] action=accepted request_id=#{request_id} model=#{model}")

data/lib/legion/llm/api/namespaces/openai/embeddings.rb CHANGED Viewed

@@ -15,11 +15,11 @@ module Legion
             def self.registered(app)
               log.debug('[llm][api][namespaces][openai][embeddings] registering routes')
-              app.post '/v1/embeddings' do
+              app.post '/v1/embeddings' do # rubocop:disable Metrics/BlockLength
                 require_llm!
                 body  = parse_request_body
                 input = body[:input]
-                model = body[:model] || Legion::LLM::Settings.value(:default_model)
+                model = body[:model] || Legion::Settings[:llm][:default_model]
                 if input.nil? || (input.respond_to?(:empty?) && input.empty?)
                   return openai_error('input is required', type: 'invalid_request_error',
@@ -43,6 +43,16 @@ module Legion
                 )
                 log.info("[llm][api][namespaces][openai][embeddings] action=complete model=#{model} dims=#{vector_array.size}")
+                Legion::LLM::Audit.emit_prompt(
+                  request_id:   SecureRandom.uuid,
+                  caller:       build_server_caller(source: 'openai_embeddings', path: request.path, env: env),
+                  routing:      { model: model, provider: 'embed' },
+                  tokens:       { input_tokens: (text.length / 4.0).ceil, output_tokens: 0 },
+                  request_type: 'embedding',
+                  timestamp:    Time.now
+                )
                 content_type :json
                 Legion::JSON.dump(response_body)
               rescue Legion::LLM::AuthError => e

data/lib/legion/llm/api/namespaces/openai/files.rb CHANGED Viewed

@@ -53,12 +53,14 @@ module Legion
                 file_id  = "file-#{SecureRandom.hex(16)}"
                 filename = begin
                   uploaded[:filename] || uploaded.original_filename
-                rescue StandardError
+                rescue StandardError => e
+                  log.debug "[llm][api][openai][files] action=filename_fallback error=#{e.class} message=#{e.message}"
                   'upload.bin'
                 end
                 data = begin
                   uploaded[:tempfile]&.read || uploaded.read
-                rescue StandardError
+                rescue StandardError => e
+                  log.debug "[llm][api][openai][files] action=file_read_fallback error=#{e.class} message=#{e.message}"
                   ''
                 end

data/lib/legion/llm/api/namespaces/openai/images.rb CHANGED Viewed

@@ -23,7 +23,8 @@ module Legion
             def self.capable_provider_available?(capability)
               instances = begin
                 Legion::LLM::Call::Registry.all_instances
-              rescue StandardError
+              rescue StandardError => e
+                log.debug "[llm][api][openai][images] action=registry_fallback capability=#{capability} error=#{e.class} message=#{e.message}"
                 []
               end
               instances.any? do |entry|
@@ -76,7 +77,8 @@ module Legion
                 Legion::JSON.load(raw)
               end
-            rescue StandardError
+            rescue StandardError => e
+              log.debug "[llm][api][openai][images] action=parse_media_body_fallback error=#{e.class} message=#{e.message}"
               {}
             end
@@ -116,7 +118,7 @@ module Legion
                      Legion::JSON.dump({ error: { message: 'prompt is required', type: 'invalid_request_error', code: nil } })
               end
-              model      = (body[:model] || body['model'] || Legion::LLM::Settings.value(:default_model) || 'dall-e-3').to_s
+              model      = (body[:model] || body['model'] || Legion::Settings[:llm][:default_model] || 'dall-e-3').to_s
               n          = [(body[:n] || body['n'] || 1).to_i, 1].max
               size       = (body[:size] || body['size'] || '1024x1024').to_s
               quality    = (body[:quality] || body['quality'] || 'standard').to_s

data/lib/legion/llm/api/namespaces/openai/models.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 require 'time'
 require 'legion/logging/helper'
 require 'legion/llm/api/namespaces/helpers'
+require 'legion/llm/api/native/models'
 require 'legion/llm/api/translators/openai_response'
 module Legion
@@ -81,10 +82,14 @@ module Legion
             end
             def self.build_openai_model_list
-              models = Legion::LLM::Inventory.offerings(type: :inference).map do |offering|
+              offerings = Legion::LLM::Inventory.offerings(type: :inference)
+              offerings = Legion::LLM::API::Native::Models.with_auto_routing_offering(offerings, {})
+              models = offerings.map do |offering|
                 Legion::LLM::API::Translators::OpenAIResponse.format_model_object(
                   offering[:model],
-                  owned_by: offering[:provider_family]
+                  owned_by: offering[:provider_family],
+                  limits:   offering[:limits]
                 )
               end
               seen = {}
@@ -99,12 +104,15 @@ module Legion
             end
             def self.openai_to_anthropic_model(openai_model)
-              {
+              model = {
+                type:         'model',
                 id:           openai_model[:id],
                 display_name: openai_model[:id],
-                created_at:   Time.at(openai_model[:created] || Time.now.to_i).utc.strftime('%Y-%m-%dT%H:%M:%SZ'),
-                type:         'model'
+                created_at:   Time.at(openai_model[:created] || Time.now.to_i).utc.strftime('%Y-%m-%dT%H:%M:%SZ')
               }
+              model[:max_input_tokens] = openai_model[:context_window] if openai_model[:context_window]
+              model[:max_tokens] = openai_model[:max_output_tokens] if openai_model[:max_output_tokens]
+              model
             end
           end
         end

data/lib/legion/llm/api/namespaces/openai/responses.rb CHANGED Viewed

@@ -36,7 +36,7 @@ module Legion
                 messages = [{ role: 'system', content: body[:instructions].to_s }] + messages if body[:instructions]
-                model       = body[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
+                model       = body[:model] || Legion::Settings[:llm][:default_model] || 'default'
                 streaming   = body[:stream] == true
                 tool_decls  = Responses.build_tool_declarations(body[:tools])

data/lib/legion/llm/api/native/helpers.rb CHANGED Viewed

@@ -52,22 +52,34 @@ module Legion
             end
           end
+          def validate_client_tool_path(path)
+            return 'file operation error: path is required' if path.nil? || path.to_s.empty?
+            expanded = ::File.expand_path(path)
+            sandbox_root = ::File.expand_path(Dir.pwd)
+            return "file operation error: path '#{path}' escapes working directory #{sandbox_root}" unless expanded.start_with?(sandbox_root)
+            expanded
+          end
           def dispatch_client_tool(ref, **kwargs) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
             case ref
             when 'sh'
               cmd = kwargs[:command] || kwargs[:cmd] || kwargs.values.first.to_s
+              log.warn("[llm][native] client_tool=sh command=#{cmd[0, 120]}")
               output, status = ::Open3.capture2e(cmd, chdir: Dir.pwd)
               "exit=#{status.exitstatus}\n#{output}"
             when 'file_read'
-              path = kwargs[:path] || kwargs[:file_path] || kwargs.values.first.to_s
+              path = validate_client_tool_path(kwargs[:path] || kwargs[:file_path] || kwargs.values.first.to_s)
               read_client_file(path)
             when 'file_write'
-              path = kwargs[:path] || kwargs[:file_path]
+              path = validate_client_tool_path(kwargs[:path] || kwargs[:file_path])
               content = kwargs[:content] || kwargs[:contents]
               ::File.write(path, content)
               "Written #{content.to_s.bytesize} bytes to #{path}"
             when 'file_edit'
-              path = kwargs[:path] || kwargs[:file_path]
+              path = validate_client_tool_path(kwargs[:path] || kwargs[:file_path])
               old_text = kwargs[:old_text] || kwargs[:search]
               new_text = kwargs[:new_text] || kwargs[:replace]
               return 'file_edit error: old_text is required' if old_text.nil? || old_text.empty?
@@ -374,7 +386,8 @@ module Legion
                 return raw_args unless raw_args.is_a?(String)
                 Legion::JSON.parse(raw_args, symbolize_names: true)
-              rescue StandardError
+              rescue StandardError => e
+                log.debug "[llm][api][native][helpers] action=openai_tool_call_arguments_fallback error=#{e.class} message=#{e.message}"
                 raw_args
               end

data/lib/legion/llm/api/native/models.rb CHANGED Viewed

@@ -133,6 +133,8 @@ module Legion
           end
           def self.auto_routing_offering
+            ctx = Legion::Settings[:llm][:context_window] || 262_144
+            max_out = Legion::Settings[:llm][:max_output_tokens] || 16_384
             {
               id:                    AUTO_ROUTING_OFFERING_ID,
               offering_id:           AUTO_ROUTING_OFFERING_ID,
@@ -148,7 +150,7 @@ module Legion
               transport:             :internal,
               enabled:               true,
               capabilities:          AUTO_ROUTING_CAPABILITIES,
-              limits:                {},
+              limits:                { context_window: ctx, max_output_tokens: max_out },
               health:                { circuit_state: 'available' },
               metadata:              { auto_route: true, placeholder: true, display_name: AUTO_ROUTING_MODEL_DISPLAY },
               routing_metadata:      { strategy: 'auto' },

data/lib/legion/llm/api/native/tiers.rb CHANGED Viewed

@@ -162,8 +162,8 @@ module Legion
           def self.tier_priority
             return Legion::LLM::Router.tier_priority if defined?(Legion::LLM::Router)
-            routing_config = Legion::LLM::Settings.value(:routing) || {}
-            top_level = Legion::LLM::Settings.value(:tier_order, default: nil)
+            routing_config = Legion::Settings[:llm][:routing] || {}
+            top_level = Legion::Settings[:llm][:tier_order] || nil
             Array(top_level || routing_config[:tier_order] || routing_config[:tier_priority] ||
                   %w[local direct fleet openai_compat cloud frontier])
           end

data/lib/legion/llm/api/openai/chat_completions.rb CHANGED Viewed

@@ -35,7 +35,7 @@ module Legion
               request_id = body[:request_id] || SecureRandom.uuid
               normalized = Legion::LLM::API::Translators::OpenAIRequest.normalize(body)
-              model = normalized[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
+              model = normalized[:model] || Legion::Settings[:llm][:default_model] || 'default'
               streaming = normalized[:stream] == true
               include_reasoning = body[:include_reasoning] == true || body[:include_thinking] == true

data/lib/legion/llm/api/openai/embeddings.rb CHANGED Viewed

@@ -26,7 +26,7 @@ module Legion
               body = parse_request_body
               input = body[:input] || body['input']
-              model = body[:model] || body['model'] || Legion::LLM::Settings.value(:default_model)
+              model = body[:model] || body['model'] || Legion::Settings[:llm][:default_model]
               if input.nil? || (input.respond_to?(:empty?) && input.empty?)
                 halt 400, { 'Content-Type' => 'application/json' },

data/lib/legion/llm/api/openai/responses.rb CHANGED Viewed

@@ -42,7 +42,7 @@ module Legion
               messages = [{ role: 'system', content: body[:instructions].to_s }] + messages if body[:instructions]
-              model = body[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
+              model = body[:model] || Legion::Settings[:llm][:default_model] || 'default'
               streaming = body[:stream] == true
               tool_declarations = Responses.build_tool_declarations(body[:tools])