legion-llm 0.10.2 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +120 -0
- data/Gemfile +3 -1
- data/lib/legion/llm/api/auth.rb +2 -2
- data/lib/legion/llm/api/namespaces/anthropic/files.rb +2 -1
- data/lib/legion/llm/api/namespaces/anthropic/messages.rb +17 -4
- data/lib/legion/llm/api/namespaces/helpers.rb +2 -1
- data/lib/legion/llm/api/namespaces/native/chat.rb +9 -1
- data/lib/legion/llm/api/namespaces/openai/audio/speech.rb +4 -2
- data/lib/legion/llm/api/namespaces/openai/audio/transcriptions.rb +2 -1
- data/lib/legion/llm/api/namespaces/openai/audio/translations.rb +2 -1
- data/lib/legion/llm/api/namespaces/openai/batches.rb +4 -3
- data/lib/legion/llm/api/namespaces/openai/chat/completions.rb +2 -3
- data/lib/legion/llm/api/namespaces/openai/completions.rb +1 -1
- data/lib/legion/llm/api/namespaces/openai/embeddings.rb +12 -2
- data/lib/legion/llm/api/namespaces/openai/files.rb +4 -2
- data/lib/legion/llm/api/namespaces/openai/images.rb +5 -3
- data/lib/legion/llm/api/namespaces/openai/models.rb +13 -5
- data/lib/legion/llm/api/namespaces/openai/responses.rb +1 -1
- data/lib/legion/llm/api/native/helpers.rb +17 -4
- data/lib/legion/llm/api/native/models.rb +3 -1
- data/lib/legion/llm/api/native/tiers.rb +2 -2
- data/lib/legion/llm/api/openai/chat_completions.rb +1 -1
- data/lib/legion/llm/api/openai/embeddings.rb +1 -1
- data/lib/legion/llm/api/openai/responses.rb +1 -1
- data/lib/legion/llm/api/shared_helpers.rb +2 -1
- data/lib/legion/llm/api/translators/anthropic_request.rb +17 -6
- data/lib/legion/llm/api/translators/anthropic_response.rb +7 -5
- data/lib/legion/llm/api/translators/openai_request.rb +20 -9
- data/lib/legion/llm/api/translators/openai_response.rb +10 -3
- data/lib/legion/llm/api.rb +1 -1
- data/lib/legion/llm/audit.rb +2 -2
- data/lib/legion/llm/cache/response.rb +3 -3
- data/lib/legion/llm/cache.rb +2 -2
- data/lib/legion/llm/call/daemon_client.rb +5 -7
- data/lib/legion/llm/call/embeddings.rb +25 -13
- data/lib/legion/llm/call/lex_llm_adapter.rb +24 -4
- data/lib/legion/llm/call/providers.rb +4 -4
- data/lib/legion/llm/call/structured_output.rb +3 -3
- data/lib/legion/llm/config.rb +7 -7
- data/lib/legion/llm/context/compressor.rb +17 -5
- data/lib/legion/llm/context/curator.rb +56 -41
- data/lib/legion/llm/discovery/memory_gate.rb +2 -2
- data/lib/legion/llm/discovery/rule_generator.rb +3 -3
- data/lib/legion/llm/discovery/system.rb +1 -1
- data/lib/legion/llm/discovery.rb +151 -83
- data/lib/legion/llm/fleet/dispatcher.rb +14 -20
- data/lib/legion/llm/fleet/handler.rb +7 -6
- data/lib/legion/llm/fleet/reply_dispatcher.rb +4 -3
- data/lib/legion/llm/fleet/token_issuer.rb +2 -6
- data/lib/legion/llm/helper.rb +3 -3
- data/lib/legion/llm/hooks/budget_guard.rb +1 -5
- data/lib/legion/llm/hooks/rag_guard.rb +2 -2
- data/lib/legion/llm/hooks/reflection.rb +2 -5
- data/lib/legion/llm/inference/audit_publisher.rb +40 -14
- data/lib/legion/llm/inference/conversation.rb +3 -3
- data/lib/legion/llm/inference/enrichment_injector.rb +2 -4
- data/lib/legion/llm/inference/executor.rb +354 -106
- data/lib/legion/llm/inference/native_tool_loop.rb +61 -12
- data/lib/legion/llm/inference/prompt.rb +2 -9
- data/lib/legion/llm/inference/request.rb +1 -4
- data/lib/legion/llm/inference/route_attempts.rb +5 -5
- data/lib/legion/llm/inference/steps/billing.rb +1 -1
- data/lib/legion/llm/inference/steps/classification.rb +9 -5
- data/lib/legion/llm/inference/steps/confidence_scoring.rb +10 -0
- data/lib/legion/llm/inference/steps/debate.rb +23 -16
- data/lib/legion/llm/inference/steps/gaia_advisory.rb +3 -1
- data/lib/legion/llm/inference/steps/knowledge_capture.rb +9 -3
- data/lib/legion/llm/inference/steps/logging.rb +2 -1
- data/lib/legion/llm/inference/steps/mcp_discovery.rb +1 -0
- data/lib/legion/llm/inference/steps/metering.rb +6 -1
- data/lib/legion/llm/inference/steps/post_response.rb +6 -1
- data/lib/legion/llm/inference/steps/prompt_cache.rb +4 -5
- data/lib/legion/llm/inference/steps/rag_context.rb +27 -22
- data/lib/legion/llm/inference/steps/rag_guard.rb +2 -2
- data/lib/legion/llm/inference/steps/rbac.rb +1 -1
- data/lib/legion/llm/inference/steps/skill_injector.rb +5 -6
- data/lib/legion/llm/inference/steps/sticky_helpers.rb +4 -5
- data/lib/legion/llm/inference/steps/tier_assigner.rb +7 -1
- data/lib/legion/llm/inference/steps/token_budget.rb +4 -1
- data/lib/legion/llm/inference/steps/tool_calls.rb +60 -21
- data/lib/legion/llm/inference/steps/tool_discovery.rb +4 -1
- data/lib/legion/llm/inference/steps/trigger_match.rb +7 -6
- data/lib/legion/llm/inference.rb +97 -43
- data/lib/legion/llm/inventory.rb +1 -1
- data/lib/legion/llm/metering/tokens.rb +11 -3
- data/lib/legion/llm/metering/tracker.rb +3 -3
- data/lib/legion/llm/metering.rb +117 -12
- data/lib/legion/llm/publisher_identity.rb +2 -1
- data/lib/legion/llm/quality/checker.rb +35 -8
- data/lib/legion/llm/quality/confidence/scorer.rb +31 -17
- data/lib/legion/llm/quality/shadow_eval.rb +2 -1
- data/lib/legion/llm/router/arbitrage.rb +3 -2
- data/lib/legion/llm/router/escalation/chain.rb +5 -2
- data/lib/legion/llm/router/health_tracker.rb +12 -27
- data/lib/legion/llm/router.rb +36 -63
- data/lib/legion/llm/scheduling/batch.rb +1 -1
- data/lib/legion/llm/scheduling.rb +5 -13
- data/lib/legion/llm/settings.rb +80 -179
- data/lib/legion/llm/skills/external_discovery.rb +2 -2
- data/lib/legion/llm/skills.rb +1 -4
- data/lib/legion/llm/tools/dispatcher.rb +16 -4
- data/lib/legion/llm/tools/interceptor.rb +10 -0
- data/lib/legion/llm/transport/messages/metering_event.rb +6 -2
- data/lib/legion/llm/transport/messages/prompt_event.rb +1 -1
- data/lib/legion/llm/transport/messages/skill_event.rb +1 -1
- data/lib/legion/llm/transport/messages/tool_event.rb +1 -1
- data/lib/legion/llm/types/tool_call.rb +43 -25
- data/lib/legion/llm/vector_store/storage.rb +2 -2
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +6 -6
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 555bff51c05efea04f283dc4a0c005703fef2344d57b9692fdd70d6ab95d9646
|
|
4
|
+
data.tar.gz: 454c9cd0be750aec0d597c9e343a35fc8939c22a67af8d048d3887214c225cba
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c153ef24c678502b0bd9249c6ff8d39070e07b8e06eb950686b552bfdf8bcc5d03b060c333dff2c06418cfc4a1046c3a779274756fb784e821166b3605ee430d
|
|
7
|
+
data.tar.gz: b1ab367c71a7098292fecbcf2c67758de0734ecd7e70cb108d8ee08abc9fd3917e01adefe2a7bc2d0472a0d731e86b0bf73a9a2a7f566619502322ff0eb9d4ec
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,125 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.11.2] - 2026-06-02
|
|
4
|
+
|
|
5
|
+
### Removed
|
|
6
|
+
- **Legion::LLM::Settings abstraction layer** — Removed `value`, `config_value`, `global_value`, `set_value`, `transport_connected?`, `enterprise_privacy?`, `current_settings`, and `namespace` methods. All settings reads now go directly through `Legion::Settings[:llm]` or `Legion::Settings.dig(:llm, ...)`. The module retains only `default`, `register_defaults!`, `validate!`, and the `*_defaults` class methods.
|
|
7
|
+
- **Passthrough wrapper methods** — Removed `routing_settings`, `discovery_settings`, `default_settings_model`, `default_settings_provider`, `llm_settings` and similar indirection methods from Router, Discovery, and Inference modules.
|
|
8
|
+
- **String-key dual-lookup** — `config_value` previously tried both symbol and string keys on any hash. All settings are now symbol-keyed; string-key fallback is gone.
|
|
9
|
+
- **Deprecated "embeddings" (plural) settings key** — Use `embedding` (singular) only.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- Default values for `compliance.encrypt_metering`, `compliance.audit_max_messages`, `budget.session_usd`, `rag_guard.evaluators`, `discovery.memory_overhead_factor`, and `structured_output` settings so all accessed keys have registered defaults.
|
|
13
|
+
|
|
14
|
+
## [0.12.1] - 2026-06-02
|
|
15
|
+
|
|
16
|
+
### Fixed
|
|
17
|
+
- **Provider-scoped discovery refresh** — `Discovery.refresh_discovered_models!` now accepts an optional `provider:` keyword argument. When filtered, only refreshes that provider's models and merges with the existing cache instead of re-querying all providers (discovery.rb)
|
|
18
|
+
|
|
19
|
+
## [0.12.0] - 2026-06-01
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
- **cacheable? treated nil temperature as zero** — Added `default_temperature` setting (default 1.0). Requests without an explicit temperature now resolve against the default instead of being treated as `0.0`, preventing non-deterministic responses from being served from cache (inference.rb, settings.rb)
|
|
23
|
+
- **ReDoS in infer_tool_name** — Possessive quantifier `\d++` replaced with `\d+` in search detection regex (context/curator.rb)
|
|
24
|
+
- **ReDoS in strip_thinking regex** — `[^#\n][^\n]*` double-character-class pattern created exponential backtracking on long non-heading lines (10k-char lines). Replaced with anchored negative-lookahead variant that matches only lines starting with `#+ Thinking` headings. Benchmarked: pathological input drops from potential timeout to <5ms (context/curator.rb)
|
|
25
|
+
- **Shell injection in dispatch_client_tool** — Added audit logging for all shell commands executed via native client tools (api/native/helpers.rb)
|
|
26
|
+
- **Path traversal in file operations** — Added `validate_client_tool_path` that constrains file_read/file_write/file_edit to working directory, rejecting paths that escape via `..` (api/native/helpers.rb)
|
|
27
|
+
- **Text block concatenation loss** — Anthropic translator now joins assistant text parts with `\n\n` separator instead of empty string (api/translators/anthropic_request.rb)
|
|
28
|
+
- **Raw part leak in responses_content_part** — LexLLMAdapter no longer returns unnormalized/unsanitized parts; unknown types are converted to `input_text` with serialized content (call/lex_llm_adapter.rb)
|
|
29
|
+
- **Metering failures silently dropped** — step_metering now attempts `Metering.spool_event` on publish failure so billing events are spooled to disk instead of lost (inference/executor.rb)
|
|
30
|
+
- **Error category extraction always nil** — `extract_error_category_from_attempt` now handles string failures and hash `:error` keys in addition to `:category` (inference.rb)
|
|
31
|
+
- **provider_scoped_instance false negatives** — Now checks `Registry.instances_for` before returning nil, only drops instance when provider has other registered instances (inference/executor.rb)
|
|
32
|
+
- **build_fallback_resolutions double-exclusion** — Merged two separate exclusion checks into single predicate; `exclude_instance: nil` now only excludes the specified provider+instance combo (inference/executor.rb)
|
|
33
|
+
- **find_fallback_provider hardcoded local exclusion** — ollama/vllm fallback exclusion is now configurable via `fallback.allow_local` setting instead of being permanently blocked (inference/executor.rb)
|
|
34
|
+
- **extract_content double transform_keys** — Normalizes block keys once up front instead of calling transform_keys 2-3 times per block (api/translators/openai_request.rb)
|
|
35
|
+
- **@pending_tool_history data race** — Tool history mutations in step_tool_calls now wrapped in `@pending_tool_history_mutex.synchronize` to match executor's async event emission (inference/steps/tool_calls.rb)
|
|
36
|
+
- **Client passthrough tool events never emitted** — `client_passthrough_tool_loop_result` now emits both `emit_tool_call_event` and `emit_tool_result_event` for passthrough tools so they appear in `@pending_tool_history`, fire `@tool_event_handler` callbacks, and generate tool audit events (inference/steps/tool_calls.rb)
|
|
37
|
+
- **Thinking tag pattern divergence** — Executor's `strip_thinking_from_history` only handled `<thinking>`/`<think(?:ing)?>` (Anthropic/long form) but not short `<think>` (DeepSeek, Qwen, Ollama, vLLM) or `<thought>` (various models). Added `THINKING_TAG_PATTERN_SHORT` and `THINKING_TAG_PATTERN_THOUGHT` constants, applied all three gsubs so all thinking block variants are stripped before dispatch on every turn (inference/executor.rb)
|
|
38
|
+
- **Thinking tag stripping corrupted passthrough content** — Unanchored regex in `strip_thinking_from_history` and `strip_thinking_tags` treated backtick-quoted or mid-content `<think>`/`</think>` references as real thinking blocks, deleting content between them. Replaced regex with string-based `start_with?`/`index` approach that only strips tags at the beginning of a message where providers actually emit them (inference/executor.rb, context/curator.rb)
|
|
39
|
+
- **ToolResultEvent unresolved constant** — `client_passthrough_tool_loop_result` referenced `ToolResultEvent` without namespace; Ruby's lexical constant lookup failed in the included module. Qualified as `Executor::ToolResultEvent` (inference/steps/tool_calls.rb)
|
|
40
|
+
- **client_tool_methods_spec sandbox failure** — Tests created temp files in `/tmp` which `validate_client_tool_path` correctly rejects. Moved to project-relative `tmp/` directory (spec/api/native/client_tool_methods_spec.rb)
|
|
41
|
+
|
|
42
|
+
### Changed
|
|
43
|
+
- **Removed `llm_setting` abstraction** — All `llm_setting(:key)` calls replaced with direct `Legion::Settings[:llm][:key]` access. The indirection obscured that settings must flow through `Legion::Settings` to pick up dynamic user overrides. Affected: inference/executor.rb, inference.rb, inference/native_tool_loop.rb, inference/prompt.rb
|
|
44
|
+
- **`fallback.allow_local` defaults to true** — Local providers (ollama/vllm) are now allowed as fallback targets by default instead of being permanently excluded (settings.rb)
|
|
45
|
+
- **Text join separator** — OpenAI translator `extract_content` text blocks now join with `\n\n` instead of empty string for consistency
|
|
46
|
+
|
|
47
|
+
## [0.11.1] - 2026-06-01
|
|
48
|
+
|
|
49
|
+
### Fixed
|
|
50
|
+
- **Embedding instance selection honored** — Discovery honors a configured `embedding.instance` pin over a higher-tier-ranked empty instance (and skips empty candidates whose resolved model is absent), and `Embeddings.generate`/`generate_batch` resolve that configured instance on the dispatch path instead of falling back to the provider default
|
|
51
|
+
|
|
52
|
+
## [0.11.0] - 2026-05-31
|
|
53
|
+
|
|
54
|
+
### Added
|
|
55
|
+
- **Comprehensive diagnostic logging** — 28 files across executor, pipeline steps, tool loop, context, router, quality checker with structured `[llm][component] action=verb key=value` format at appropriate severity levels (debug/info/warn)
|
|
56
|
+
- **Context window enforcement** — Pre-dispatch compaction triggers at 90% of model's context window, preserving recent turns and aggressively compacting older history
|
|
57
|
+
- **Tool result trimming** — Oversized tool results from prior turns trimmed to 4000 chars before dispatch (current turn preserved in full)
|
|
58
|
+
- **Thinking block stripping** — Historical `<think>` blocks removed from prior assistant turns before dispatch
|
|
59
|
+
- **Empty response guard** — Streaming responses with no text and no tool calls emit `overloaded_error` instead of valid empty message, triggering client retry
|
|
60
|
+
- **System prompt: no tool call limit** — Added instruction telling models there is no tool call limit per turn
|
|
61
|
+
- **Conversation ID always generated** — API handler generates conv_id when client doesn't provide one; returned via `X-Legion-Conversation-Id` header
|
|
62
|
+
- **Metering spool encryption** — Spool file encrypts via `Legion::Crypt` when `:compliance, :encrypt_spool` enabled
|
|
63
|
+
- **Audit publisher improvements** — Preserves caller identity, includes agent_id/node_id, extracts provider metrics, hashes truncated conversations
|
|
64
|
+
|
|
65
|
+
### Fixed
|
|
66
|
+
- **Quality checker ignores tool-use responses** — No longer flags empty_response when model returns tool calls with no text content
|
|
67
|
+
- **Confidence scoring skips tool-use** — Score=0.0 no longer reported for valid tool call responses
|
|
68
|
+
- **Context overflow doesn't trip circuit breaker** — ContextLengthExceededError no longer reports `:error` signal to health tracker
|
|
69
|
+
- **HealthTracker deadlock prevention** — `Mutex` replaced with `Monitor` (reentrant) to prevent deadlock when custom handlers call back into report/adjustment
|
|
70
|
+
- **Thread pool fallback policy** — Chat/batch pools use `:caller_runs` instead of `:abort` (no silent request drops under load)
|
|
71
|
+
- **Bare Thread.new eliminated** — All async work uses managed `ASYNC_THREAD_POOL` with `at_exit` shutdown hooks
|
|
72
|
+
- **Conversation#replace preserves internal roles** — `__metadata__` and `__curated__` entries no longer wiped on replace
|
|
73
|
+
- **EscalationChain method naming** — `padded_resolutions` renamed to `capped_resolutions` (it truncates, not pads)
|
|
74
|
+
- **trigger_tool_limit default mismatch** — Fallback default fixed from 50 to 25 to match settings.rb
|
|
75
|
+
- **Debate extract_question string keys** — `m[:role] == :user` changed to `.to_s == 'user'` for mixed-key messages
|
|
76
|
+
- **EnrichmentInjector nil safety** — `enrichments` param defaults to `{}` when nil
|
|
77
|
+
- **Stop reason preserved from provider** — `message_response` and `chunk_response` extract actual `finish_reason` from raw provider response instead of discarding
|
|
78
|
+
- **OpenAI streaming usage stats** — Always included in final chunk (was gated behind `include_reasoning`)
|
|
79
|
+
- **Metering identity** — Uses caller identity from request, not process publisher identity
|
|
80
|
+
- **Metering request_type** — Derived from request metadata (image/audio/chat), not hardcoded 'chat'
|
|
81
|
+
- **Metering actual cost** — Prefers provider-reported cost over local estimate
|
|
82
|
+
- **Metering encryption** — `encrypt?` respects `:compliance, :encrypt_metering` setting
|
|
83
|
+
- **Audit identity clobbering** — `attributed_event` uses `||=` to preserve caller identity
|
|
84
|
+
- **Audit step order** — `post_response` (audit) now runs before `metering` (financial records need supporting evidence)
|
|
85
|
+
- **Audit tool spooling** — Failed tool audit events spool to disk instead of silent drop
|
|
86
|
+
- **Audit timeline** — Preserves RBAC, classification, billing, confidence decisions
|
|
87
|
+
- **Budget cap** — Pre-flight check estimates output tokens (assumes output ≈ input) instead of `output_tokens: 0`
|
|
88
|
+
- **Embeddings audit** — POST /v1/embeddings now emits audit event
|
|
89
|
+
- **Native chat audit** — Async chat path emits `Audit.emit_prompt` after completion
|
|
90
|
+
- **Knowledge capture embedding** — Truncates content to 2000 chars before embedding to prevent ContextLengthExceededError
|
|
91
|
+
- **Dedup performance** — O(n²) → O(n×20) via sliding window comparison
|
|
92
|
+
- **22 silent rescue swallows** — All `rescue StandardError` without variable capture now log at debug level
|
|
93
|
+
|
|
94
|
+
### Changed
|
|
95
|
+
- **max_tool_calls_per_turn: 50** — New setting (was dead `MAX_TOOL_LOOPS = 10` constant); deferred tool calls get error result telling model to retry
|
|
96
|
+
- **max_tool_rounds** — Removed `MAX_NATIVE_TOOL_ROUNDS` constant; reads directly from settings
|
|
97
|
+
- **Settings-driven limits** — Redundant fallback defaults removed from `llm_setting` call sites
|
|
98
|
+
|
|
99
|
+
## [0.10.4] - 2026-05-31
|
|
100
|
+
|
|
101
|
+
### Fixed
|
|
102
|
+
- **TRANSLATION-BUG-01**: Anthropic `tool_result` content blocks preserved as arrays — multimodal tool results (images) no longer flattened to string.
|
|
103
|
+
- **TRANSLATION-BUG-03**: Anthropic `stop_reason` properly maps `content_filter`; distinguishes `stop` with/without `stop_sequence`.
|
|
104
|
+
- **TRANSLATION-BUG-04**: OpenAI `map_finish_reason` returns `error` for unknown stop reasons instead of `stop` (errors no longer disguised as success).
|
|
105
|
+
- **TRANSLATION-BUG-05**: OpenAI `extract_content` preserves `image_url` and non-text content parts — vision input no longer silently dropped.
|
|
106
|
+
- **TRANSLATION-BUG-06**: Anthropic streaming `content_block_start` includes tool arguments in `input` field (was empty `{}`).
|
|
107
|
+
- **TRANSLATION-BUG-09**: Anthropic system prompt `cache_control` metadata preserved when present — prompt caching no longer silently disabled.
|
|
108
|
+
- **TRANSLATION-BUG-10**: Stable `tool_call_id` generated when OpenAI client sends nil — multi-turn tool chains no longer break.
|
|
109
|
+
- **TRANSLATION-BUG-11**: OpenAI translator uses symbol roles (`:user`, `:assistant`) matching Anthropic — executor symbol comparisons now work.
|
|
110
|
+
- **TRANSLATION-BUG-12**: Unsupported OpenAI tool types (`code_interpreter`, `file_search`) logged at debug instead of silent drop.
|
|
111
|
+
|
|
112
|
+
## [0.10.3] - 2026-05-31
|
|
113
|
+
|
|
114
|
+
### Fixed
|
|
115
|
+
- **DaemonClient HTTPS support** — `http_get` and `http_post` now set `http.use_ssl = true` when the daemon URL scheme is `https://`. Previously, all daemon communication was plain HTTP, silently failing for HTTPS URLs or sending credentials in cleartext.
|
|
116
|
+
- **Context compression guard against preserve_recent: 0** — `auto_compact` now enforces a minimum `preserve_recent` of 1. A value of 0 would compact the entire conversation including the latest messages, producing empty context.
|
|
117
|
+
- **Context curator thread safety** — `curate_turn` and `curated_messages` now synchronize on a per-instance `@curation_mutex`. Concurrent turns could race on `@curated_messages`, causing stale or nil curation state.
|
|
118
|
+
- **Recursive compaction guard** — `maybe_compact_history` now uses `Thread.current[:legion_compacting]` to prevent infinite recursion when `Context::Compressor.auto_compact` triggers its own LLM summarization call, which would recursively trigger compaction again.
|
|
119
|
+
- **Metering::Tokens unbounded memory growth** — `TokenTracker#record` now evicts oldest entries when the store exceeds `MAX_ENTRIES` (10,000). Long-running high-throughput processes would leak memory.
|
|
120
|
+
- **Tool timeline index per-call resolution** — `build_tool_timeline_index` now tracks per-tool-name call counts and produces keys like `"read_file:2"` for repeated calls. `build_response_tool_calls` matches each tool call to its corresponding timeline entry, fixing wrong duration/status when the same tool is called multiple times in a round.
|
|
121
|
+
- **Streaming escalation quality bypass documented** — Added explicit comment noting that streaming escalation attempts always pass quality check because in-flight stream quality-checking is not supported.
|
|
122
|
+
|
|
3
123
|
## [0.10.2] - 2026-05-30
|
|
4
124
|
|
|
5
125
|
### Fixed
|
data/Gemfile
CHANGED
|
@@ -12,7 +12,9 @@ group :test do
|
|
|
12
12
|
if Dir.exist?(lex_llm_path)
|
|
13
13
|
gem 'lex-llm', path: lex_llm_path
|
|
14
14
|
else
|
|
15
|
-
gem 'lex-llm'
|
|
15
|
+
# TEMP (revert to `gem 'lex-llm'` once 0.4.16 is published): track lex-llm PR #16, which
|
|
16
|
+
# adds the fleet TokenValidator verify_issuer + WorkerExecution policy-warn behavior these specs require.
|
|
17
|
+
gem 'lex-llm', git: 'https://github.com/LegionIO/lex-llm.git', branch: 'fix/audit-fleet-security'
|
|
16
18
|
end
|
|
17
19
|
|
|
18
20
|
%w[
|
data/lib/legion/llm/api/auth.rb
CHANGED
|
@@ -37,7 +37,7 @@ module Legion
|
|
|
37
37
|
|
|
38
38
|
app.helpers do
|
|
39
39
|
define_method(:auth_enabled?) do
|
|
40
|
-
Legion::
|
|
40
|
+
Legion::Settings.dig(:llm, :api, :auth, :enabled) == true
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
define_method(:extract_token) do |req|
|
|
@@ -56,7 +56,7 @@ module Legion
|
|
|
56
56
|
return true unless auth_enabled?
|
|
57
57
|
return false if token.nil? || token.empty?
|
|
58
58
|
|
|
59
|
-
keys = Legion::
|
|
59
|
+
keys = Legion::Settings.dig(:llm, :api, :auth, :api_keys) || []
|
|
60
60
|
keys.include?(token)
|
|
61
61
|
end
|
|
62
62
|
end
|
|
@@ -201,7 +201,8 @@ module Legion
|
|
|
201
201
|
def files_storage_path
|
|
202
202
|
configured = begin
|
|
203
203
|
Legion::Settings.dig(:llm, :files, :storage_path)
|
|
204
|
-
rescue StandardError
|
|
204
|
+
rescue StandardError => e
|
|
205
|
+
log.debug "[llm][api][anthropic][files] action=files_storage_path_fallback error=#{e.class} message=#{e.message}"
|
|
205
206
|
nil
|
|
206
207
|
end
|
|
207
208
|
base = configured.to_s.empty? ? ::File.join(Dir.home, '.legionio', 'data', 'files') : configured
|
|
@@ -34,7 +34,7 @@ module Legion
|
|
|
34
34
|
tool_defs = build_tool_definitions(normalized[:tools] || [], executable: false)
|
|
35
35
|
modality = detect_modality(normalized[:messages])
|
|
36
36
|
|
|
37
|
-
conv_id = env['HTTP_X_LEGION_CONVERSATION_ID'] || body[:conversation_id]
|
|
37
|
+
conv_id = env['HTTP_X_LEGION_CONVERSATION_ID'] || body[:conversation_id] || "conv_#{SecureRandom.hex(8)}"
|
|
38
38
|
ext_provider = env['HTTP_X_LEGION_PROVIDER'] || body[:provider]
|
|
39
39
|
ext_tier = env['HTTP_X_LEGION_TIER'] || body[:tier]
|
|
40
40
|
ext_instance = env['HTTP_X_LEGION_INSTANCE'] || body[:instance]
|
|
@@ -78,7 +78,8 @@ module Legion
|
|
|
78
78
|
|
|
79
79
|
if streaming
|
|
80
80
|
content_type 'text/event-stream'
|
|
81
|
-
headers 'Cache-Control' => 'no-cache', 'Connection' => 'keep-alive', 'X-Accel-Buffering' => 'no'
|
|
81
|
+
headers 'Cache-Control' => 'no-cache', 'Connection' => 'keep-alive',
|
|
82
|
+
'X-Accel-Buffering' => 'no', 'X-Legion-Conversation-Id' => conv_id
|
|
82
83
|
|
|
83
84
|
stream do |out|
|
|
84
85
|
full_text = +''
|
|
@@ -90,7 +91,7 @@ module Legion
|
|
|
90
91
|
id: request_id, type: 'message', role: 'assistant',
|
|
91
92
|
content: [], model: model.to_s,
|
|
92
93
|
stop_reason: nil, stop_sequence: nil,
|
|
93
|
-
usage: { input_tokens:
|
|
94
|
+
usage: { input_tokens: est_tokens, output_tokens: 0 }
|
|
94
95
|
}
|
|
95
96
|
})}\n\n"
|
|
96
97
|
|
|
@@ -137,6 +138,16 @@ module Legion
|
|
|
137
138
|
"tool_calls=#{tool_calls.size} stop_reason=#{stop_reason} " \
|
|
138
139
|
"text_block_opened=#{text_block_opened} full_text_length=#{full_text.length}"
|
|
139
140
|
|
|
141
|
+
if tool_calls.empty? && full_text.empty?
|
|
142
|
+
log.warn "[llm][api][anthropic] action=empty_response request_id=#{request_id} " \
|
|
143
|
+
"model=#{model} text_block_opened=#{text_block_opened} — provider returned no content, signaling overloaded"
|
|
144
|
+
out << "event: error\ndata: #{Legion::JSON.dump({
|
|
145
|
+
type: 'error', error: { type: 'overloaded_error',
|
|
146
|
+
message: 'Model returned empty response. Please retry.' }
|
|
147
|
+
})}\n\n"
|
|
148
|
+
next
|
|
149
|
+
end
|
|
150
|
+
|
|
140
151
|
if text_block_opened
|
|
141
152
|
out << "event: content_block_stop\ndata: #{Legion::JSON.dump({ type: 'content_block_stop', index: 0 })}\n\n"
|
|
142
153
|
content_index = 1
|
|
@@ -158,7 +169,8 @@ module Legion
|
|
|
158
169
|
out << "event: message_delta\ndata: #{Legion::JSON.dump({
|
|
159
170
|
type: 'message_delta',
|
|
160
171
|
delta: { stop_reason: stop_reason, stop_sequence: nil },
|
|
161
|
-
usage: {
|
|
172
|
+
usage: { input_tokens: translator.token_count(tokens, :input),
|
|
173
|
+
output_tokens: translator.token_count(tokens, :output) }
|
|
162
174
|
})}\n\n"
|
|
163
175
|
out << "event: message_stop\ndata: #{Legion::JSON.dump({ type: 'message_stop' })}\n\n"
|
|
164
176
|
log.info "[llm][api][anthropic] action=stream_complete request_id=#{request_id} stop_reason=#{stop_reason}"
|
|
@@ -172,6 +184,7 @@ module Legion
|
|
|
172
184
|
pipeline_response, model: model, request_id: request_id
|
|
173
185
|
)
|
|
174
186
|
|
|
187
|
+
headers 'X-Legion-Conversation-Id' => conv_id
|
|
175
188
|
content_type :json
|
|
176
189
|
status 200
|
|
177
190
|
Legion::JSON.dump(formatted)
|
|
@@ -42,7 +42,8 @@ module Legion
|
|
|
42
42
|
|
|
43
43
|
def data_subsystem_available?
|
|
44
44
|
defined?(Legion::Data) && Legion::Data.respond_to?(:connected?) && Legion::Data.connected?
|
|
45
|
-
rescue StandardError
|
|
45
|
+
rescue StandardError => e
|
|
46
|
+
log.debug "[llm][api][namespaces][helpers] action=data_subsystem_check_fallback error=#{e.class} message=#{e.message}"
|
|
46
47
|
false
|
|
47
48
|
end
|
|
48
49
|
end
|
|
@@ -16,7 +16,7 @@ module Legion
|
|
|
16
16
|
|
|
17
17
|
ASYNC_POOL = Concurrent::FixedThreadPool.new(
|
|
18
18
|
[4, (Concurrent.processor_count / 2)].max,
|
|
19
|
-
fallback_policy: :abort
|
|
19
|
+
fallback_policy: :caller_runs
|
|
20
20
|
)
|
|
21
21
|
|
|
22
22
|
# Ensure the thread pool is shut down cleanly when the process exits.
|
|
@@ -66,6 +66,14 @@ module Legion
|
|
|
66
66
|
tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
|
|
67
67
|
}
|
|
68
68
|
)
|
|
69
|
+
Legion::LLM::Audit.emit_prompt(
|
|
70
|
+
request_id: request_id,
|
|
71
|
+
caller: { requested_by: { identity: 'api:chat:async', type: :external } },
|
|
72
|
+
routing: { model: session.model.to_s, provider: provider },
|
|
73
|
+
tokens: { input_tokens: response.respond_to?(:input_tokens) ? response.input_tokens : 0,
|
|
74
|
+
output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : 0 },
|
|
75
|
+
timestamp: Time.now
|
|
76
|
+
)
|
|
69
77
|
log.debug("[llm][api][namespaces][chat] action=async_complete request_id=#{request_id}")
|
|
70
78
|
rescue StandardError => e
|
|
71
79
|
handle_exception(e, level: :error, handled: true, operation: 'llm.api.chat.async', request_id: request_id)
|
|
@@ -36,7 +36,8 @@ module Legion
|
|
|
36
36
|
def self.capable_provider_available?
|
|
37
37
|
instances = begin
|
|
38
38
|
Legion::LLM::Call::Registry.all_instances
|
|
39
|
-
rescue StandardError
|
|
39
|
+
rescue StandardError => e
|
|
40
|
+
log.debug "[llm][api][openai][audio][speech] action=registry_fallback error=#{e.class} message=#{e.message}"
|
|
40
41
|
[]
|
|
41
42
|
end
|
|
42
43
|
instances.any? do |entry|
|
|
@@ -94,7 +95,8 @@ module Legion
|
|
|
94
95
|
else
|
|
95
96
|
begin
|
|
96
97
|
Legion::JSON.load(raw)
|
|
97
|
-
rescue StandardError
|
|
98
|
+
rescue StandardError => e
|
|
99
|
+
log.debug "[llm][api][openai][audio][speech] action=parse_body_fallback error=#{e.class} message=#{e.message}"
|
|
98
100
|
{}
|
|
99
101
|
end
|
|
100
102
|
end
|
|
@@ -25,7 +25,8 @@ module Legion
|
|
|
25
25
|
def self.capable_provider_available?
|
|
26
26
|
instances = begin
|
|
27
27
|
Legion::LLM::Call::Registry.all_instances
|
|
28
|
-
rescue StandardError
|
|
28
|
+
rescue StandardError => e
|
|
29
|
+
log.debug "[llm][api][openai][audio][transcriptions] action=registry_fallback error=#{e.class} message=#{e.message}"
|
|
29
30
|
[]
|
|
30
31
|
end
|
|
31
32
|
instances.any? do |entry|
|
|
@@ -25,7 +25,8 @@ module Legion
|
|
|
25
25
|
def self.capable_provider_available?
|
|
26
26
|
instances = begin
|
|
27
27
|
Legion::LLM::Call::Registry.all_instances
|
|
28
|
-
rescue StandardError
|
|
28
|
+
rescue StandardError => e
|
|
29
|
+
log.debug "[llm][api][openai][audio][translations] action=registry_fallback error=#{e.class} message=#{e.message}"
|
|
29
30
|
[]
|
|
30
31
|
end
|
|
31
32
|
instances.any? do |entry|
|
|
@@ -23,8 +23,8 @@ module Legion
|
|
|
23
23
|
|
|
24
24
|
BATCH_POOL_MUTEX.synchronize do
|
|
25
25
|
@batch_pool ||= begin
|
|
26
|
-
pool_size = Legion::
|
|
27
|
-
Concurrent::FixedThreadPool.new(pool_size, fallback_policy: :abort)
|
|
26
|
+
pool_size = Legion::Settings[:llm][:api][:batch_pool_size] || 4
|
|
27
|
+
Concurrent::FixedThreadPool.new(pool_size, fallback_policy: :caller_runs)
|
|
28
28
|
end
|
|
29
29
|
end
|
|
30
30
|
end
|
|
@@ -237,7 +237,8 @@ module Legion
|
|
|
237
237
|
|
|
238
238
|
::File.readlines(file_path).filter_map do |line|
|
|
239
239
|
Legion::JSON.load(line.strip)
|
|
240
|
-
rescue StandardError
|
|
240
|
+
rescue StandardError => e
|
|
241
|
+
log.debug "[llm][api][openai][batches] action=load_batch_line_fallback file=#{file_id} error=#{e.class} message=#{e.message}"
|
|
241
242
|
nil
|
|
242
243
|
end
|
|
243
244
|
end
|
|
@@ -31,7 +31,7 @@ module Legion
|
|
|
31
31
|
|
|
32
32
|
request_id = body[:request_id] || SecureRandom.uuid
|
|
33
33
|
normalized = Legion::LLM::API::Translators::OpenAIRequest.normalize(body)
|
|
34
|
-
model = normalized[:model] || Legion::
|
|
34
|
+
model = normalized[:model] || Legion::Settings[:llm][:default_model] || 'default'
|
|
35
35
|
streaming = normalized[:stream] == true
|
|
36
36
|
include_reasoning = body[:include_reasoning] == true || body[:include_thinking] == true
|
|
37
37
|
tool_decls = Completions.build_tool_declarations(normalized[:tools])
|
|
@@ -231,8 +231,7 @@ module Legion
|
|
|
231
231
|
end
|
|
232
232
|
|
|
233
233
|
def self.append_usage_stats(done_chunk, pipeline_response, include_reasoning)
|
|
234
|
-
|
|
235
|
-
|
|
234
|
+
_ = include_reasoning
|
|
236
235
|
tokens = pipeline_response.tokens || {}
|
|
237
236
|
oai = Legion::LLM::API::Translators::OpenAIResponse
|
|
238
237
|
input_count = oai.extract_token_count(tokens, :input).to_i
|
|
@@ -27,7 +27,7 @@ module Legion
|
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
request_id = SecureRandom.uuid
|
|
30
|
-
model = body[:model] || Legion::
|
|
30
|
+
model = body[:model] || Legion::Settings[:llm][:default_model] || 'default'
|
|
31
31
|
messages = [{ role: 'user', content: prompt.to_s }]
|
|
32
32
|
|
|
33
33
|
log.info("[llm][api][namespaces][openai][completions] action=accepted request_id=#{request_id} model=#{model}")
|
|
@@ -15,11 +15,11 @@ module Legion
|
|
|
15
15
|
def self.registered(app)
|
|
16
16
|
log.debug('[llm][api][namespaces][openai][embeddings] registering routes')
|
|
17
17
|
|
|
18
|
-
app.post '/v1/embeddings' do
|
|
18
|
+
app.post '/v1/embeddings' do # rubocop:disable Metrics/BlockLength
|
|
19
19
|
require_llm!
|
|
20
20
|
body = parse_request_body
|
|
21
21
|
input = body[:input]
|
|
22
|
-
model = body[:model] || Legion::
|
|
22
|
+
model = body[:model] || Legion::Settings[:llm][:default_model]
|
|
23
23
|
|
|
24
24
|
if input.nil? || (input.respond_to?(:empty?) && input.empty?)
|
|
25
25
|
return openai_error('input is required', type: 'invalid_request_error',
|
|
@@ -43,6 +43,16 @@ module Legion
|
|
|
43
43
|
)
|
|
44
44
|
|
|
45
45
|
log.info("[llm][api][namespaces][openai][embeddings] action=complete model=#{model} dims=#{vector_array.size}")
|
|
46
|
+
|
|
47
|
+
Legion::LLM::Audit.emit_prompt(
|
|
48
|
+
request_id: SecureRandom.uuid,
|
|
49
|
+
caller: build_server_caller(source: 'openai_embeddings', path: request.path, env: env),
|
|
50
|
+
routing: { model: model, provider: 'embed' },
|
|
51
|
+
tokens: { input_tokens: (text.length / 4.0).ceil, output_tokens: 0 },
|
|
52
|
+
request_type: 'embedding',
|
|
53
|
+
timestamp: Time.now
|
|
54
|
+
)
|
|
55
|
+
|
|
46
56
|
content_type :json
|
|
47
57
|
Legion::JSON.dump(response_body)
|
|
48
58
|
rescue Legion::LLM::AuthError => e
|
|
@@ -53,12 +53,14 @@ module Legion
|
|
|
53
53
|
file_id = "file-#{SecureRandom.hex(16)}"
|
|
54
54
|
filename = begin
|
|
55
55
|
uploaded[:filename] || uploaded.original_filename
|
|
56
|
-
rescue StandardError
|
|
56
|
+
rescue StandardError => e
|
|
57
|
+
log.debug "[llm][api][openai][files] action=filename_fallback error=#{e.class} message=#{e.message}"
|
|
57
58
|
'upload.bin'
|
|
58
59
|
end
|
|
59
60
|
data = begin
|
|
60
61
|
uploaded[:tempfile]&.read || uploaded.read
|
|
61
|
-
rescue StandardError
|
|
62
|
+
rescue StandardError => e
|
|
63
|
+
log.debug "[llm][api][openai][files] action=file_read_fallback error=#{e.class} message=#{e.message}"
|
|
62
64
|
''
|
|
63
65
|
end
|
|
64
66
|
|
|
@@ -23,7 +23,8 @@ module Legion
|
|
|
23
23
|
def self.capable_provider_available?(capability)
|
|
24
24
|
instances = begin
|
|
25
25
|
Legion::LLM::Call::Registry.all_instances
|
|
26
|
-
rescue StandardError
|
|
26
|
+
rescue StandardError => e
|
|
27
|
+
log.debug "[llm][api][openai][images] action=registry_fallback capability=#{capability} error=#{e.class} message=#{e.message}"
|
|
27
28
|
[]
|
|
28
29
|
end
|
|
29
30
|
instances.any? do |entry|
|
|
@@ -76,7 +77,8 @@ module Legion
|
|
|
76
77
|
|
|
77
78
|
Legion::JSON.load(raw)
|
|
78
79
|
end
|
|
79
|
-
rescue StandardError
|
|
80
|
+
rescue StandardError => e
|
|
81
|
+
log.debug "[llm][api][openai][images] action=parse_media_body_fallback error=#{e.class} message=#{e.message}"
|
|
80
82
|
{}
|
|
81
83
|
end
|
|
82
84
|
|
|
@@ -116,7 +118,7 @@ module Legion
|
|
|
116
118
|
Legion::JSON.dump({ error: { message: 'prompt is required', type: 'invalid_request_error', code: nil } })
|
|
117
119
|
end
|
|
118
120
|
|
|
119
|
-
model = (body[:model] || body['model'] || Legion::
|
|
121
|
+
model = (body[:model] || body['model'] || Legion::Settings[:llm][:default_model] || 'dall-e-3').to_s
|
|
120
122
|
n = [(body[:n] || body['n'] || 1).to_i, 1].max
|
|
121
123
|
size = (body[:size] || body['size'] || '1024x1024').to_s
|
|
122
124
|
quality = (body[:quality] || body['quality'] || 'standard').to_s
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require 'time'
|
|
4
4
|
require 'legion/logging/helper'
|
|
5
5
|
require 'legion/llm/api/namespaces/helpers'
|
|
6
|
+
require 'legion/llm/api/native/models'
|
|
6
7
|
require 'legion/llm/api/translators/openai_response'
|
|
7
8
|
|
|
8
9
|
module Legion
|
|
@@ -81,10 +82,14 @@ module Legion
|
|
|
81
82
|
end
|
|
82
83
|
|
|
83
84
|
def self.build_openai_model_list
|
|
84
|
-
|
|
85
|
+
offerings = Legion::LLM::Inventory.offerings(type: :inference)
|
|
86
|
+
offerings = Legion::LLM::API::Native::Models.with_auto_routing_offering(offerings, {})
|
|
87
|
+
|
|
88
|
+
models = offerings.map do |offering|
|
|
85
89
|
Legion::LLM::API::Translators::OpenAIResponse.format_model_object(
|
|
86
90
|
offering[:model],
|
|
87
|
-
owned_by: offering[:provider_family]
|
|
91
|
+
owned_by: offering[:provider_family],
|
|
92
|
+
limits: offering[:limits]
|
|
88
93
|
)
|
|
89
94
|
end
|
|
90
95
|
seen = {}
|
|
@@ -99,12 +104,15 @@ module Legion
|
|
|
99
104
|
end
|
|
100
105
|
|
|
101
106
|
def self.openai_to_anthropic_model(openai_model)
|
|
102
|
-
{
|
|
107
|
+
model = {
|
|
108
|
+
type: 'model',
|
|
103
109
|
id: openai_model[:id],
|
|
104
110
|
display_name: openai_model[:id],
|
|
105
|
-
created_at: Time.at(openai_model[:created] || Time.now.to_i).utc.strftime('%Y-%m-%dT%H:%M:%SZ')
|
|
106
|
-
type: 'model'
|
|
111
|
+
created_at: Time.at(openai_model[:created] || Time.now.to_i).utc.strftime('%Y-%m-%dT%H:%M:%SZ')
|
|
107
112
|
}
|
|
113
|
+
model[:max_input_tokens] = openai_model[:context_window] if openai_model[:context_window]
|
|
114
|
+
model[:max_tokens] = openai_model[:max_output_tokens] if openai_model[:max_output_tokens]
|
|
115
|
+
model
|
|
108
116
|
end
|
|
109
117
|
end
|
|
110
118
|
end
|
|
@@ -36,7 +36,7 @@ module Legion
|
|
|
36
36
|
|
|
37
37
|
messages = [{ role: 'system', content: body[:instructions].to_s }] + messages if body[:instructions]
|
|
38
38
|
|
|
39
|
-
model = body[:model] || Legion::
|
|
39
|
+
model = body[:model] || Legion::Settings[:llm][:default_model] || 'default'
|
|
40
40
|
streaming = body[:stream] == true
|
|
41
41
|
tool_decls = Responses.build_tool_declarations(body[:tools])
|
|
42
42
|
|
|
@@ -52,22 +52,34 @@ module Legion
|
|
|
52
52
|
end
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
+
def validate_client_tool_path(path)
|
|
56
|
+
return 'file operation error: path is required' if path.nil? || path.to_s.empty?
|
|
57
|
+
|
|
58
|
+
expanded = ::File.expand_path(path)
|
|
59
|
+
sandbox_root = ::File.expand_path(Dir.pwd)
|
|
60
|
+
|
|
61
|
+
return "file operation error: path '#{path}' escapes working directory #{sandbox_root}" unless expanded.start_with?(sandbox_root)
|
|
62
|
+
|
|
63
|
+
expanded
|
|
64
|
+
end
|
|
65
|
+
|
|
55
66
|
def dispatch_client_tool(ref, **kwargs) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
|
56
67
|
case ref
|
|
57
68
|
when 'sh'
|
|
58
69
|
cmd = kwargs[:command] || kwargs[:cmd] || kwargs.values.first.to_s
|
|
70
|
+
log.warn("[llm][native] client_tool=sh command=#{cmd[0, 120]}")
|
|
59
71
|
output, status = ::Open3.capture2e(cmd, chdir: Dir.pwd)
|
|
60
72
|
"exit=#{status.exitstatus}\n#{output}"
|
|
61
73
|
when 'file_read'
|
|
62
|
-
path = kwargs[:path] || kwargs[:file_path] || kwargs.values.first.to_s
|
|
74
|
+
path = validate_client_tool_path(kwargs[:path] || kwargs[:file_path] || kwargs.values.first.to_s)
|
|
63
75
|
read_client_file(path)
|
|
64
76
|
when 'file_write'
|
|
65
|
-
path = kwargs[:path] || kwargs[:file_path]
|
|
77
|
+
path = validate_client_tool_path(kwargs[:path] || kwargs[:file_path])
|
|
66
78
|
content = kwargs[:content] || kwargs[:contents]
|
|
67
79
|
::File.write(path, content)
|
|
68
80
|
"Written #{content.to_s.bytesize} bytes to #{path}"
|
|
69
81
|
when 'file_edit'
|
|
70
|
-
path = kwargs[:path] || kwargs[:file_path]
|
|
82
|
+
path = validate_client_tool_path(kwargs[:path] || kwargs[:file_path])
|
|
71
83
|
old_text = kwargs[:old_text] || kwargs[:search]
|
|
72
84
|
new_text = kwargs[:new_text] || kwargs[:replace]
|
|
73
85
|
return 'file_edit error: old_text is required' if old_text.nil? || old_text.empty?
|
|
@@ -374,7 +386,8 @@ module Legion
|
|
|
374
386
|
return raw_args unless raw_args.is_a?(String)
|
|
375
387
|
|
|
376
388
|
Legion::JSON.parse(raw_args, symbolize_names: true)
|
|
377
|
-
rescue StandardError
|
|
389
|
+
rescue StandardError => e
|
|
390
|
+
log.debug "[llm][api][native][helpers] action=openai_tool_call_arguments_fallback error=#{e.class} message=#{e.message}"
|
|
378
391
|
raw_args
|
|
379
392
|
end
|
|
380
393
|
|
|
@@ -133,6 +133,8 @@ module Legion
|
|
|
133
133
|
end
|
|
134
134
|
|
|
135
135
|
def self.auto_routing_offering
|
|
136
|
+
ctx = Legion::Settings[:llm][:context_window] || 262_144
|
|
137
|
+
max_out = Legion::Settings[:llm][:max_output_tokens] || 16_384
|
|
136
138
|
{
|
|
137
139
|
id: AUTO_ROUTING_OFFERING_ID,
|
|
138
140
|
offering_id: AUTO_ROUTING_OFFERING_ID,
|
|
@@ -148,7 +150,7 @@ module Legion
|
|
|
148
150
|
transport: :internal,
|
|
149
151
|
enabled: true,
|
|
150
152
|
capabilities: AUTO_ROUTING_CAPABILITIES,
|
|
151
|
-
limits: {},
|
|
153
|
+
limits: { context_window: ctx, max_output_tokens: max_out },
|
|
152
154
|
health: { circuit_state: 'available' },
|
|
153
155
|
metadata: { auto_route: true, placeholder: true, display_name: AUTO_ROUTING_MODEL_DISPLAY },
|
|
154
156
|
routing_metadata: { strategy: 'auto' },
|
|
@@ -162,8 +162,8 @@ module Legion
|
|
|
162
162
|
def self.tier_priority
|
|
163
163
|
return Legion::LLM::Router.tier_priority if defined?(Legion::LLM::Router)
|
|
164
164
|
|
|
165
|
-
routing_config = Legion::
|
|
166
|
-
top_level = Legion::
|
|
165
|
+
routing_config = Legion::Settings[:llm][:routing] || {}
|
|
166
|
+
top_level = Legion::Settings[:llm][:tier_order] || nil
|
|
167
167
|
Array(top_level || routing_config[:tier_order] || routing_config[:tier_priority] ||
|
|
168
168
|
%w[local direct fleet openai_compat cloud frontier])
|
|
169
169
|
end
|
|
@@ -35,7 +35,7 @@ module Legion
|
|
|
35
35
|
|
|
36
36
|
request_id = body[:request_id] || SecureRandom.uuid
|
|
37
37
|
normalized = Legion::LLM::API::Translators::OpenAIRequest.normalize(body)
|
|
38
|
-
model = normalized[:model] || Legion::
|
|
38
|
+
model = normalized[:model] || Legion::Settings[:llm][:default_model] || 'default'
|
|
39
39
|
streaming = normalized[:stream] == true
|
|
40
40
|
include_reasoning = body[:include_reasoning] == true || body[:include_thinking] == true
|
|
41
41
|
|
|
@@ -26,7 +26,7 @@ module Legion
|
|
|
26
26
|
body = parse_request_body
|
|
27
27
|
|
|
28
28
|
input = body[:input] || body['input']
|
|
29
|
-
model = body[:model] || body['model'] || Legion::
|
|
29
|
+
model = body[:model] || body['model'] || Legion::Settings[:llm][:default_model]
|
|
30
30
|
|
|
31
31
|
if input.nil? || (input.respond_to?(:empty?) && input.empty?)
|
|
32
32
|
halt 400, { 'Content-Type' => 'application/json' },
|
|
@@ -42,7 +42,7 @@ module Legion
|
|
|
42
42
|
|
|
43
43
|
messages = [{ role: 'system', content: body[:instructions].to_s }] + messages if body[:instructions]
|
|
44
44
|
|
|
45
|
-
model = body[:model] || Legion::
|
|
45
|
+
model = body[:model] || Legion::Settings[:llm][:default_model] || 'default'
|
|
46
46
|
streaming = body[:stream] == true
|
|
47
47
|
|
|
48
48
|
tool_declarations = Responses.build_tool_declarations(body[:tools])
|