legion-llm 0.8.30 → 0.8.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +127 -8
- data/CLAUDE.md +8 -20
- data/Gemfile +22 -0
- data/README.md +95 -103
- data/legion-llm.gemspec +5 -8
- data/lib/legion/llm/api/anthropic/messages.rb +8 -10
- data/lib/legion/llm/api/auth.rb +2 -2
- data/lib/legion/llm/api/native/chat.rb +6 -2
- data/lib/legion/llm/api/native/helpers.rb +54 -76
- data/lib/legion/llm/api/native/inference.rb +3 -9
- data/lib/legion/llm/api/native/instances.rb +90 -0
- data/lib/legion/llm/api/native/models.rb +117 -0
- data/lib/legion/llm/api/native/offerings.rb +75 -0
- data/lib/legion/llm/api/native/providers.rb +70 -32
- data/lib/legion/llm/api/openai/chat_completions.rb +13 -12
- data/lib/legion/llm/api/openai/embeddings.rb +1 -1
- data/lib/legion/llm/api/openai/models.rb +6 -40
- data/lib/legion/llm/api.rb +6 -0
- data/lib/legion/llm/audit.rb +1 -2
- data/lib/legion/llm/cache/response.rb +47 -23
- data/lib/legion/llm/cache.rb +45 -22
- data/lib/legion/llm/call/claude_config_loader.rb +23 -13
- data/lib/legion/llm/call/codex_config_loader.rb +8 -3
- data/lib/legion/llm/call/daemon_client.rb +4 -4
- data/lib/legion/llm/call/dispatch.rb +94 -10
- data/lib/legion/llm/call/embeddings.rb +117 -40
- data/lib/legion/llm/call/lex_llm_adapter.rb +202 -0
- data/lib/legion/llm/call/providers.rb +365 -169
- data/lib/legion/llm/call/structured_output.rb +3 -3
- data/lib/legion/llm/call.rb +0 -1
- data/lib/legion/llm/compat.rb +7 -5
- data/lib/legion/llm/config.rb +8 -12
- data/lib/legion/llm/context/compressor.rb +5 -1
- data/lib/legion/llm/context/curator.rb +7 -5
- data/lib/legion/llm/discovery/ollama.rb +23 -3
- data/lib/legion/llm/discovery/system.rb +1 -1
- data/lib/legion/llm/discovery/vllm.rb +26 -6
- data/lib/legion/llm/discovery.rb +57 -27
- data/lib/legion/llm/fleet/dispatcher.rb +125 -32
- data/lib/legion/llm/fleet/handler.rb +48 -19
- data/lib/legion/llm/fleet/lane.rb +132 -0
- data/lib/legion/llm/fleet/reply_dispatcher.rb +50 -33
- data/lib/legion/llm/fleet.rb +1 -0
- data/lib/legion/llm/helper.rb +3 -9
- data/lib/legion/llm/hooks/budget_guard.rb +1 -3
- data/lib/legion/llm/hooks/rag_guard.rb +2 -4
- data/lib/legion/llm/hooks/reflection.rb +1 -2
- data/lib/legion/llm/inference/audit_publisher.rb +36 -12
- data/lib/legion/llm/inference/enrichment_injector.rb +1 -3
- data/lib/legion/llm/inference/executor.rb +363 -203
- data/lib/legion/llm/inference/prompt.rb +25 -2
- data/lib/legion/llm/inference/response.rb +1 -1
- data/lib/legion/llm/inference/steps/classification.rb +77 -26
- data/lib/legion/llm/inference/steps/debate.rb +44 -23
- data/lib/legion/llm/inference/steps/metering.rb +9 -7
- data/lib/legion/llm/inference/steps/prompt_cache.rb +27 -12
- data/lib/legion/llm/inference/steps/rag_context.rb +42 -21
- data/lib/legion/llm/inference/steps/rbac.rb +1 -1
- data/lib/legion/llm/inference/steps/skill_injector.rb +28 -6
- data/lib/legion/llm/inference/steps/sticky_helpers.rb +33 -6
- data/lib/legion/llm/inference/steps/tier_assigner.rb +28 -9
- data/lib/legion/llm/inference/steps/tool_calls.rb +16 -3
- data/lib/legion/llm/inference/steps/tool_history.rb +5 -1
- data/lib/legion/llm/inference/steps/trigger_match.rb +25 -2
- data/lib/legion/llm/inference.rb +155 -108
- data/lib/legion/llm/inventory.rb +409 -0
- data/lib/legion/llm/metering/tokens.rb +2 -6
- data/lib/legion/llm/metering/tracker.rb +10 -3
- data/lib/legion/llm/metering.rb +31 -9
- data/lib/legion/llm/quality/confidence/scorer.rb +3 -9
- data/lib/legion/llm/quality/shadow_eval.rb +7 -3
- data/lib/legion/llm/router/arbitrage.rb +1 -4
- data/lib/legion/llm/router/gateway_interceptor.rb +4 -5
- data/lib/legion/llm/router/health_tracker.rb +31 -10
- data/lib/legion/llm/router/resolution.rb +26 -9
- data/lib/legion/llm/router/rule.rb +5 -4
- data/lib/legion/llm/router.rb +50 -41
- data/lib/legion/llm/scheduling/batch.rb +1 -4
- data/lib/legion/llm/scheduling.rb +1 -4
- data/lib/legion/llm/settings.rb +128 -5
- data/lib/legion/llm/skills/external_discovery.rb +2 -2
- data/lib/legion/llm/skills.rb +1 -1
- data/lib/legion/llm/tools/confidence.rb +38 -6
- data/lib/legion/llm/tools/dispatcher.rb +58 -1
- data/lib/legion/llm/tools.rb +0 -1
- data/lib/legion/llm/transport/exchanges/fleet.rb +1 -1
- data/lib/legion/llm/transport/message.rb +53 -8
- data/lib/legion/llm/transport/messages/fleet_error.rb +25 -16
- data/lib/legion/llm/transport/messages/fleet_request.rb +45 -1
- data/lib/legion/llm/transport/messages/fleet_response.rb +22 -15
- data/lib/legion/llm/transport/messages/prompt_event.rb +1 -1
- data/lib/legion/llm/transport/messages/skill_event.rb +1 -1
- data/lib/legion/llm/transport/messages/tool_event.rb +1 -1
- data/lib/legion/llm/types/tool_definition.rb +54 -0
- data/lib/legion/llm/types.rb +1 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +15 -14
- metadata +27 -70
- data/lib/legion/llm/bedrock_bearer_auth.rb +0 -4
- data/lib/legion/llm/call/bedrock_auth.rb +0 -53
- data/lib/legion/llm/call/bedrock_embeddings.rb +0 -270
- data/lib/legion/llm/inference/mcp_tool_adapter.rb +0 -5
- data/lib/legion/llm/inference/tool_adapter.rb +0 -13
- data/lib/legion/llm/patches/ruby_llm_parallel_tools.rb +0 -102
- data/lib/legion/llm/patches/ruby_llm_vllm.rb +0 -78
- data/lib/legion/llm/tools/adapter.rb +0 -101
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 758dae0c2bb2fba09c683bbcbf29459301ffdfe0239ff37214952e8c334422b0
|
|
4
|
+
data.tar.gz: 841c7d0ae47d825431601edb635bde28f1f7b60f938a008abe1c8d6f2ac5772d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7a14e57a3a2bd88ac05cb83608e64461b710aed5370eb0686830dc705a80019eb7de351279204eb96429e06da167420c744d8ae79518dbeec6d7a69fcec14c7a
|
|
7
|
+
data.tar.gz: d442eeee484806be5f01dd13e212b155d8cb8d839cb50a0ddd9517d013a49675320936927ded888aa4c617ee8d282624dc93b69b814599c097ee6078a3e4409c
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,124 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.8.47] - 2026-04-29
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Tool-bearing native inference now forwards native tool definitions and runs a bounded native tool loop through `Call::Dispatch` and `Inference::ToolDispatcher`.
|
|
7
|
+
- OpenAI-compatible, Anthropic-compatible, and native API tool declarations now use provider-neutral native tool definitions.
|
|
8
|
+
- Embedding generation and discovery health checks now route through native `Call::Dispatch.dispatch_embed`.
|
|
9
|
+
|
|
10
|
+
## [0.8.46] - 2026-04-29
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- Pipeline native dispatch now rejects tool-bearing requests that lack native tool-loop support instead of silently dropping tools.
|
|
14
|
+
- Escalation exhaustion now raises `EscalationExhausted` consistently when RubyLLM is unavailable, preserving the details of the native failure in the error message.
|
|
15
|
+
|
|
16
|
+
## [0.8.45] - 2026-04-29
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
- Native LLM API now exposes model-offering inventory at `/api/llm/offerings` and provider-instance inventory at `/api/llm/instances`, matching the routing redesign metadata surface.
|
|
20
|
+
|
|
21
|
+
## [0.8.44] - 2026-04-28
|
|
22
|
+
|
|
23
|
+
### Fixed
|
|
24
|
+
- Native LLM API caller metadata now uses the unified `Legion::Identity::Request`/`Process` identity path instead of ad-hoc request waterfalls.
|
|
25
|
+
- Identity broker credential lookups now include `purpose:` and `context:` metadata so credential grants are auditable.
|
|
26
|
+
- Fleet request expiration no longer logs warnings for omitted or blank TTL values, and unified identity caller metadata accepts string-keyed `requested_by` hashes.
|
|
27
|
+
|
|
28
|
+
## [0.8.43] - 2026-04-28
|
|
29
|
+
|
|
30
|
+
### Changed
|
|
31
|
+
- RubyLLM is now loaded as an optional compatibility layer instead of a router runtime dependency, and native provider dispatch no longer falls back to RubyLLM by default.
|
|
32
|
+
- Non-pipeline chat, escalation, provider probes, startup defaults, discovery checks, and compatibility API tool builders now degrade cleanly when RubyLLM is unavailable, routing native direct calls where a registered native provider exists and raising `ProviderError` instead of `NameError` for RubyLLM-only paths.
|
|
33
|
+
|
|
34
|
+
## [0.8.42] - 2026-04-28
|
|
35
|
+
|
|
36
|
+
### Fixed
|
|
37
|
+
- LLM provider settings snapshots now initialize their fallback source before deep-copying settings, preventing an uninitialized local fallback when snapshot duplication raises.
|
|
38
|
+
|
|
39
|
+
## [0.8.41] - 2026-04-28
|
|
40
|
+
|
|
41
|
+
### Fixed
|
|
42
|
+
- The router gemspec now depends on the provider-neutral `lex-llm` base instead of installing legacy provider gems (`lex-bedrock`, `lex-claude`, `lex-gemini`, and `lex-openai`) as runtime dependencies.
|
|
43
|
+
|
|
44
|
+
## [0.8.40] - 2026-04-28
|
|
45
|
+
|
|
46
|
+
### Fixed
|
|
47
|
+
- Streaming inference now uses native lex-llm provider dispatch when the provider layer selects native mode.
|
|
48
|
+
- Startup discovery now refreshes local system facts independently of Ollama model refresh, and metering publish now loads its transport message class when connected instead of depending on prior boot order.
|
|
49
|
+
|
|
50
|
+
## [0.8.39] - 2026-04-28
|
|
51
|
+
|
|
52
|
+
### Fixed
|
|
53
|
+
- OpenAI-compatible, Anthropic-compatible, and native chat API routes now use the same server-resolved caller identity metadata as native inference, preserving audit and metering identity fields across compatibility routes.
|
|
54
|
+
|
|
55
|
+
## [0.8.38] - 2026-04-28
|
|
56
|
+
|
|
57
|
+
### Fixed
|
|
58
|
+
- `require 'legion/llm'` now loads `legion-settings` when the host has not already loaded `Legion::Settings`, preserving standalone settings defaults and override behavior during LegionIO load-phase initialization.
|
|
59
|
+
|
|
60
|
+
## [0.8.37] - 2026-04-28
|
|
61
|
+
|
|
62
|
+
### Fixed
|
|
63
|
+
- Router resolutions, health tracking, inventory, native dispatch, inference responses, audit, and metering now preserve optional lex-llm model offering metadata while keeping provider/model fallback behavior compatible with older callers.
|
|
64
|
+
- Inventory now consumes lex-llm 0.1.5 `ModelOffering`-style fields from configured settings or native provider adapters when available, exposing offering IDs, model families, canonical aliases, provider instances, and routing metadata without credentials.
|
|
65
|
+
|
|
66
|
+
## [0.8.36] - 2026-04-28
|
|
67
|
+
|
|
68
|
+
### Fixed
|
|
69
|
+
- Fleet transport now publishes requests through the `llm.fleet` exchange, keeps provider/model in fleet message bodies for workers, and publishes handler replies through the mandatory confirmed fleet response message path.
|
|
70
|
+
- Inventory now exposes exact offering lanes and non-secret offering metadata so provider instances can opt into offering-level routing without losing shared fleet lane compatibility.
|
|
71
|
+
|
|
72
|
+
## [0.8.35] - 2026-04-28
|
|
73
|
+
|
|
74
|
+
### Fixed
|
|
75
|
+
- Fleet lane routing now rejects sensitive or oversized public boundary, eligibility, and offering-instance segments before they can enter routing keys.
|
|
76
|
+
- Lex-llm native provider adapters now memoize provider instances and cover streaming, token-count estimation, provider failures, missing namespaces, and non-hash message inputs.
|
|
77
|
+
- Local Ollama/vLLM health probes now distinguish malformed base URL configuration from ordinary unreachable services.
|
|
78
|
+
- Inventory failures now re-raise programmer/config-shape errors instead of silently returning an empty model list.
|
|
79
|
+
- Fleet reply dispatch logging now includes operation tags, JSON parse failures are now logged at warning level, and unwired broker return/nack helpers have been removed so that pending replies rely on the documented timeout path.
|
|
80
|
+
- The vLLM thinking patch now rescues only expected settings-shape errors instead of swallowing all runtime failures.
|
|
81
|
+
|
|
82
|
+
## [0.8.34] - 2026-04-28
|
|
83
|
+
|
|
84
|
+
### Fixed
|
|
85
|
+
- Native lex-llm provider dispatch now preserves injected system instructions when routing through `LexLLMAdapter`.
|
|
86
|
+
- Lex-llm bridge configuration now normalizes OpenAI-compatible `/v1` base URLs for `vllm` and `openai` providers while preserving versioned non-OpenAI-compatible endpoints.
|
|
87
|
+
|
|
88
|
+
## [0.8.33] - 2026-04-27
|
|
89
|
+
|
|
90
|
+
### Fixed
|
|
91
|
+
- `legion-llm` can now bridge loaded `lex-llm-*` provider classes into native dispatch through a `LexLLMAdapter`, allowing the new provider-gem split to participate without duplicating old `lex-*` runner constants.
|
|
92
|
+
- Provider-layer defaults now prefer `auto` dispatch and include the new `ollama`, `vllm`, `anthropic`, `openai`, `gemini`, and `mlx` native provider names.
|
|
93
|
+
- Inventory now recognizes `mlx` as a local HTTP provider.
|
|
94
|
+
- Fleet dispatch now registers reply futures before publishing requests, consumes structured publish results, fails fast on unroutable/nacked/confirm-timeout publishes, and validates reply metadata before fulfilling pending requests.
|
|
95
|
+
- `FleetRequest` now opts out of live-request spooling, requires mandatory publish and publisher confirms by default, and includes reply routing fields in the worker payload.
|
|
96
|
+
- `FleetResponse` and `FleetError` now publish live replies with mandatory routing, publisher confirms, and no spool/replay.
|
|
97
|
+
- LLM transport message IDs are memoized per message instance so AMQP return/confirm correlation sees the same `message_id` that was published.
|
|
98
|
+
- Embedding provider/model resolution, provider disable gates, prefix injection, fallback chains, Azure settings, Ollama base URLs, and metering caller context now honor JSON/string-keyed settings in addition to symbol-keyed runtime settings.
|
|
99
|
+
- Discovery for Ollama, vLLM, and embedding fallback chains now honors JSON/string-keyed provider, embedding, base URL, model metadata, and refresh TTL settings.
|
|
100
|
+
- Inference executor routing defaults, conversation compaction, pipeline escalation, native dispatch, async post-step, telemetry span, tool-loop, and fallback-provider settings now honor JSON/string-keyed settings.
|
|
101
|
+
- Module-level inference, prompt dispatch, prompt-cache, debate, and skill-injector settings now honor JSON/string-keyed settings.
|
|
102
|
+
- Sticky tool history, trigger matching, and RAG context settings now honor JSON/string-keyed settings.
|
|
103
|
+
- Shared settings helpers now normalize string and symbol keys across router, fleet, scheduling, response cache, API auth/defaults, metering, quality, guards, skills, discovery, inventory, daemon, config loaders, and audit checks.
|
|
104
|
+
- Shared settings helpers now register defaults through `Legion::Settings.merge_settings(:llm, ...)` and read directly from the canonical `Legion::Settings[:llm]` store so JSON-loaded settings files and runtime overrides remain authoritative.
|
|
105
|
+
- LLM cache, response-cache, and tool-confidence paths now prefer connected local cache backends while preserving shared cache fallback behavior.
|
|
106
|
+
- Boot, compatibility, Bedrock embedding, transport-connected, identity, RBAC, and API helper paths now use shared LLM settings/logging helpers instead of direct `Legion::Settings`, `Legion::Logging`, and `Legion::Cache` calls.
|
|
107
|
+
- LLM transport messages now promote tracing metadata into W3C `traceparent`, `baggage`, and Legion trace headers for fleet/audit/metering correlation.
|
|
108
|
+
- Fleet dispatch replies now avoid request-side metadata gates by default and expose both `model` and `model_id` so downstream metering and metadata readers can resolve the model consistently.
|
|
109
|
+
- Fleet lane sanitization and vLLM health URL normalization now avoid regex patterns flagged by CodeQL for uncontrolled input.
|
|
110
|
+
- The lex-llm provider bridge now loads only the Legion-native `Legion::Extensions::Llm` namespace and no longer probes removed fork-era entrypoints.
|
|
111
|
+
|
|
112
|
+
## [0.8.32] - 2026-04-27
|
|
113
|
+
|
|
114
|
+
### Fixed
|
|
115
|
+
- Embedding calls now return a clear unavailable-provider error when no embedding provider is configured or detected, preventing RubyLLM from implicitly selecting a chat/default provider.
|
|
116
|
+
|
|
117
|
+
## [0.8.31] - 2026-04-27
|
|
118
|
+
|
|
119
|
+
### Fixed
|
|
120
|
+
- Embedding calls no longer inherit the chat `llm.default_provider`, preventing vLLM or other chat defaults from receiving embedding traffic unless explicitly configured for embeddings. Fixes #104
|
|
121
|
+
|
|
3
122
|
## [0.8.30] - 2026-04-27
|
|
4
123
|
|
|
5
124
|
### Fixed
|
|
@@ -27,7 +146,7 @@
|
|
|
27
146
|
## [0.8.28] - 2026-04-24
|
|
28
147
|
|
|
29
148
|
### Fixed
|
|
30
|
-
- Model/provider mismatch when clients send a model name (e.g., `qwen3.5:latest`) without an explicit provider. The
|
|
149
|
+
- Model/provider mismatch when clients send a model name (e.g., `qwen3.5:latest`) without an explicit provider. The routing paths blindly paired it with `default_provider` (typically `bedrock`), causing provider model lookup failures. Now infers the correct provider from model naming patterns before using the global default.
|
|
31
150
|
- `arbitrage_fallback` hardcoded `:cloud` tier and `:bedrock` provider when inference failed. Now uses `PROVIDER_TIER` to resolve the correct tier for the inferred provider.
|
|
32
151
|
|
|
33
152
|
### Added
|
|
@@ -72,7 +191,7 @@
|
|
|
72
191
|
## [0.8.23] - 2026-04-23
|
|
73
192
|
|
|
74
193
|
### Fixed
|
|
75
|
-
- `Call::StructuredOutput`
|
|
194
|
+
- `Call::StructuredOutput` parse-retry path passed `messages:` (plural) to `chat_single`, which only accepts `message:` (singular), leaking the unknown kwarg into the provider chat call. This was visible as repeated "unknown keyword: :messages" warnings during dream cycle contradiction detection. The fix flattens the instruction and messages into a single string via `extract_user_content`.
|
|
76
195
|
|
|
77
196
|
## [0.8.22] - 2026-04-22
|
|
78
197
|
|
|
@@ -122,7 +241,7 @@
|
|
|
122
241
|
## [0.8.16] - 2026-04-22
|
|
123
242
|
|
|
124
243
|
### Fixed
|
|
125
|
-
-
|
|
244
|
+
- Provider bad-request and context-length errors now trigger the provider retry chain instead of bubbling up as unhandled 500s. Both `run_provider_call_single` and `step_provider_call_stream` retry on the next available provider before giving up.
|
|
126
245
|
- Resolved provider/model is now logged (`log.info`) in `step_routing` so provider errors can be diagnosed from daemon logs without relying on SSE done events.
|
|
127
246
|
|
|
128
247
|
### Changed
|
|
@@ -438,7 +557,7 @@
|
|
|
438
557
|
- `started_at` timestamp stored in `Thread.current[:legion_current_tool_started_at]` for accurate per-call wall-clock duration even across parallel threads
|
|
439
558
|
|
|
440
559
|
### Changed
|
|
441
|
-
-
|
|
560
|
+
- Tool-loop round cap raised from `25` to `200` for the configurable `max_tool_rounds` setting.
|
|
442
561
|
|
|
443
562
|
### Fixed
|
|
444
563
|
- `ConversationStore#db_append_message` now serializes non-String `content` values (e.g., tool-call arrays) to JSON before writing to the database, preventing Sequel type errors when tool-use messages are persisted
|
|
@@ -483,7 +602,7 @@
|
|
|
483
602
|
### Added
|
|
484
603
|
- Per-step pipeline timing diagnostics: `[pipeline][timing]` log line with duration per step
|
|
485
604
|
- Pre-pipeline timing in inference route: `gaia_ingest`, `pre_pipeline_setup`, `executor_call` durations
|
|
486
|
-
-
|
|
605
|
+
- Tool-loop round cap (25) to prevent infinite cycling
|
|
487
606
|
- `install_tool_loop_guard` applied to both streaming and non-streaming provider paths
|
|
488
607
|
|
|
489
608
|
### Fixed
|
|
@@ -629,9 +748,9 @@
|
|
|
629
748
|
- `Legion::LLM::ProviderRegistry` — thread-safe registry for native lex-* provider extensions: `register(name, ext)`, `for(name)`, `available`, `registered?(name)`, `reset!`; cleared automatically on `Legion::LLM.shutdown` (closes #37)
|
|
630
749
|
- `Legion::LLM::NativeDispatch` — native provider dispatch layer: `dispatch_chat`, `dispatch_embed`, `dispatch_stream`, `dispatch_count_tokens` route calls to registered lex-* extension modules and return standardized `{ result:, usage: Usage }` hashes; raises `ProviderError` when provider is not registered (closes #37)
|
|
631
750
|
- `Legion::LLM::NativeResponseAdapter` — adapter wrapping native dispatch result hash to expose the same `.content`, `.input_tokens`, `.output_tokens`, `.usage` interface as a RubyLLM response object (closes #37)
|
|
632
|
-
- `provider_layer` settings section: `mode` (`'
|
|
751
|
+
- `provider_layer` settings section: `mode` (`'native'` / `'auto'`) and `native_providers` (default `['claude', 'bedrock']`) for native provider dispatch (closes #37)
|
|
633
752
|
- Auto-registration in `Legion::LLM.start`: detects loaded lex-* extensions via `Object.const_defined?` and registers them — `lex-claude` → `:claude`/`:anthropic`, `lex-bedrock` → `:bedrock`, `lex-openai` → `:openai`, `lex-gemini` → `:gemini`; no hard dependencies added (closes #37)
|
|
634
|
-
- `Pipeline::Executor` provider layer integration: `use_native_dispatch?` checks `provider_layer.mode`; `execute_provider_request_native` calls `NativeDispatch.dispatch_chat` and wraps result in `NativeResponseAdapter
|
|
753
|
+
- `Pipeline::Executor` provider layer integration: `use_native_dispatch?` checks `provider_layer.mode`; `execute_provider_request_native` calls `NativeDispatch.dispatch_chat` and wraps result in `NativeResponseAdapter` (closes #37)
|
|
635
754
|
- Optional adversarial debate pipeline step for high-stakes decisions (closes #28): `Pipeline::Steps::Debate` runs a multi-round advocate/challenger/judge debate after `provider_call`; the initial response is the advocate, a challenger model critiques it, the advocate rebuts, and a judge model synthesizes all sides into the final response; activation via `debate: true` in `chat()` kwargs, or `Legion::Settings[:llm][:debate][:enabled]`, or GAIA auto-trigger when `gaia_auto_trigger: true` and `high_stakes`/`debate_recommended` are set in the advisory enrichment; debate is disabled by default; GAIA auto-trigger defaults to false in v0.6.0; different models are required for each role (advocate, challenger, judge) to avoid training bias — model rotation picks from enabled providers automatically when not explicitly configured; model strings use `provider:model` format; all LLM calls use `chat_direct` to avoid pipeline recursion; configurable via `debate.default_rounds` (default 1), `debate.max_rounds` (cap, default 3), `debate.advocate_model`, `debate.challenger_model`, `debate.judge_model`, `debate.model_selection_strategy` (default `'rotate'`); debate metadata (`enabled`, `rounds`, `advocate_model`, `challenger_model`, `judge_model`, `advocate_summary`, `challenger_summary`, `judge_confidence`) stored in `enrichments['debate:result']`; gracefully degrades to single-model mode with a warning when fewer than 2 models are available
|
|
636
755
|
- Async context curation (`Legion::LLM::ContextCurator`): keeps LLM context lean without compaction (closes #38). Heuristic curation runs async in `Thread.new` after each `step_context_store` — zero latency impact. Curated messages are used in `step_context_load` when available, falling back to raw history. Heuristic pipeline: `strip_thinking` removes `<thinking>` blocks; `distill_tool_result` summarizes large tool outputs by tool type (`read_file` → line count + first/last, `search`/`grep` → match counts, `bash` → exit code + last lines, default → char count + preview); `fold_resolved_exchanges` detects multi-turn clarification reaching agreement and folds to a system note; `evict_superseded` keeps only the latest read of each file path; `dedup_similar` removes near-duplicate messages via Jaccard similarity (delegates to `Compressor.deduplicate_messages`). LLM-assisted mode is built but off by default (`llm_assisted: false`); when enabled with `mode: 'llm_assisted'`, a configurable small/fast model produces better summaries with automatic fallback to heuristic on any error. All behavior gated by `Legion::Settings[:llm][:context_curation]`: `enabled` (default `true`), `mode` (`'heuristic'`), `llm_assisted` (`false`), `llm_model` (`nil`), `tool_result_max_chars` (2000), `thinking_eviction` (`true`), `exchange_folding` (`true`), `superseded_eviction` (`true`), `dedup_enabled` (`true`), `dedup_threshold` (0.85), `target_context_tokens` (40000).
|
|
637
756
|
- Message chain architecture with parent links and sidechain support in `ConversationStore` (closes #39): every message now carries `id` (UUID), `parent_id`, `sidechain` (default `false`), `message_group_id`, and `agent_id` fields; `build_chain(conversation_id, include_sidechains: false)` reconstructs ordered message history from parent links with rooted-leaf selection, parallel sibling recovery via `message_group_id`, and orphan appending; `sidechain_messages(conversation_id, agent_id: nil)` queries background/subagent messages with optional agent filter; `branch(conversation_id, from_message_id:)` creates a new conversation by copying history up to the given message; `store_metadata` / `read_metadata` provide tail-window session metadata storage; `migrate_parent_links!` backfills parent links on pre-migration sequential data; `messages()` backward-compatible flat array uses chain reconstruction when parent links are present, seq ordering otherwise; DB persistence adds `message_id`, `parent_id`, `sidechain`, `message_group_id`, `agent_id` columns when present (graceful degradation without migration)
|
|
@@ -1178,7 +1297,7 @@
|
|
|
1178
1297
|
### Added
|
|
1179
1298
|
- `ResponseCache` module for async response delivery via memcached with spool overflow at 8MB
|
|
1180
1299
|
- `DaemonClient` module for HTTP routing to LegionIO daemon with health caching (30s TTL)
|
|
1181
|
-
- `Legion::LLM.ask` one-shot method: daemon-first routing with direct
|
|
1300
|
+
- `Legion::LLM.ask` one-shot method: daemon-first routing with direct provider execution
|
|
1182
1301
|
- `DaemonDeniedError` and `DaemonRateLimitedError` error classes
|
|
1183
1302
|
- Daemon settings: `daemon.url` and `daemon.enabled` in defaults
|
|
1184
1303
|
- HTTP status code contract: 200 (cached), 201 (sync), 202 (async poll), 403, 429, 503
|
data/CLAUDE.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
## Purpose
|
|
7
7
|
|
|
8
|
-
Core LegionIO gem providing LLM capabilities to all extensions
|
|
8
|
+
Core LegionIO gem providing LLM capabilities to all extensions through Legion-native provider dispatch. Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
|
|
9
9
|
|
|
10
10
|
**GitHub**: https://github.com/LegionIO/legion-llm
|
|
11
11
|
**Version**: 0.8.0
|
|
@@ -37,8 +37,6 @@ Legion::LLM.start
|
|
|
37
37
|
|
|
38
38
|
```
|
|
39
39
|
Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inference, Call, Discovery
|
|
40
|
-
├── Patches # Monkey-patches for upstream gems
|
|
41
|
-
│ └── RubyLLMParallelTools # Parallel tool execution patch for RubyLLM
|
|
42
40
|
├── Errors # Typed error hierarchy (LLMError base + subtypes, retryable?)
|
|
43
41
|
│ └── EscalationExhausted / DaemonDeniedError / DaemonRateLimitedError / AuthError /
|
|
44
42
|
│ RateLimitError / ContextOverflow / ProviderError / ProviderDown /
|
|
@@ -57,7 +55,6 @@ Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inferenc
|
|
|
57
55
|
│ ├── Embeddings # generate, generate_batch, default_model, fallback chain
|
|
58
56
|
│ ├── StructuredOutput # JSON schema enforcement with native response_format and prompt fallback
|
|
59
57
|
│ ├── DaemonClient # HTTP routing to LegionIO daemon with 30s health cache
|
|
60
|
-
│ ├── BedrockAuth # Monkey-patch for Bedrock Bearer Token auth (required lazily)
|
|
61
58
|
│ ├── ClaudeConfigLoader # Import Claude CLI config from ~/.claude/settings.json
|
|
62
59
|
│ └── CodexConfigLoader # Import OpenAI bearer token from ~/.codex/auth.json
|
|
63
60
|
├── Context # Prompt and conversation context management
|
|
@@ -86,12 +83,10 @@ Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inferenc
|
|
|
86
83
|
│ ├── Executor # 18-step skeleton with profile-aware execution and call_stream
|
|
87
84
|
│ ├── Conversation # In-memory LRU (256 slots) + optional Sequel DB persistence (was ConversationStore)
|
|
88
85
|
│ ├── Prompt # Clean dispatch API: dispatch, request, summarize, extract, decide
|
|
89
|
-
│ ├──
|
|
90
|
-
│ ├── ToolDispatcher # Routes tool calls: MCP client / LEX runner / RubyLLM builtin
|
|
86
|
+
│ ├── ToolDispatcher # Routes tool calls: MCP client / LEX runner / native execution
|
|
91
87
|
│ ├── AuditPublisher # Publishes audit events to llm.audit exchange
|
|
92
88
|
│ ├── EnrichmentInjector # Converts RAG/GAIA enrichments into system prompt
|
|
93
89
|
│ ├── GaiaCaller # Gaia-specific chat dispatch with phase/tick tracing
|
|
94
|
-
│ ├── McpToolAdapter # Backward-compat alias for ToolAdapter
|
|
95
90
|
│ └── Steps/ # All 18+ pipeline step modules
|
|
96
91
|
│ ├── Metering, Billing, TokenBudget, PromptCache, Classification, Rbac
|
|
97
92
|
│ ├── GaiaAdvisory, TierAssigner, TriggerMatch, ToolDiscovery, McpDiscovery, RagContext
|
|
@@ -148,9 +143,8 @@ Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inferenc
|
|
|
148
143
|
│ └── OffPeak # Peak-hour deferral (delegates to Scheduling)
|
|
149
144
|
├── Tools # Tool call layer
|
|
150
145
|
│ ├── Confidence # 4-tier degrading confidence storage (was OverrideConfidence)
|
|
151
|
-
│ ├── Dispatcher # Routes tool calls to MCP/LEX/
|
|
146
|
+
│ ├── Dispatcher # Routes tool calls to MCP/LEX/native execution
|
|
152
147
|
│ ├── Interceptor # Extensible pre-dispatch intercept registry
|
|
153
|
-
│ ├── Adapter # Wraps lex-* extension tool as RubyLLM::Tool
|
|
154
148
|
│ └── Interceptors/
|
|
155
149
|
│ └── PythonVenv # Redirects python3/pip3 tool calls to isolated venv
|
|
156
150
|
├── Hooks # before/after chat interceptor registry
|
|
@@ -286,7 +280,6 @@ All compatibility routes normalize requests through `API::Translators` (OpenAIRe
|
|
|
286
280
|
|
|
287
281
|
| Gem | Purpose |
|
|
288
282
|
|-----|---------|
|
|
289
|
-
| `ruby_llm` (>= 1.0) | Multi-provider LLM client |
|
|
290
283
|
| `tzinfo` (>= 2.0) | IANA timezone conversion for schedule windows |
|
|
291
284
|
| `legion-logging` | Logging |
|
|
292
285
|
| `legion-settings` | Configuration |
|
|
@@ -377,7 +370,7 @@ Vault credential resolution: When `vault_path` is set and Legion::Crypt::Vault i
|
|
|
377
370
|
|
|
378
371
|
Bedrock supports two auth modes:
|
|
379
372
|
- **SigV4** (default): `api_key` + `secret_key` (+ optional `session_token`)
|
|
380
|
-
- **Bearer token**: `bearer_token` for AWS Identity Center/SSO.
|
|
373
|
+
- **Bearer token**: `bearer_token` for AWS Identity Center/SSO. Native Bedrock providers consume it through lex-llm configuration.
|
|
381
374
|
|
|
382
375
|
### Auto-Detection Priority
|
|
383
376
|
|
|
@@ -485,7 +478,6 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
485
478
|
| Path | Purpose |
|
|
486
479
|
|------|---------|
|
|
487
480
|
| `lib/legion/llm.rb` | Thin facade: start, shutdown, delegates to Inference/Call/Discovery |
|
|
488
|
-
| `lib/legion/llm/patches/ruby_llm_parallel_tools.rb` | Monkey-patch for RubyLLM parallel tool execution |
|
|
489
481
|
| `lib/legion/llm/compat.rb` | Backward-compat aliases via const_missing with deprecation warnings |
|
|
490
482
|
| `lib/legion/llm/errors.rb` | Typed error hierarchy: LLMError base + all subtypes, retryable? predicate |
|
|
491
483
|
| `lib/legion/llm/version.rb` | Version constant |
|
|
@@ -503,7 +495,6 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
503
495
|
| `lib/legion/llm/call/embeddings.rb` | generate, generate_batch, fallback chain, dimension enforcement |
|
|
504
496
|
| `lib/legion/llm/call/structured_output.rb` | JSON schema enforcement with native response_format and prompt fallback |
|
|
505
497
|
| `lib/legion/llm/call/daemon_client.rb` | HTTP routing to LegionIO daemon with 30s health cache |
|
|
506
|
-
| `lib/legion/llm/call/bedrock_auth.rb` | Monkey-patch for Bedrock Bearer Token auth — required lazily |
|
|
507
498
|
| `lib/legion/llm/call/claude_config_loader.rb` | Import Claude CLI config from ~/.claude/settings.json |
|
|
508
499
|
| `lib/legion/llm/call/codex_config_loader.rb` | Import OpenAI bearer token from ~/.codex/auth.json |
|
|
509
500
|
| `lib/legion/llm/context.rb` | Context entry point |
|
|
@@ -524,19 +515,17 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
524
515
|
| `lib/legion/llm/metering/tokens.rb` | Thread-safe per-session token budget accumulator (was TokenTracker) |
|
|
525
516
|
| `lib/legion/llm/inference.rb` | Inference entry point: requires all pipeline components |
|
|
526
517
|
| `lib/legion/llm/inference/request.rb` | Inference::Request Data.define struct with .build and .from_chat_args |
|
|
527
|
-
| `lib/legion/llm/inference/response.rb` | Inference::Response Data.define struct with .build, .
|
|
518
|
+
| `lib/legion/llm/inference/response.rb` | Inference::Response Data.define struct with .build, .from_provider_message, #with |
|
|
528
519
|
| `lib/legion/llm/inference/profile.rb` | Inference::Profile: caller-derived profiles for step skipping |
|
|
529
520
|
| `lib/legion/llm/inference/tracing.rb` | Inference::Tracing: trace_id, span_id, exchange_id generation |
|
|
530
521
|
| `lib/legion/llm/inference/timeline.rb` | Inference::Timeline: ordered event recording with participant tracking |
|
|
531
522
|
| `lib/legion/llm/inference/executor.rb` | Inference::Executor: 18-step skeleton with profile-aware execution and call_stream |
|
|
532
523
|
| `lib/legion/llm/inference/conversation.rb` | In-memory LRU (256 slots) + optional Sequel DB persistence (was ConversationStore) |
|
|
533
524
|
| `lib/legion/llm/inference/prompt.rb` | Prompt dispatch API: dispatch, request, summarize, extract, decide |
|
|
534
|
-
| `lib/legion/llm/inference/
|
|
535
|
-
| `lib/legion/llm/inference/tool_dispatcher.rb` | Routes tool calls to MCP client / LEX runner / RubyLLM builtin |
|
|
525
|
+
| `lib/legion/llm/inference/tool_dispatcher.rb` | Routes tool calls to MCP client / LEX runner / native execution |
|
|
536
526
|
| `lib/legion/llm/inference/audit_publisher.rb` | Publishes audit events to llm.audit exchange |
|
|
537
527
|
| `lib/legion/llm/inference/enrichment_injector.rb` | Converts RAG/GAIA enrichments into system prompt |
|
|
538
528
|
| `lib/legion/llm/inference/gaia_caller.rb` | Gaia-specific chat dispatch with phase/tick tracing |
|
|
539
|
-
| `lib/legion/llm/inference/mcp_tool_adapter.rb` | Backward-compat alias for ToolAdapter |
|
|
540
529
|
| `lib/legion/llm/inference/steps.rb` | Steps aggregator: requires all step modules |
|
|
541
530
|
| `lib/legion/llm/inference/steps/*.rb` | All 18+ pipeline step modules (metering, billing, rbac, classification, etc.) |
|
|
542
531
|
| `lib/legion/llm/router.rb` | Router: resolve, health_tracker, resolve_chain, select_candidates |
|
|
@@ -586,9 +575,8 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
586
575
|
| `lib/legion/llm/scheduling/batch.rb` | Non-urgent request batching with priority queue and auto-flush |
|
|
587
576
|
| `lib/legion/llm/scheduling/off_peak.rb` | Peak-hour deferral (delegates to Scheduling) |
|
|
588
577
|
| `lib/legion/llm/tools/confidence.rb` | 4-tier degrading confidence storage (was OverrideConfidence) |
|
|
589
|
-
| `lib/legion/llm/tools/dispatcher.rb` | Routes tool calls: MCP client / LEX runner / RubyLLM builtin |
|
|
578
|
+
| `lib/legion/llm/tools/dispatcher.rb` | Routes tool calls: MCP client / LEX runner / native execution |
|
|
590
579
|
| `lib/legion/llm/tools/interceptor.rb` | Extensible pre-dispatch intercept registry |
|
|
591
|
-
| `lib/legion/llm/tools/adapter.rb` | Wraps lex-* extension tool as RubyLLM::Tool (McpToolAdapter kept as alias) |
|
|
592
580
|
| `lib/legion/llm/tools/interceptors/python_venv.rb` | Redirects python3/pip3 tool calls to isolated venv |
|
|
593
581
|
| `lib/legion/llm/hooks.rb` | Hooks: before/after chat registry, run_before, run_after, install_defaults |
|
|
594
582
|
| `lib/legion/llm/hooks/rag_guard.rb` | Post-generation RAG faithfulness check via lex-eval |
|
|
@@ -643,7 +631,7 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
643
631
|
| `spec/legion/llm/gateway_integration_spec.rb` | Tests: gateway teardown — verifies no delegation |
|
|
644
632
|
| `spec/legion/llm/metering/estimator_spec.rb` | Tests: cost estimation, fuzzy matching, pricing table (was cost_estimator_spec.rb) |
|
|
645
633
|
| `spec/legion/llm/inference/request_spec.rb` | Tests: Request struct builder, legacy adapter |
|
|
646
|
-
| `spec/legion/llm/inference/response_spec.rb` | Tests: Response struct builder,
|
|
634
|
+
| `spec/legion/llm/inference/response_spec.rb` | Tests: Response struct builder, provider message adapter, #with |
|
|
647
635
|
| `spec/legion/llm/inference/profile_spec.rb` | Tests: Profile derivation and step skipping |
|
|
648
636
|
| `spec/legion/llm/inference/tracing_spec.rb` | Tests: Tracing init, exchange_id generation |
|
|
649
637
|
| `spec/legion/llm/inference/timeline_spec.rb` | Tests: Timeline event recording, participants |
|
data/Gemfile
CHANGED
|
@@ -5,6 +5,28 @@ source 'https://rubygems.org'
|
|
|
5
5
|
gemspec
|
|
6
6
|
|
|
7
7
|
group :test do
|
|
8
|
+
lex_llm_path = File.expand_path('../extensions-ai/lex-llm', __dir__)
|
|
9
|
+
if Dir.exist?(lex_llm_path)
|
|
10
|
+
gem 'lex-llm', path: lex_llm_path
|
|
11
|
+
else
|
|
12
|
+
gem 'lex-llm'
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
%w[
|
|
16
|
+
lex-llm-ollama
|
|
17
|
+
lex-llm-vllm
|
|
18
|
+
lex-llm-anthropic
|
|
19
|
+
lex-llm-openai
|
|
20
|
+
lex-llm-gemini
|
|
21
|
+
lex-llm-mlx
|
|
22
|
+
lex-llm-bedrock
|
|
23
|
+
lex-llm-azure-foundry
|
|
24
|
+
lex-llm-vertex
|
|
25
|
+
].each do |provider_gem|
|
|
26
|
+
provider_path = File.expand_path("../extensions-ai/#{provider_gem}", __dir__)
|
|
27
|
+
gem provider_gem, path: provider_path if Dir.exist?(provider_path)
|
|
28
|
+
end
|
|
29
|
+
|
|
8
30
|
gem 'rake'
|
|
9
31
|
gem 'rspec'
|
|
10
32
|
gem 'rspec_junit_formatter'
|