legion-llm 0.8.32 → 0.8.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -0
  3. data/CHANGELOG.md +135 -8
  4. data/CLAUDE.md +27 -36
  5. data/Gemfile +25 -0
  6. data/README.md +125 -135
  7. data/legion-llm.gemspec +6 -9
  8. data/lib/legion/llm/api/anthropic/messages.rb +8 -10
  9. data/lib/legion/llm/api/auth.rb +2 -2
  10. data/lib/legion/llm/api/native/chat.rb +6 -2
  11. data/lib/legion/llm/api/native/helpers.rb +57 -77
  12. data/lib/legion/llm/api/native/inference.rb +3 -9
  13. data/lib/legion/llm/api/native/instances.rb +87 -0
  14. data/lib/legion/llm/api/native/models.rb +117 -0
  15. data/lib/legion/llm/api/native/offerings.rb +75 -0
  16. data/lib/legion/llm/api/native/providers.rb +57 -43
  17. data/lib/legion/llm/api/native/routing.rb +55 -0
  18. data/lib/legion/llm/api/openai/chat_completions.rb +13 -12
  19. data/lib/legion/llm/api/openai/embeddings.rb +1 -1
  20. data/lib/legion/llm/api/openai/models.rb +6 -40
  21. data/lib/legion/llm/api.rb +8 -0
  22. data/lib/legion/llm/audit.rb +1 -2
  23. data/lib/legion/llm/cache/response.rb +48 -23
  24. data/lib/legion/llm/cache.rb +47 -22
  25. data/lib/legion/llm/call/daemon_client.rb +6 -6
  26. data/lib/legion/llm/call/dispatch.rb +181 -58
  27. data/lib/legion/llm/call/embeddings.rb +112 -438
  28. data/lib/legion/llm/call/lex_llm_adapter.rb +230 -0
  29. data/lib/legion/llm/call/providers.rb +24 -500
  30. data/lib/legion/llm/call/registry.rb +94 -10
  31. data/lib/legion/llm/call/structured_output.rb +4 -4
  32. data/lib/legion/llm/call.rb +0 -3
  33. data/lib/legion/llm/compat.rb +8 -11
  34. data/lib/legion/llm/config.rb +18 -12
  35. data/lib/legion/llm/context/compressor.rb +5 -1
  36. data/lib/legion/llm/context/curator.rb +8 -5
  37. data/lib/legion/llm/discovery/memory_gate.rb +53 -0
  38. data/lib/legion/llm/discovery/rule_generator.rb +147 -0
  39. data/lib/legion/llm/discovery/system.rb +1 -1
  40. data/lib/legion/llm/discovery.rb +227 -46
  41. data/lib/legion/llm/fleet/dispatcher.rb +129 -33
  42. data/lib/legion/llm/fleet/handler.rb +54 -21
  43. data/lib/legion/llm/fleet/lane.rb +132 -0
  44. data/lib/legion/llm/fleet/reply_dispatcher.rb +51 -39
  45. data/lib/legion/llm/fleet.rb +1 -0
  46. data/lib/legion/llm/helper.rb +3 -9
  47. data/lib/legion/llm/hooks/budget_guard.rb +1 -3
  48. data/lib/legion/llm/hooks/rag_guard.rb +2 -4
  49. data/lib/legion/llm/hooks/reflection.rb +1 -2
  50. data/lib/legion/llm/hooks.rb +2 -2
  51. data/lib/legion/llm/inference/audit_publisher.rb +36 -12
  52. data/lib/legion/llm/inference/conversation.rb +3 -2
  53. data/lib/legion/llm/inference/enrichment_injector.rb +1 -3
  54. data/lib/legion/llm/inference/executor.rb +474 -255
  55. data/lib/legion/llm/inference/prompt.rb +13 -2
  56. data/lib/legion/llm/inference/response.rb +1 -1
  57. data/lib/legion/llm/inference/steps/classification.rb +68 -26
  58. data/lib/legion/llm/inference/steps/debate.rb +41 -23
  59. data/lib/legion/llm/inference/steps/gaia_advisory.rb +2 -2
  60. data/lib/legion/llm/inference/steps/metering.rb +9 -7
  61. data/lib/legion/llm/inference/steps/prompt_cache.rb +15 -12
  62. data/lib/legion/llm/inference/steps/rag_context.rb +30 -21
  63. data/lib/legion/llm/inference/steps/rbac.rb +1 -1
  64. data/lib/legion/llm/inference/steps/skill_injector.rb +16 -6
  65. data/lib/legion/llm/inference/steps/sticky_helpers.rb +21 -6
  66. data/lib/legion/llm/inference/steps/sticky_persist.rb +29 -8
  67. data/lib/legion/llm/inference/steps/sticky_runners.rb +6 -6
  68. data/lib/legion/llm/inference/steps/tier_assigner.rb +28 -9
  69. data/lib/legion/llm/inference/steps/tool_calls.rb +16 -3
  70. data/lib/legion/llm/inference/steps/tool_discovery.rb +27 -13
  71. data/lib/legion/llm/inference/steps/tool_history.rb +5 -1
  72. data/lib/legion/llm/inference/steps/trigger_match.rb +15 -5
  73. data/lib/legion/llm/inference.rb +139 -108
  74. data/lib/legion/llm/inventory.rb +403 -0
  75. data/lib/legion/llm/metering/tokens.rb +2 -6
  76. data/lib/legion/llm/metering/tracker.rb +10 -3
  77. data/lib/legion/llm/metering.rb +31 -9
  78. data/lib/legion/llm/quality/checker.rb +2 -2
  79. data/lib/legion/llm/quality/confidence/scorer.rb +6 -11
  80. data/lib/legion/llm/quality/shadow_eval.rb +7 -3
  81. data/lib/legion/llm/router/arbitrage.rb +1 -4
  82. data/lib/legion/llm/router/gateway_interceptor.rb +4 -5
  83. data/lib/legion/llm/router/health_tracker.rb +154 -46
  84. data/lib/legion/llm/router/resolution.rb +33 -10
  85. data/lib/legion/llm/router/rule.rb +9 -5
  86. data/lib/legion/llm/router.rb +210 -70
  87. data/lib/legion/llm/scheduling/batch.rb +1 -4
  88. data/lib/legion/llm/scheduling.rb +3 -6
  89. data/lib/legion/llm/settings.rb +121 -53
  90. data/lib/legion/llm/skills/base.rb +4 -1
  91. data/lib/legion/llm/skills/external_discovery.rb +2 -2
  92. data/lib/legion/llm/skills.rb +1 -1
  93. data/lib/legion/llm/tools/confidence.rb +44 -10
  94. data/lib/legion/llm/tools/dispatcher.rb +80 -39
  95. data/lib/legion/llm/tools.rb +0 -1
  96. data/lib/legion/llm/transport/exchanges/fleet.rb +1 -1
  97. data/lib/legion/llm/transport/message.rb +83 -9
  98. data/lib/legion/llm/transport/messages/fleet_error.rb +25 -16
  99. data/lib/legion/llm/transport/messages/fleet_request.rb +45 -1
  100. data/lib/legion/llm/transport/messages/fleet_response.rb +22 -15
  101. data/lib/legion/llm/transport/messages/prompt_event.rb +1 -1
  102. data/lib/legion/llm/transport/messages/skill_event.rb +1 -1
  103. data/lib/legion/llm/transport/messages/tool_event.rb +1 -1
  104. data/lib/legion/llm/types/tool_definition.rb +71 -0
  105. data/lib/legion/llm/types.rb +1 -0
  106. data/lib/legion/llm/version.rb +1 -1
  107. data/lib/legion/llm.rb +21 -18
  108. metadata +30 -74
  109. data/lib/legion/llm/bedrock_bearer_auth.rb +0 -4
  110. data/lib/legion/llm/call/bedrock_auth.rb +0 -53
  111. data/lib/legion/llm/call/bedrock_embeddings.rb +0 -270
  112. data/lib/legion/llm/call/claude_config_loader.rb +0 -172
  113. data/lib/legion/llm/call/codex_config_loader.rb +0 -132
  114. data/lib/legion/llm/discovery/ollama.rb +0 -96
  115. data/lib/legion/llm/discovery/vllm.rb +0 -114
  116. data/lib/legion/llm/inference/mcp_tool_adapter.rb +0 -5
  117. data/lib/legion/llm/inference/tool_adapter.rb +0 -13
  118. data/lib/legion/llm/patches/ruby_llm_parallel_tools.rb +0 -102
  119. data/lib/legion/llm/patches/ruby_llm_vllm.rb +0 -78
  120. data/lib/legion/llm/tools/adapter.rb +0 -101
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: f73cca457b602ab79cd3964a15333afe338a7ed5ef18a556f0e502da7817b4ef
- data.tar.gz: 706498c91924640dc31bf43e814a04f2880acb2930cf30abdf144c28cb02fd01
+ metadata.gz: 88524bf330ed22e6ac366d35ddccdbad4557b115c447d87d238ab2670ca8da7e
+ data.tar.gz: 68c0470f4c10ee885bd28d0486f487777ae3c9dc35ab0fea3cc4ab860c68a0c6
  SHA512:
- metadata.gz: 3b499fde4636085676157fdad1576ba60ed0149f2e9d126289b38399b4232ce64ed47e66250c7bf0dd01c87e15afff675c0d3f0ddc5d6d152ec17640af615fd2
- data.tar.gz: 5b26a4426f630a3e36dedd6bdeacae7b6a2283696f4a9771d5458bdd2c8735380984ee983a10c9483ea2cb373eaa5a4e54c0cc92cac5d1a0b04f5835a7920850
+ metadata.gz: ba9ad2293c9e65db838aca3921ba291ef3bfd71798fc044bfdcdb90f3a04e0536fa5c562f00ea3e06eda88c41ee1659d23ef46382c69424940b4ff058f5cd978
+ data.tar.gz: c7feaf54a2a618eb5d45741269ba65d855bb478142a3b90702d29de9d23f3bbceadca26b5eb765bedf6c2476b97901dc59cc597242051d9500df9733174532f4
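
The checksums above are the digests RubyGems records inside the package itself: a `.gem` file is a plain tar archive containing `metadata.gz`, `data.tar.gz`, and this `checksums.yaml` (gzipped). A minimal Ruby sketch for recomputing the SHA256 values, assuming the 0.8.49 gem has already been fetched and unpacked into the current directory:

```ruby
# Recompute the SHA256 digests listed in checksums.yaml.
# Assumes `gem fetch legion-llm -v 0.8.49` followed by
# `tar -xf legion-llm-0.8.49.gem` has left metadata.gz and
# data.tar.gz in the working directory.
require 'digest'

%w[metadata.gz data.tar.gz].each do |member|
  puts "#{member}: #{Digest::SHA256.file(member).hexdigest}"
end
```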
data/.gitignore CHANGED
@@ -1,6 +1,7 @@
  /.bundle/
  /.yardoc
  /Gemfile.lock
+ Gemfile.lock
  *.gem
  /_yardoc/
  /coverage/
@@ -20,3 +21,6 @@ legion.log
  .worktrees/
  .claude/
  docs/
+ bin/apollo-setup-postreboot.sh
+ bin/apollo-setup-prereboot.sh
+ legionio-bootstrap-uhg-v3.json
data/CHANGELOG.md CHANGED
@@ -1,5 +1,132 @@
  # Legion LLM Changelog

+ ## [0.8.49] - 2026-04-29
+
+ ### Changed
+ - `Settings.register_defaults!` now calls `Legion::Settings.register_library` instead of `merge_settings`, using the idempotent legion-settings 1.4.0 API that prevents double-registration.
+ - Bumped `legion-settings` dependency floor to `>= 1.4.0`.
+ - Test stub `Legion::Settings` now exposes `register_library` matching the real 1.4.0 API.
+
+ ## [0.8.48] - 2026-04-29
+
+ ### Added
+ - `ToolDefinition.from_registry_entry` builds tool definitions from `Legion::Settings::Extensions` registry entries.
+ - `Dispatcher` checks `Settings::Extensions` for tool override resolution; when no matching entry is found it falls back to settings-based MCP overrides (no `Tools::Registry` or `Catalog::Registry` fallback).
+ - `Executor#add_registry_tool_definitions` reads from `Settings::Extensions` when available, falling back to `Legion::Tools::Registry` for backward compatibility.
+ - `Steps::ToolDiscovery` discovers tools from `Settings::Extensions` when available, falling back to `Legion::Tools::Registry`.
+
+ ### Changed
+ - Bumped `legion-settings` dependency floor to `>= 1.4.0` (requires `Settings::Extensions` module).
+
+ ## [0.8.47] - 2026-04-29
+
+ ### Fixed
+ - Tool-bearing native inference now forwards native tool definitions and runs a bounded native tool loop through `Call::Dispatch` and `Inference::ToolDispatcher`.
+ - OpenAI-compatible, Anthropic-compatible, and native API tool declarations now use provider-neutral native tool definitions.
+ - Embedding generation and discovery health checks now route through native `Call::Dispatch.dispatch_embed`.
+
+ ## [0.8.46] - 2026-04-29
+
+ ### Fixed
+ - Pipeline native dispatch now rejects tool-bearing requests that lack native tool-loop support instead of silently dropping tools.
+ - Escalation exhaustion now raises `EscalationExhausted` consistently when RubyLLM is unavailable, preserving the RubyLLM-absent native failure details in the error message.
+
+ ## [0.8.45] - 2026-04-29
+
+ ### Added
+ - Native LLM API now exposes model-offering inventory at `/api/llm/offerings` and provider-instance inventory at `/api/llm/instances`, matching the routing redesign metadata surface.
+
+ ## [0.8.44] - 2026-04-28
+
+ ### Fixed
+ - Native LLM API caller metadata now uses the unified `Legion::Identity::Request`/`Process` identity path instead of ad-hoc request waterfalls.
+ - Identity broker credential lookups now include `purpose:` and `context:` metadata so credential grants are auditable.
+ - Fleet request expiration no longer logs warnings for omitted or blank TTL values, and unified identity caller metadata accepts string-keyed `requested_by` hashes.
+
+ ## [0.8.43] - 2026-04-28
+
+ ### Changed
+ - RubyLLM is now loaded as an optional compatibility layer instead of a router runtime dependency, and native provider dispatch no longer falls back to RubyLLM by default.
+ - Non-pipeline chat, escalation, provider probes, startup defaults, discovery checks, and compatibility API tool builders now degrade cleanly when RubyLLM is unavailable, routing native direct calls where a registered native provider exists and raising `ProviderError` instead of `NameError` for RubyLLM-only paths.
+
+ ## [0.8.42] - 2026-04-28
+
+ ### Fixed
+ - LLM provider settings snapshots now initialize their fallback source before deep-copying settings, preventing an uninitialized local fallback when snapshot duplication raises.
+
+ ## [0.8.41] - 2026-04-28
+
+ ### Fixed
+ - The router gemspec now depends on the provider-neutral `lex-llm` base instead of installing legacy provider gems (`lex-bedrock`, `lex-claude`, `lex-gemini`, and `lex-openai`) as runtime dependencies.
+
+ ## [0.8.40] - 2026-04-28
+
+ ### Fixed
+ - Streaming inference now uses native lex-llm provider dispatch when the provider layer selects native mode.
+ - Startup discovery now refreshes local system facts independently of Ollama model refresh, and metering publish now loads its transport message class when connected instead of depending on prior boot order.
+
+ ## [0.8.39] - 2026-04-28
+
+ ### Fixed
+ - OpenAI-compatible, Anthropic-compatible, and native chat API routes now use the same server-resolved caller identity metadata as native inference, preserving audit and metering identity fields across compatibility routes.
+
+ ## [0.8.38] - 2026-04-28
+
+ ### Fixed
+ - `require 'legion/llm'` now loads `legion-settings` when the host has not already loaded `Legion::Settings`, preserving standalone settings defaults and override behavior during LegionIO load-phase initialization.
+
+ ## [0.8.37] - 2026-04-28
+
+ ### Fixed
+ - Router resolutions, health tracking, inventory, native dispatch, inference responses, audit, and metering now preserve optional lex-llm model offering metadata while keeping provider/model fallback behavior compatible with older callers.
+ - Inventory now consumes lex-llm 0.1.5 `ModelOffering`-style fields from configured settings or native provider adapters when available, exposing offering IDs, model families, canonical aliases, provider instances, and routing metadata without credentials.
+
+ ## [0.8.36] - 2026-04-28
+
+ ### Fixed
+ - Fleet transport now publishes requests through the `llm.fleet` exchange, keeps provider/model in fleet message bodies for workers, and publishes handler replies through the mandatory confirmed fleet response message path.
+ - Inventory now exposes exact offering lanes and non-secret offering metadata so provider instances can opt into offering-level routing without losing shared fleet lane compatibility.
+
+ ## [0.8.35] - 2026-04-28
+
+ ### Fixed
+ - Fleet lane routing now rejects sensitive or oversized public boundary, eligibility, and offering-instance segments before they can enter routing keys.
+ - Lex-llm native provider adapters now memoize provider instances and cover streaming, token-count estimation, provider failures, missing namespaces, and non-hash message inputs.
+ - Local Ollama/vLLM health probes now distinguish malformed base URL configuration from ordinary unreachable services.
+ - Inventory failures now re-raise programmer/config-shape errors instead of silently returning an empty model list.
+ - Fleet reply dispatch logging now includes operation tags, JSON parse failures are logged at warning level, and unwired broker return/nack helpers were removed so pending replies rely on the documented timeout path.
+ - The vLLM thinking patch now rescues only expected settings-shape errors instead of swallowing all runtime failures.
+
+ ## [0.8.34] - 2026-04-28
+
+ ### Fixed
+ - Native lex-llm provider dispatch now preserves injected system instructions when routing through `LexLLMAdapter`.
+ - Lex-llm bridge configuration now normalizes OpenAI-compatible `/v1` base URLs for `vllm` and `openai` providers while preserving versioned non-OpenAI-compatible endpoints.
+
+ ## [0.8.33] - 2026-04-27
+
+ ### Fixed
+ - `legion-llm` can now bridge loaded `lex-llm-*` provider classes into native dispatch through a `LexLLMAdapter`, allowing the new provider-gem split to participate without duplicating old `lex-*` runner constants.
+ - Provider-layer defaults now prefer `auto` dispatch and include the new `ollama`, `vllm`, `anthropic`, `openai`, `gemini`, and `mlx` native provider names.
+ - Inventory now recognizes `mlx` as a local HTTP provider.
+ - Fleet dispatch now registers reply futures before publishing requests, consumes structured publish results, fails fast on unroutable/nacked/confirm-timeout publishes, and validates reply metadata before fulfilling pending requests.
+ - `FleetRequest` now opts out of live-request spooling, requires mandatory publish and publisher confirms by default, and includes reply routing fields in the worker payload.
+ - `FleetResponse` and `FleetError` now publish live replies with mandatory routing, publisher confirms, and no spool/replay.
+ - LLM transport message IDs are memoized per message instance so AMQP return/confirm correlation sees the same `message_id` that was published.
+ - Embedding provider/model resolution, provider disable gates, prefix injection, fallback chains, Azure settings, Ollama base URLs, and metering caller context now honor JSON/string-keyed settings in addition to symbol-keyed runtime settings.
+ - Discovery for Ollama, vLLM, and embedding fallback chains now honors JSON/string-keyed provider, embedding, base URL, model metadata, and refresh TTL settings.
+ - Inference executor routing defaults, conversation compaction, pipeline escalation, native dispatch, async post-step, telemetry span, tool-loop, and fallback-provider settings now honor JSON/string-keyed settings.
+ - Module-level inference, prompt dispatch, prompt-cache, debate, and skill-injector settings now honor JSON/string-keyed settings.
+ - Sticky tool history, trigger matching, and RAG context settings now honor JSON/string-keyed settings.
+ - Shared settings helpers now normalize string and symbol keys across router, fleet, scheduling, response cache, API auth/defaults, metering, quality, guards, skills, discovery, inventory, daemon, config loaders, and audit checks.
+ - Shared settings helpers now register defaults through `Legion::Settings.merge_settings(:llm, ...)` and read directly from the canonical `Legion::Settings[:llm]` store so JSON-loaded settings files and runtime overrides remain authoritative.
+ - LLM cache, response-cache, and tool-confidence paths now prefer connected local cache backends while preserving shared cache fallback behavior.
+ - Boot, compatibility, Bedrock embedding, transport-connected, identity, RBAC, and API helper paths now use shared LLM settings/logging helpers instead of direct `Legion::Settings`, `Legion::Logging`, and `Legion::Cache` calls.
+ - LLM transport messages now promote tracing metadata into W3C `traceparent`, `baggage`, and Legion trace headers for fleet/audit/metering correlation.
+ - Fleet dispatch replies now avoid request-side metadata gates by default and expose both `model` and `model_id` so downstream metering and metadata readers can resolve the model consistently.
+ - Fleet lane sanitization and vLLM health URL normalization now avoid regex patterns flagged by CodeQL for uncontrolled input.
+ - The lex-llm provider bridge now loads only the Legion-native `Legion::Extensions::Llm` namespace and no longer probes removed fork-era entrypoints.
+
  ## [0.8.32] - 2026-04-27

  ### Fixed
@@ -37,7 +164,7 @@
  ## [0.8.28] - 2026-04-24

  ### Fixed
- - Model/provider mismatch when clients send a model name (e.g., `qwen3.5:latest`) without an explicit provider. The fallback paths blindly paired it with `default_provider` (typically `bedrock`), causing `RubyLLM::ModelNotFoundError`. Now infers the correct provider from model naming patterns before falling back to the global default.
+ - Model/provider mismatch when clients send a model name (e.g., `qwen3.5:latest`) without an explicit provider. The routing paths blindly paired it with `default_provider` (typically `bedrock`), causing provider model lookup failures. Now infers the correct provider from model naming patterns before using the global default.
  - `arbitrage_fallback` hardcoded `:cloud` tier and `:bedrock` provider when inference failed. Now uses `PROVIDER_TIER` to resolve the correct tier for the inferred provider.

  ### Added
@@ -82,7 +209,7 @@
  ## [0.8.23] - 2026-04-23

  ### Fixed
- - `Call::StructuredOutput` prompt-fallback path passed `messages:` (plural) to `chat_single` which only accepts `message:` (singular), leaking the unknown kwarg into `RubyLLM::Chat.new`. Visible as repeated "unknown keyword: :messages" warnings during dream cycle contradiction detection. Flattened instruction + messages into a single string via `extract_user_content`.
+ - `Call::StructuredOutput` parse-retry path passed `messages:` (plural) to `chat_single` which only accepts `message:` (singular), leaking the unknown kwarg into the provider chat call. Visible as repeated "unknown keyword: :messages" warnings during dream cycle contradiction detection. Flattened instruction + messages into a single string via `extract_user_content`.

  ## [0.8.22] - 2026-04-22

@@ -132,7 +259,7 @@
  ## [0.8.16] - 2026-04-22

  ### Fixed
- - `RubyLLM::BadRequestError` (HTTP 400) and `RubyLLM::ContextLengthExceededError` now trigger the provider fallback-retry chain instead of bubbling up as unhandled 500s. Both `run_provider_call_single` and `step_provider_call_stream` retry on the next available provider before giving up.
+ - Provider bad-request and context-length errors now trigger the provider retry chain instead of bubbling up as unhandled 500s. Both `run_provider_call_single` and `step_provider_call_stream` retry on the next available provider before giving up.
  - Resolved provider/model is now logged (`log.info`) in `step_routing` so provider errors can be diagnosed from daemon logs without relying on SSE done events.

  ### Changed
@@ -448,7 +575,7 @@
  - `started_at` timestamp stored in `Thread.current[:legion_current_tool_started_at]` for accurate per-call wall-clock duration even across parallel threads

  ### Changed
- - `MAX_RUBY_LLM_TOOL_ROUNDS` constant raised from `25` to `200` (now serves as a fallback default for the configurable `max_tool_rounds` setting)
+ - Tool-loop round cap raised from `25` to `200` for the configurable `max_tool_rounds` setting.

  ### Fixed
  - `ConversationStore#db_append_message` now serializes non-String `content` values (e.g., tool-call arrays) to JSON before writing to the database, preventing Sequel type errors when tool-use messages are persisted
@@ -493,7 +620,7 @@
  ### Added
  - Per-step pipeline timing diagnostics: `[pipeline][timing]` log line with duration per step
  - Pre-pipeline timing in inference route: `gaia_ingest`, `pre_pipeline_setup`, `executor_call` durations
- - `MAX_RUBY_LLM_TOOL_ROUNDS` (25) — caps RubyLLM's unbounded tool-use loop to prevent infinite cycling
+ - Tool-loop round cap (25) to prevent infinite cycling
  - `install_tool_loop_guard` applied to both streaming and non-streaming provider paths

  ### Fixed
@@ -639,9 +766,9 @@
  - `Legion::LLM::ProviderRegistry` — thread-safe registry for native lex-* provider extensions: `register(name, ext)`, `for(name)`, `available`, `registered?(name)`, `reset!`; cleared automatically on `Legion::LLM.shutdown` (closes #37)
  - `Legion::LLM::NativeDispatch` — native provider dispatch layer: `dispatch_chat`, `dispatch_embed`, `dispatch_stream`, `dispatch_count_tokens` route calls to registered lex-* extension modules and return standardized `{ result:, usage: Usage }` hashes; raises `ProviderError` when provider is not registered (closes #37)
  - `Legion::LLM::NativeResponseAdapter` — adapter wrapping native dispatch result hash to expose the same `.content`, `.input_tokens`, `.output_tokens`, `.usage` interface as a RubyLLM response object (closes #37)
- - `provider_layer` settings section: `mode` (`'ruby_llm'` default / `'native'` / `'auto'`), `native_providers` (default `['claude', 'bedrock']`), `fallback_to_ruby_llm` (default `true`); `ruby_llm` mode preserves all existing behavior unchanged (closes #37)
+ - `provider_layer` settings section: `mode` (`'native'` / `'auto'`) and `native_providers` (default `['claude', 'bedrock']`) for native provider dispatch (closes #37)
  - Auto-registration in `Legion::LLM.start`: detects loaded lex-* extensions via `Object.const_defined?` and registers them — `lex-claude` → `:claude`/`:anthropic`, `lex-bedrock` → `:bedrock`, `lex-openai` → `:openai`, `lex-gemini` → `:gemini`; no hard dependencies added (closes #37)
- - `Pipeline::Executor` provider layer integration: `use_native_dispatch?` checks `provider_layer.mode`; `execute_provider_request_native` calls `NativeDispatch.dispatch_chat` and wraps result in `NativeResponseAdapter`, falls back to RubyLLM when `fallback_to_ruby_llm: true`; `execute_provider_request_ruby_llm` is the extracted RubyLLM path (default, no behavior change) (closes #37)
+ - `Pipeline::Executor` provider layer integration: `use_native_dispatch?` checks `provider_layer.mode`; `execute_provider_request_native` calls `NativeDispatch.dispatch_chat` and wraps result in `NativeResponseAdapter` (closes #37)
  - Optional adversarial debate pipeline step for high-stakes decisions (closes #28): `Pipeline::Steps::Debate` runs a multi-round advocate/challenger/judge debate after `provider_call`; the initial response is the advocate, a challenger model critiques it, the advocate rebuts, and a judge model synthesizes all sides into the final response; activation via `debate: true` in `chat()` kwargs, or `Legion::Settings[:llm][:debate][:enabled]`, or GAIA auto-trigger when `gaia_auto_trigger: true` and `high_stakes`/`debate_recommended` are set in the advisory enrichment; debate is disabled by default; GAIA auto-trigger defaults to false in v0.6.0; different models are required for each role (advocate, challenger, judge) to avoid training bias — model rotation picks from enabled providers automatically when not explicitly configured; model strings use `provider:model` format; all LLM calls use `chat_direct` to avoid pipeline recursion; configurable via `debate.default_rounds` (default 1), `debate.max_rounds` (cap, default 3), `debate.advocate_model`, `debate.challenger_model`, `debate.judge_model`, `debate.model_selection_strategy` (default `'rotate'`); debate metadata (`enabled`, `rounds`, `advocate_model`, `challenger_model`, `judge_model`, `advocate_summary`, `challenger_summary`, `judge_confidence`) stored in `enrichments['debate:result']`; gracefully degrades to single-model mode with a warning when fewer than 2 models are available
  - Async context curation (`Legion::LLM::ContextCurator`): keeps LLM context lean without compaction (closes #38). Heuristic curation runs async in `Thread.new` after each `step_context_store` — zero latency impact. Curated messages are used in `step_context_load` when available, falling back to raw history. Heuristic pipeline: `strip_thinking` removes `<thinking>` blocks; `distill_tool_result` summarizes large tool outputs by tool type (`read_file` → line count + first/last, `search`/`grep` → match counts, `bash` → exit code + last lines, default → char count + preview); `fold_resolved_exchanges` detects multi-turn clarification reaching agreement and folds to a system note; `evict_superseded` keeps only the latest read of each file path; `dedup_similar` removes near-duplicate messages via Jaccard similarity (delegates to `Compressor.deduplicate_messages`). LLM-assisted mode is built but off by default (`llm_assisted: false`); when enabled with `mode: 'llm_assisted'`, a configurable small/fast model produces better summaries with automatic fallback to heuristic on any error. All behavior gated by `Legion::Settings[:llm][:context_curation]`: `enabled` (default `true`), `mode` (`'heuristic'`), `llm_assisted` (`false`), `llm_model` (`nil`), `tool_result_max_chars` (2000), `thinking_eviction` (`true`), `exchange_folding` (`true`), `superseded_eviction` (`true`), `dedup_enabled` (`true`), `dedup_threshold` (0.85), `target_context_tokens` (40000).
  - Message chain architecture with parent links and sidechain support in `ConversationStore` (closes #39): every message now carries `id` (UUID), `parent_id`, `sidechain` (default `false`), `message_group_id`, and `agent_id` fields; `build_chain(conversation_id, include_sidechains: false)` reconstructs ordered message history from parent links with rooted-leaf selection, parallel sibling recovery via `message_group_id`, and orphan appending; `sidechain_messages(conversation_id, agent_id: nil)` queries background/subagent messages with optional agent filter; `branch(conversation_id, from_message_id:)` creates a new conversation by copying history up to the given message; `store_metadata` / `read_metadata` provide tail-window session metadata storage; `migrate_parent_links!` backfills parent links on pre-migration sequential data; `messages()` backward-compatible flat array uses chain reconstruction when parent links are present, seq ordering otherwise; DB persistence adds `message_id`, `parent_id`, `sidechain`, `message_group_id`, `agent_id` columns when present (graceful degradation without migration)
@@ -1188,7 +1315,7 @@
  ### Added
  - `ResponseCache` module for async response delivery via memcached with spool overflow at 8MB
  - `DaemonClient` module for HTTP routing to LegionIO daemon with health caching (30s TTL)
- - `Legion::LLM.ask` one-shot method: daemon-first routing with direct RubyLLM fallback
+ - `Legion::LLM.ask` one-shot method: daemon-first routing with direct provider execution
  - `DaemonDeniedError` and `DaemonRateLimitedError` error classes
  - Daemon settings: `daemon.url` and `daemon.enabled` in defaults
  - HTTP status code contract: 200 (cached), 201 (sync), 202 (async poll), 403, 429, 503
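
To make the 0.8.49 entry concrete, here is a minimal sketch of the idempotent registration pattern it describes. This is not the gem's source; the `register_library(key, defaults)` signature and the in-memory store are assumptions modeled on the changelog wording and its note about the test stub:

```ruby
# Hypothetical stand-in for the legion-settings >= 1.4.0 API, mirroring the
# changelog's note that specs stub Legion::Settings with a register_library method.
module Legion
  module Settings
    @store = {}

    # Idempotent: the first registration of a key wins and repeat calls are
    # no-ops, which is what prevents double-registration when defaults are
    # registered at both gem load and Legion::LLM.start.
    def self.register_library(key, defaults)
      @store[key] ||= defaults
    end

    def self.[](key)
      @store[key]
    end
  end
end

llm_defaults = { providers: {}, daemon: { enabled: false } } # illustrative subset

Legion::Settings.register_library(:llm, llm_defaults)
Legion::Settings.register_library(:llm, { daemon: { enabled: true } }) # no-op
p Legion::Settings[:llm] # => {:providers=>{}, :daemon=>{:enabled=>false}}
```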
data/CLAUDE.md CHANGED
@@ -5,10 +5,10 @@

  ## Purpose

- Core LegionIO gem providing LLM capabilities to all extensions. Wraps ruby_llm to provide a consistent interface for chat, embeddings, tool use, and agents across multiple providers (Bedrock, Anthropic, OpenAI, Gemini, Ollama). Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
+ Core LegionIO gem providing LLM capabilities to all extensions through Legion-native provider dispatch. Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.

  **GitHub**: https://github.com/LegionIO/legion-llm
- **Version**: 0.8.0
+ **Version**: 0.8.49
  **License**: Apache-2.0

  ## Architecture
@@ -37,8 +37,6 @@ Legion::LLM.start

  ```
  Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inference, Call, Discovery
- ├── Patches # Monkey-patches for upstream gems
- │ └── RubyLLMParallelTools # Parallel tool execution patch for RubyLLM
  ├── Errors # Typed error hierarchy (LLMError base + subtypes, retryable?)
  │ └── EscalationExhausted / DaemonDeniedError / DaemonRateLimitedError / AuthError /
  │ RateLimitError / ContextOverflow / ProviderError / ProviderDown /
@@ -57,14 +55,16 @@ Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inferenc
  │ ├── Embeddings # generate, generate_batch, default_model, fallback chain
  │ ├── StructuredOutput # JSON schema enforcement with native response_format and prompt fallback
  │ ├── DaemonClient # HTTP routing to LegionIO daemon with 30s health cache
- │ ├── BedrockAuth # Monkey-patch for Bedrock Bearer Token auth (required lazily)
  │ ├── ClaudeConfigLoader # Import Claude CLI config from ~/.claude/settings.json
  │ └── CodexConfigLoader # Import OpenAI bearer token from ~/.codex/auth.json
  ├── Context # Prompt and conversation context management
  │ ├── Compressor # Deterministic prompt compression (3 levels, code-block-aware)
  │ └── Curator # Async conversation curation: strip thinking, distill tools, fold resolved exchanges
  ├── Discovery # Runtime introspection
- │ ├── Ollama # Queries Ollama /api/tags for pulled models (TTL-cached)
+ │ ├── Ollama # Multi-instance Ollama /api/tags + /api/show discovery (TTL-cached)
+ │ ├── Vllm # Multi-instance vLLM /v1/models + /health discovery (TTL-cached)
+ │ ├── RuleGenerator # Auto-generates routing rules from discovered instances/models
+ │ ├── MemoryGate # Checks available RAM before routing to local models
  │ └── System # Queries OS memory: macOS (vm_stat/sysctl), Linux (/proc/meminfo)
  ├── Quality # Response quality evaluation
  │ ├── Checker # Quality heuristics (empty, too_short, repetition, json_parse) + pluggable (was QualityChecker)
@@ -86,12 +86,10 @@ Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inferenc
  │ ├── Executor # 18-step skeleton with profile-aware execution and call_stream
  │ ├── Conversation # In-memory LRU (256 slots) + optional Sequel DB persistence (was ConversationStore)
  │ ├── Prompt # Clean dispatch API: dispatch, request, summarize, extract, decide
- │ ├── ToolAdapter # Wraps Tools::Base for RubyLLM sessions (McpToolAdapter kept as alias)
- │ ├── ToolDispatcher # Routes tool calls: MCP client / LEX runner / RubyLLM builtin
+ │ ├── ToolDispatcher # Routes tool calls: MCP client / LEX runner / native execution
  │ ├── AuditPublisher # Publishes audit events to llm.audit exchange
  │ ├── EnrichmentInjector # Converts RAG/GAIA enrichments into system prompt
  │ ├── GaiaCaller # Gaia-specific chat dispatch with phase/tick tracing
- │ ├── McpToolAdapter # Backward-compat alias for ToolAdapter
  │ └── Steps/ # All 18+ pipeline step modules
  │ ├── Metering, Billing, TokenBudget, PromptCache, Classification, Rbac
  │ ├── GaiaAdvisory, TierAssigner, TriggerMatch, ToolDiscovery, McpDiscovery, RagContext
@@ -148,9 +146,8 @@ Legion::LLM (lib/legion/llm.rb) # Thin facade — delegates to Inferenc
  │ └── OffPeak # Peak-hour deferral (delegates to Scheduling)
  ├── Tools # Tool call layer
  │ ├── Confidence # 4-tier degrading confidence storage (was OverrideConfidence)
- │ ├── Dispatcher # Routes tool calls to MCP/LEX/RubyLLM
+ │ ├── Dispatcher # Routes tool calls to MCP/LEX/native execution
  │ ├── Interceptor # Extensible pre-dispatch intercept registry
- │ ├── Adapter # Wraps lex-* extension tool as RubyLLM::Tool
  │ └── Interceptors/
  │ └── PythonVenv # Redirects python3/pip3 tool calls to isolated venv
  ├── Hooks # before/after chat interceptor registry
@@ -286,7 +283,6 @@ All compatibility routes normalize requests through `API::Translators` (OpenAIRe

  | Gem | Purpose |
  |-----|---------|
- | `ruby_llm` (>= 1.0) | Multi-provider LLM client |
  | `tzinfo` (>= 2.0) | IANA timezone conversion for schedule windows |
  | `legion-logging` | Logging |
  | `legion-settings` | Configuration |
@@ -371,24 +367,21 @@ Settings read from `Legion::Settings[:llm]`:

  ### Provider Settings

- Each provider has: `enabled`, `api_key`, `vault_path`, plus provider-specific keys.
+ Provider defaults now live in each `lex-llm-*` provider extension's `default_settings`. The `providers:` key in `Settings.default` ships as an empty hash; settings files and extension registrations populate it at runtime. Each provider has: `enabled`, `api_key`, plus provider-specific keys.

- Vault credential resolution: When `vault_path` is set and Legion::Crypt::Vault is connected, credentials are fetched from Vault at startup. Keys map to provider-specific fields automatically.
+ Local/fleet providers (Ollama, vLLM, MLX) support multi-instance configs via an `instances:` hash. Discovery scans all instances in parallel, enriches models with real capability metadata, and generates per-instance routing rules.

- Bedrock supports two auth modes:
- - **SigV4** (default): `api_key` + `secret_key` (+ optional `session_token`)
- - **Bearer token**: `bearer_token` for AWS Identity Center/SSO. When set, `bedrock_bearer_auth.rb` is required lazily to monkey-patch RubyLLM's Bedrock provider.
+ ### Capability-Aware Routing

- ### Auto-Detection Priority
+ Routing rules carry `model_capabilities`, `context_length`, and `parameter_count` from provider-supplied `Model::Info`. The `RuleGenerator` creates rules from discovered instances without a static capability map -- each provider supplies real metadata.
+
+ ### Memory Gate

- When no defaults are configured, the first enabled provider is used:
+ `Discovery::MemoryGate` checks available system memory before routing to local models. Models that exceed available RAM minus `discovery.memory_floor_mb` are silently skipped.
+
+ ### Auto-Detection Priority

- 1. Bedrock -> `us.anthropic.claude-sonnet-4-6-v1`
- 2. Anthropic -> `claude-sonnet-4-6`
- 3. OpenAI -> `gpt-4o`
- 4. Gemini -> `gemini-2.0-flash`
- 5. Azure -> (endpoint-specific, from `api_base`)
- 6. Ollama -> `llama3`
+ When no defaults are configured, the first enabled provider is used. Detection order and default models are defined by each `lex-llm-*` provider extension.

  ### Routing Settings

@@ -485,7 +478,6 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
  | Path | Purpose |
  |------|---------|
  | `lib/legion/llm.rb` | Thin facade: start, shutdown, delegates to Inference/Call/Discovery |
- | `lib/legion/llm/patches/ruby_llm_parallel_tools.rb` | Monkey-patch for RubyLLM parallel tool execution |
  | `lib/legion/llm/compat.rb` | Backward-compat aliases via const_missing with deprecation warnings |
  | `lib/legion/llm/errors.rb` | Typed error hierarchy: LLMError base + all subtypes, retryable? predicate |
  | `lib/legion/llm/version.rb` | Version constant |
@@ -503,14 +495,16 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
  | `lib/legion/llm/call/embeddings.rb` | generate, generate_batch, fallback chain, dimension enforcement |
  | `lib/legion/llm/call/structured_output.rb` | JSON schema enforcement with native response_format and prompt fallback |
  | `lib/legion/llm/call/daemon_client.rb` | HTTP routing to LegionIO daemon with 30s health cache |
- | `lib/legion/llm/call/bedrock_auth.rb` | Monkey-patch for Bedrock Bearer Token auth — required lazily |
  | `lib/legion/llm/call/claude_config_loader.rb` | Import Claude CLI config from ~/.claude/settings.json |
  | `lib/legion/llm/call/codex_config_loader.rb` | Import OpenAI bearer token from ~/.codex/auth.json |
  | `lib/legion/llm/context.rb` | Context entry point |
  | `lib/legion/llm/context/compressor.rb` | Deterministic prompt compression: 3 levels, code-block-aware, stopword removal |
  | `lib/legion/llm/context/curator.rb` | Async heuristic conversation curation (was ContextCurator) |
  | `lib/legion/llm/discovery.rb` | Discovery entry point: run, detect_embedding_capability, can_embed? |
- | `lib/legion/llm/discovery/ollama.rb` | Ollama /api/tags discovery with TTL cache |
+ | `lib/legion/llm/discovery/ollama.rb` | Multi-instance Ollama /api/tags + /api/show discovery with TTL cache |
+ | `lib/legion/llm/discovery/vllm.rb` | Multi-instance vLLM /v1/models + /health discovery with TTL cache |
+ | `lib/legion/llm/discovery/rule_generator.rb` | Auto-generates routing rules from discovered instances/models |
+ | `lib/legion/llm/discovery/memory_gate.rb` | Checks available RAM vs model size before routing to local models |
  | `lib/legion/llm/discovery/system.rb` | OS memory introspection (macOS + Linux) with TTL cache |
  | `lib/legion/llm/quality.rb` | Quality entry point |
  | `lib/legion/llm/quality/checker.rb` | Quality heuristics + pluggable callable (was QualityChecker) |
@@ -524,19 +518,17 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
  | `lib/legion/llm/metering/tokens.rb` | Thread-safe per-session token budget accumulator (was TokenTracker) |
  | `lib/legion/llm/inference.rb` | Inference entry point: requires all pipeline components |
  | `lib/legion/llm/inference/request.rb` | Inference::Request Data.define struct with .build and .from_chat_args |
- | `lib/legion/llm/inference/response.rb` | Inference::Response Data.define struct with .build, .from_ruby_llm, #with |
+ | `lib/legion/llm/inference/response.rb` | Inference::Response Data.define struct with .build, .from_provider_message, #with |
  | `lib/legion/llm/inference/profile.rb` | Inference::Profile: caller-derived profiles for step skipping |
  | `lib/legion/llm/inference/tracing.rb` | Inference::Tracing: trace_id, span_id, exchange_id generation |
  | `lib/legion/llm/inference/timeline.rb` | Inference::Timeline: ordered event recording with participant tracking |
  | `lib/legion/llm/inference/executor.rb` | Inference::Executor: 18-step skeleton with profile-aware execution and call_stream |
  | `lib/legion/llm/inference/conversation.rb` | In-memory LRU (256 slots) + optional Sequel DB persistence (was ConversationStore) |
  | `lib/legion/llm/inference/prompt.rb` | Prompt dispatch API: dispatch, request, summarize, extract, decide |
- | `lib/legion/llm/inference/tool_adapter.rb` | Wraps Tools::Base for RubyLLM sessions (McpToolAdapter kept as alias) |
- | `lib/legion/llm/inference/tool_dispatcher.rb` | Routes tool calls to MCP client / LEX runner / RubyLLM builtin |
+ | `lib/legion/llm/inference/tool_dispatcher.rb` | Routes tool calls to MCP client / LEX runner / native execution |
  | `lib/legion/llm/inference/audit_publisher.rb` | Publishes audit events to llm.audit exchange |
  | `lib/legion/llm/inference/enrichment_injector.rb` | Converts RAG/GAIA enrichments into system prompt |
  | `lib/legion/llm/inference/gaia_caller.rb` | Gaia-specific chat dispatch with phase/tick tracing |
- | `lib/legion/llm/inference/mcp_tool_adapter.rb` | Backward-compat alias for ToolAdapter |
  | `lib/legion/llm/inference/steps.rb` | Steps aggregator: requires all step modules |
  | `lib/legion/llm/inference/steps/*.rb` | All 18+ pipeline step modules (metering, billing, rbac, classification, etc.) |
  | `lib/legion/llm/router.rb` | Router: resolve, health_tracker, resolve_chain, select_candidates |
@@ -586,9 +578,8 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
  | `lib/legion/llm/scheduling/batch.rb` | Non-urgent request batching with priority queue and auto-flush |
  | `lib/legion/llm/scheduling/off_peak.rb` | Peak-hour deferral (delegates to Scheduling) |
  | `lib/legion/llm/tools/confidence.rb` | 4-tier degrading confidence storage (was OverrideConfidence) |
- | `lib/legion/llm/tools/dispatcher.rb` | Routes tool calls: MCP client / LEX runner / RubyLLM builtin |
+ | `lib/legion/llm/tools/dispatcher.rb` | Routes tool calls: MCP client / LEX runner / native execution |
  | `lib/legion/llm/tools/interceptor.rb` | Extensible pre-dispatch intercept registry |
- | `lib/legion/llm/tools/adapter.rb` | Wraps lex-* extension tool as RubyLLM::Tool (McpToolAdapter kept as alias) |
  | `lib/legion/llm/tools/interceptors/python_venv.rb` | Redirects python3/pip3 tool calls to isolated venv |
  | `lib/legion/llm/hooks.rb` | Hooks: before/after chat registry, run_before, run_after, install_defaults |
  | `lib/legion/llm/hooks/rag_guard.rb` | Post-generation RAG faithfulness check via lex-eval |
@@ -643,7 +634,7 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
  | `spec/legion/llm/gateway_integration_spec.rb` | Tests: gateway teardown — verifies no delegation |
  | `spec/legion/llm/metering/estimator_spec.rb` | Tests: cost estimation, fuzzy matching, pricing table (was cost_estimator_spec.rb) |
  | `spec/legion/llm/inference/request_spec.rb` | Tests: Request struct builder, legacy adapter |
- | `spec/legion/llm/inference/response_spec.rb` | Tests: Response struct builder, RubyLLM adapter, #with |
+ | `spec/legion/llm/inference/response_spec.rb` | Tests: Response struct builder, provider message adapter, #with |
  | `spec/legion/llm/inference/profile_spec.rb` | Tests: Profile derivation and step skipping |
  | `spec/legion/llm/inference/tracing_spec.rb` | Tests: Tracing init, exchange_id generation |
  | `spec/legion/llm/inference/timeline_spec.rb` | Tests: Timeline event recording, participants |
@@ -727,7 +718,7 @@ The legacy `vault_path` per-provider setting was removed in v0.3.1.
  Tests run without the full LegionIO stack. `spec/spec_helper.rb` uses real `Legion::Logging` and `Legion::Settings` (no stubs — hard dependencies are always present). Each test resets settings to defaults via `before(:each)`.

  ```bash
- bundle exec rspec # 1661 examples, 0 failures
+ bundle exec rspec # 2379 examples, 0 failures
  bundle exec rubocop # 0 offenses
  ```

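Two of the CLAUDE.md additions above lend themselves to small sketches. The first shows a plausible shape for a multi-instance provider entry (`enabled`, `instances:`, and `base_url` appear in the document; the instance names are invented). The second spells out the memory-gate rule as arithmetic; the method name and the 2048 MB floor are assumptions, and only the `discovery.memory_floor_mb` setting name comes from the text:

```ruby
# Hypothetical multi-instance entry for Legion::Settings[:llm][:providers][:ollama].
# Discovery is described as scanning every instance in parallel and generating
# per-instance routing rules from the models each instance reports.
ollama_settings = {
  enabled: true,
  instances: {
    workstation: { base_url: 'http://127.0.0.1:11434' },
    homelab:     { base_url: 'http://192.168.1.40:11434' }
  }
}

# Memory-gate rule: a local model is routable only when it fits in available
# RAM minus the configured floor (discovery.memory_floor_mb). Hypothetical method.
def memory_gate_allows?(model_size_mb, available_mb:, memory_floor_mb: 2048)
  model_size_mb <= available_mb - memory_floor_mb
end

puts memory_gate_allows?(4_600, available_mb: 16_384) # => true  (4.6 GB model fits)
puts memory_gate_allows?(4_600, available_mb: 6_144)  # => false (exceeds 6 GB minus floor)
```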
data/Gemfile CHANGED
@@ -4,7 +4,32 @@ source 'https://rubygems.org'

  gemspec

+ legion_settings_path = File.expand_path('../legion-settings', __dir__)
+ gem 'legion-settings', path: legion_settings_path if Dir.exist?(legion_settings_path)
+
  group :test do
+   lex_llm_path = File.expand_path('../extensions-ai/lex-llm', __dir__)
+   if Dir.exist?(lex_llm_path)
+     gem 'lex-llm', path: lex_llm_path
+   else
+     gem 'lex-llm'
+   end
+
+   %w[
+     lex-llm-ollama
+     lex-llm-vllm
+     lex-llm-anthropic
+     lex-llm-openai
+     lex-llm-gemini
+     lex-llm-mlx
+     lex-llm-bedrock
+     lex-llm-azure-foundry
+     lex-llm-vertex
+   ].each do |provider_gem|
+     provider_path = File.expand_path("../extensions-ai/#{provider_gem}", __dir__)
+     gem provider_gem, path: provider_path if Dir.exist?(provider_path)
+   end
+
    gem 'rake'
    gem 'rspec'
    gem 'rspec_junit_formatter'
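
One design note on the Gemfile block above: every `path:` override is guarded by `Dir.exist?`, so the same Gemfile serves two environments. Inside a monorepo checkout with sibling `../legion-settings` and `../extensions-ai/*` directories, Bundler uses the local sources; anywhere else, the guards evaluate false and only `lex-llm` (the one gem given an explicit registry fallback) is still installed in the `:test` group, while the optional `lex-llm-*` provider gems are skipped entirely.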