legion-llm 0.3.8 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/CLAUDE.md +15 -4
- data/README.md +33 -5
- data/lib/legion/llm/hooks.rb +45 -0
- data/lib/legion/llm/router.rb +11 -1
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +98 -17
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 61c5173e4490643fbecb7977697159ffdeb082b63ec4140289128c69efd14806
|
|
4
|
+
data.tar.gz: c42e9f24e2ecc387c1076fe32f05b4636e17e9fe92b16bf7ed438198caaa3187
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b05c1c69d88184ef4ceea383523c0b6538fd363a1cee9d12bf849ab2885bfdfe7fd59170fe059458c89378f66b93851ec69803ae03af6df1a3bbb55ab1aa432c
|
|
7
|
+
data.tar.gz: 57eab8217bdbc614a8602b45836f4b38bf099d9fc2e6b85cfbd92813856b837387e0901ac06af43085cfe0cd034146d160b05cd358afeb45c03ac628280ce9d6
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.3.11] - 2026-03-20
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `Legion::LLM::Hooks` module with before/after chat hook registry
|
|
7
|
+
- `Hooks.before_chat` and `Hooks.after_chat` for registering interceptor blocks
|
|
8
|
+
- `Hooks.run_before` and `Hooks.run_after` with `:block` action support for guardrail enforcement
|
|
9
|
+
- `Hooks.reset!` for test isolation
|
|
10
|
+
- Before/after hook invocation wired into `_dispatch_chat` for transparent request interception
|
|
11
|
+
|
|
12
|
+
## [0.3.10] - 2026-03-20
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
- `PrivacyModeError` raised when cloud LLM tier is used with `enterprise_data_privacy` enabled
|
|
16
|
+
- `assert_cloud_allowed!` guard in `chat_single` and `ask_direct` blocks cloud-tier dispatch
|
|
17
|
+
- `Router.tier_available?(:cloud)` returns false when enterprise privacy mode is active
|
|
18
|
+
- Cloud provider detection covers bedrock, anthropic, openai, gemini, and azure
|
|
19
|
+
|
|
20
|
+
## [0.3.9] - 2026-03-20
|
|
21
|
+
|
|
22
|
+
### Added
|
|
23
|
+
- OpenInference OTel span wrapping for chat, embed, and structured methods
|
|
24
|
+
|
|
3
25
|
## [0.3.8] - 2026-03-20
|
|
4
26
|
|
|
5
27
|
### Added
|
data/CLAUDE.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
Core LegionIO gem providing LLM capabilities to all extensions. Wraps ruby_llm to provide a consistent interface for chat, embeddings, tool use, and agents across multiple providers (Bedrock, Anthropic, OpenAI, Gemini, Ollama). Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
|
|
9
9
|
|
|
10
10
|
**GitHub**: https://github.com/LegionIO/legion-llm
|
|
11
|
-
**Version**: 0.3.
|
|
11
|
+
**Version**: 0.3.8
|
|
12
12
|
**License**: Apache-2.0
|
|
13
13
|
|
|
14
14
|
## Architecture
|
|
@@ -31,8 +31,12 @@ Legion::LLM.start
|
|
|
31
31
|
```
|
|
32
32
|
Legion::LLM (lib/legion/llm.rb)
|
|
33
33
|
├── EscalationExhausted # Raised when all escalation attempts are exhausted
|
|
34
|
+
├── DaemonDeniedError # Raised when daemon returns HTTP 403
|
|
35
|
+
├── DaemonRateLimitedError # Raised when daemon returns HTTP 429
|
|
34
36
|
├── Settings # Default config, provider settings, routing defaults, discovery defaults
|
|
35
|
-
├── Providers # Provider configuration and Vault credential resolution
|
|
37
|
+
├── Providers # Provider configuration and Vault credential resolution (includes Azure `configure_azure`)
|
|
38
|
+
├── DaemonClient # HTTP routing to LegionIO daemon with 30s health cache
|
|
39
|
+
├── ResponseCache # Async response delivery via memcached with spool overflow
|
|
36
40
|
├── Compressor # Deterministic prompt compression (3 levels, code-block-aware)
|
|
37
41
|
├── Discovery # Runtime introspection for local model availability and system resources
|
|
38
42
|
│ ├── Ollama # Queries Ollama /api/tags for pulled models (TTL-cached)
|
|
@@ -128,6 +132,9 @@ Legion::LLM.shutdown # Cleanup
|
|
|
128
132
|
Legion::LLM.started? # -> Boolean
|
|
129
133
|
Legion::LLM.settings # -> Hash
|
|
130
134
|
|
|
135
|
+
# One-shot convenience (daemon-first, direct fallback)
|
|
136
|
+
Legion::LLM.ask(message, model:, provider:) # -> Hash with :content key; raises DaemonDeniedError/DaemonRateLimitedError
|
|
137
|
+
|
|
131
138
|
# Chat (delegates to gateway when loaded, otherwise direct)
|
|
132
139
|
Legion::LLM.chat(message: 'hello', model:, provider:) # Gateway-metered if available
|
|
133
140
|
Legion::LLM.chat(intent: { privacy: :strict }) # Intent-based routing
|
|
@@ -196,7 +203,8 @@ When no defaults are configured, the first enabled provider is used:
|
|
|
196
203
|
2. Anthropic -> `claude-sonnet-4-6`
|
|
197
204
|
3. OpenAI -> `gpt-4o`
|
|
198
205
|
4. Gemini -> `gemini-2.0-flash`
|
|
199
|
-
5.
|
|
206
|
+
5. Azure -> (endpoint-specific, from `api_base`)
|
|
207
|
+
6. Ollama -> `llama3`
|
|
200
208
|
|
|
201
209
|
### Routing Settings
|
|
202
210
|
|
|
@@ -293,6 +301,9 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
293
301
|
| `lib/legion/llm/settings.rb` | Default settings including routing_defaults, auto-merge into Legion::Settings |
|
|
294
302
|
| `lib/legion/llm/providers.rb` | Provider config, Vault resolution, RubyLLM configuration |
|
|
295
303
|
| `lib/legion/llm/bedrock_bearer_auth.rb` | Monkey-patch for Bedrock Bearer Token auth — required lazily |
|
|
304
|
+
| `lib/legion/llm/claude_config_loader.rb` | Import Claude CLI config from `~/.claude/settings.json` and `~/.claude.json` |
|
|
305
|
+
| `lib/legion/llm/response_cache.rb` | Async response delivery via memcached with spool overflow at 8MB |
|
|
306
|
+
| `lib/legion/llm/daemon_client.rb` | HTTP routing to LegionIO daemon with health caching (30s TTL) |
|
|
296
307
|
| `lib/legion/llm/compressor.rb` | Deterministic prompt compression: 3 levels, code-block-aware, stopword removal |
|
|
297
308
|
| `lib/legion/llm/router.rb` | Router module: resolve, health_tracker, select_candidates pipeline |
|
|
298
309
|
| `lib/legion/llm/router/resolution.rb` | Value object: tier, provider, model, rule, metadata, compress_level |
|
|
@@ -303,7 +314,7 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
303
314
|
| `lib/legion/llm/embeddings.rb` | Embeddings module: generate, generate_batch, default_model |
|
|
304
315
|
| `lib/legion/llm/shadow_eval.rb` | Shadow evaluation: enabled?, should_sample?, evaluate, compare |
|
|
305
316
|
| `lib/legion/llm/structured_output.rb` | JSON schema enforcement with native response_format and prompt fallback |
|
|
306
|
-
| `lib/legion/llm/version.rb` | Version constant (0.3.
|
|
317
|
+
| `lib/legion/llm/version.rb` | Version constant (0.3.8) |
|
|
307
318
|
| `lib/legion/llm/quality_checker.rb` | QualityChecker module with QualityResult struct |
|
|
308
319
|
| `lib/legion/llm/escalation_history.rb` | EscalationHistory mixin: `escalation_history`, `escalated?`, `final_resolution`, `escalation_chain` |
|
|
309
320
|
| `lib/legion/llm/router/escalation_chain.rb` | EscalationChain value object |
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
LLM integration for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Wraps [ruby_llm](https://github.com/crmne/ruby_llm) to provide chat, embeddings, tool use, and agent capabilities to any Legion extension.
|
|
4
4
|
|
|
5
|
-
**Version**: 0.3.
|
|
5
|
+
**Version**: 0.3.8
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -60,6 +60,7 @@ Provider-specific fields:
|
|
|
60
60
|
| Provider | Additional Fields |
|
|
61
61
|
|----------|------------------|
|
|
62
62
|
| **Bedrock** | `secret_key`, `session_token`, `region` (default: `us-east-2`), `bearer_token` (alternative to SigV4 — for AWS Identity Center/SSO) |
|
|
63
|
+
| **Azure** | `api_base` (Azure OpenAI endpoint URL, required), `auth_token` (bearer token alternative to `api_key`) |
|
|
63
64
|
| **Ollama** | `base_url` (default: `http://localhost:11434`) |
|
|
64
65
|
|
|
65
66
|
### Credential Resolution
|
|
@@ -90,7 +91,8 @@ If no `default_model` or `default_provider` is set, legion-llm auto-detects from
|
|
|
90
91
|
| 2 | Anthropic | `claude-sonnet-4-6` |
|
|
91
92
|
| 3 | OpenAI | `gpt-4o` |
|
|
92
93
|
| 4 | Gemini | `gemini-2.0-flash` |
|
|
93
|
-
| 5 |
|
|
94
|
+
| 5 | Azure | (endpoint-specific) |
|
|
95
|
+
| 6 | Ollama | `llama3` |
|
|
94
96
|
|
|
95
97
|
## Core API
|
|
96
98
|
|
|
@@ -103,6 +105,32 @@ Legion::LLM.started? # -> Boolean
|
|
|
103
105
|
Legion::LLM.settings # -> Hash (current LLM settings)
|
|
104
106
|
```
|
|
105
107
|
|
|
108
|
+
### One-Shot Ask
|
|
109
|
+
|
|
110
|
+
`Legion::LLM.ask` is a convenience method for single-turn requests. It routes daemon-first (via the LegionIO REST API if running and configured) and falls back to direct RubyLLM:
|
|
111
|
+
|
|
112
|
+
```ruby
|
|
113
|
+
# Synchronous response
|
|
114
|
+
response = Legion::LLM.ask("What is the capital of France?")
|
|
115
|
+
puts response[:content]
|
|
116
|
+
|
|
117
|
+
# The daemon path returns cached (HTTP 200), synchronous (HTTP 201), or async (HTTP 202) responses
|
|
118
|
+
# HTTP 403 raises DaemonDeniedError; HTTP 429 raises DaemonRateLimitedError
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Configure daemon routing under `llm.daemon`:
|
|
122
|
+
|
|
123
|
+
```json
|
|
124
|
+
{
|
|
125
|
+
"llm": {
|
|
126
|
+
"daemon": {
|
|
127
|
+
"enabled": true,
|
|
128
|
+
"url": "http://127.0.0.1:4567"
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
106
134
|
### Chat
|
|
107
135
|
|
|
108
136
|
Returns a `RubyLLM::Chat` instance for multi-turn conversation:
|
|
@@ -266,8 +294,7 @@ legion-llm includes a dynamic weighted routing engine that dispatches requests a
|
|
|
266
294
|
│ Zero network overhead, no Transport │
|
|
267
295
|
│ │
|
|
268
296
|
│ Tier 2: FLEET → Ollama on Mac Studios / GPU servers │
|
|
269
|
-
│ Via
|
|
270
|
-
│ serve the model (Phase 2, not yet built) │
|
|
297
|
+
│ Via lex-llm-gateway RPC over AMQP │
|
|
271
298
|
│ │
|
|
272
299
|
│ Tier 3: CLOUD → Bedrock / Anthropic / OpenAI / Gemini │
|
|
273
300
|
│ Existing provider API calls │
|
|
@@ -277,7 +304,7 @@ legion-llm includes a dynamic weighted routing engine that dispatches requests a
|
|
|
277
304
|
| Tier | Target | Use Case |
|
|
278
305
|
|------|--------|----------|
|
|
279
306
|
| `local` | Ollama on localhost | Privacy-sensitive, offline, or low-latency workloads |
|
|
280
|
-
| `fleet` | Shared hardware via
|
|
307
|
+
| `fleet` | Shared hardware via lex-llm-gateway (AMQP) | Larger models on dedicated GPU servers |
|
|
281
308
|
| `cloud` | API providers (Bedrock, Anthropic, OpenAI, Gemini) | Frontier models, full-capability inference |
|
|
282
309
|
|
|
283
310
|
#### Intent-Based Dispatch
|
|
@@ -566,6 +593,7 @@ end
|
|
|
566
593
|
| Anthropic | `anthropic` | `vault://`, `env://`, or direct | Direct API access |
|
|
567
594
|
| OpenAI | `openai` | `vault://`, `env://`, or direct | GPT models |
|
|
568
595
|
| Google Gemini | `gemini` | `vault://`, `env://`, or direct | Gemini models |
|
|
596
|
+
| Azure AI | `azure` | `vault://`, `env://`, or direct | Azure OpenAI endpoint; `api_base` + `api_key` or `auth_token` |
|
|
569
597
|
| Ollama | `ollama` | Local, no credentials needed | Local inference |
|
|
570
598
|
|
|
571
599
|
## Integration with LegionIO
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Legion
  module LLM
    # Registry of before/after chat interceptor blocks.
    #
    # A hook may veto a request by returning a Hash of the form
    # { action: :block, response: ... }; the first such verdict
    # short-circuits evaluation and is returned to the dispatcher.
    # Any other return value is ignored.
    #
    # Fix over the original: errors are rescued PER HOOK rather than
    # around the whole run — previously a single raising hook aborted
    # the entire chain, silently bypassing every later guardrail hook.
    # A misbehaving hook is now skipped and the remaining hooks still run.
    module Hooks
      @before_chat = []
      @after_chat = []

      class << self
        # Register a block invoked before each chat dispatch.
        # The block receives (messages:, model:, **extra).
        def before_chat(&block)
          @before_chat << block
        end

        # Register a block invoked after each chat dispatch.
        # The block receives (response:, messages:, model:, **extra).
        def after_chat(&block)
          @after_chat << block
        end

        # Run all before-chat hooks.
        # Returns the first blocking verdict ({ action: :block, ... })
        # or nil when no hook blocks.
        def run_before(messages:, model:, **extra)
          first_block(@before_chat, messages: messages, model: model, **extra)
        end

        # Run all after-chat hooks against the produced response.
        # Returns the first blocking verdict or nil.
        def run_after(response:, messages:, model:, **extra)
          first_block(@after_chat, response: response, messages: messages, model: model, **extra)
        end

        # Clear all registered hooks (test isolation).
        def reset!
          @before_chat = []
          @after_chat = []
        end

        private

        # Invoke each hook with kwargs and return the first
        # { action: :block } Hash. Individual hook errors are swallowed
        # (best-effort contract: hooks must never break chat itself),
        # but — unlike a whole-method rescue — the chain continues so
        # later guardrail hooks still get a chance to block.
        def first_block(hooks, **kwargs)
          hooks.each do |hook|
            verdict = begin
              hook.call(**kwargs)
            rescue StandardError
              nil
            end
            return verdict if verdict.is_a?(Hash) && verdict[:action] == :block
          end
          nil
        end
      end
    end
  end
end
|
data/lib/legion/llm/router.rb
CHANGED
|
@@ -61,7 +61,9 @@ module Legion
|
|
|
61
61
|
# :fleet — available when Legion::Transport is loaded
|
|
62
62
|
# :cloud — always available
|
|
63
63
|
# Tier availability:
#   :cloud — unavailable while enterprise privacy mode is active
#   :fleet — requires Legion::Transport to be loaded
#   anything else (e.g. :local) — always available
def tier_available?(tier)
  case tier.to_sym
  when :cloud then !privacy_mode?
  when :fleet then Legion.const_defined?('Transport')
  else true
  end
end
|
|
@@ -163,6 +165,14 @@ module Legion
|
|
|
163
165
|
{}
|
|
164
166
|
end
|
|
165
167
|
|
|
168
|
+
# True when enterprise data-privacy mode is active.
# Prefers Legion::Settings.enterprise_privacy? when that API exists;
# otherwise falls back to the LEGION_ENTERPRISE_PRIVACY env var.
def privacy_mode?
  settings_api = Legion.const_defined?('Settings') &&
                 Legion::Settings.respond_to?(:enterprise_privacy?)
  return Legion::Settings.enterprise_privacy? if settings_api

  ENV['LEGION_ENTERPRISE_PRIVACY'] == 'true'
end
|
|
175
|
+
|
|
166
176
|
def pick_best(candidates)
|
|
167
177
|
return nil if candidates.empty?
|
|
168
178
|
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -8,6 +8,7 @@ require 'legion/llm/router'
|
|
|
8
8
|
require 'legion/llm/compressor'
|
|
9
9
|
require 'legion/llm/quality_checker'
|
|
10
10
|
require 'legion/llm/escalation_history'
|
|
11
|
+
require 'legion/llm/hooks'
|
|
11
12
|
require_relative 'llm/response_cache'
|
|
12
13
|
require_relative 'llm/daemon_client'
|
|
13
14
|
|
|
@@ -22,6 +23,7 @@ module Legion
|
|
|
22
23
|
class EscalationExhausted < StandardError; end
|
|
23
24
|
class DaemonDeniedError < StandardError; end
|
|
24
25
|
class DaemonRateLimitedError < StandardError; end
|
|
26
|
+
class PrivacyModeError < StandardError; end
|
|
25
27
|
|
|
26
28
|
class << self
|
|
27
29
|
include Legion::LLM::Providers
|
|
@@ -64,15 +66,18 @@ module Legion
|
|
|
64
66
|
# for automatic metering and fleet dispatch
|
|
65
67
|
def chat(model: nil, provider: nil, intent: nil, tier: nil, escalate: nil,
|
|
66
68
|
max_escalations: nil, quality_check: nil, message: nil, **)
|
|
67
|
-
if
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
69
|
+
if defined?(Legion::Telemetry::OpenInference)
|
|
70
|
+
Legion::Telemetry::OpenInference.llm_span(
|
|
71
|
+
model: (model || settings[:default_model]).to_s, provider: provider&.to_s, input: message
|
|
72
|
+
) do |_span|
|
|
73
|
+
_dispatch_chat(model: model, provider: provider, intent: intent, tier: tier, escalate: escalate, max_escalations: max_escalations,
|
|
74
|
+
quality_check: quality_check, message: message, **)
|
|
75
|
+
end
|
|
76
|
+
else
|
|
77
|
+
_dispatch_chat(model: model, provider: provider, intent: intent, tier: tier,
|
|
78
|
+
escalate: escalate, max_escalations: max_escalations,
|
|
79
|
+
quality_check: quality_check, message: message, **)
|
|
71
80
|
end
|
|
72
|
-
|
|
73
|
-
chat_direct(model: model, provider: provider, intent: intent, tier: tier,
|
|
74
|
-
escalate: escalate, max_escalations: max_escalations,
|
|
75
|
-
quality_check: quality_check, message: message, **)
|
|
76
81
|
end
|
|
77
82
|
|
|
78
83
|
# Send a single message — daemon-first, falls through to direct on unavailability.
|
|
@@ -106,9 +111,13 @@ module Legion
|
|
|
106
111
|
|
|
107
112
|
# Generate embeddings — delegates to gateway when available
|
|
108
113
|
def embed(text, **)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
114
|
+
if defined?(Legion::Telemetry::OpenInference)
|
|
115
|
+
Legion::Telemetry::OpenInference.embedding_span(
|
|
116
|
+
model: (settings[:default_model] || 'unknown').to_s
|
|
117
|
+
) { |_span| _dispatch_embed(text, **) }
|
|
118
|
+
else
|
|
119
|
+
_dispatch_embed(text, **)
|
|
120
|
+
end
|
|
112
121
|
end
|
|
113
122
|
|
|
114
123
|
# Direct embed bypassing gateway
|
|
@@ -127,13 +136,13 @@ module Legion
|
|
|
127
136
|
|
|
128
137
|
# Generate structured JSON output — delegates to gateway when available
|
|
129
138
|
def structured(messages:, schema:, **)
|
|
130
|
-
if
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
)
|
|
139
|
+
if defined?(Legion::Telemetry::OpenInference)
|
|
140
|
+
Legion::Telemetry::OpenInference.llm_span(
|
|
141
|
+
model: (settings[:default_model] || 'unknown').to_s, input: messages.to_s
|
|
142
|
+
) { |_span| _dispatch_structured(messages: messages, schema: schema, **) }
|
|
143
|
+
else
|
|
144
|
+
_dispatch_structured(messages: messages, schema: schema, **)
|
|
134
145
|
end
|
|
135
|
-
|
|
136
|
-
structured_direct(messages: messages, schema: schema, **)
|
|
137
146
|
end
|
|
138
147
|
|
|
139
148
|
# Direct structured bypassing gateway
|
|
@@ -152,6 +161,49 @@ module Legion
|
|
|
152
161
|
|
|
153
162
|
private
|
|
154
163
|
|
|
164
|
+
# Core chat dispatch: runs before-hooks, routes to the gateway (when loaded
# and a message was given) or to direct chat, then runs after-hooks.
# A hook returning { action: :block, response: ... } short-circuits the
# request and its :response is returned instead.
def _dispatch_chat(model:, provider:, intent:, tier:, escalate:, max_escalations:, quality_check:, message:, **rest)
  normalized = message.is_a?(Array) ? message : [{ role: 'user', content: message.to_s }]
  effective_model = model || settings[:default_model]

  if defined?(Legion::LLM::Hooks)
    verdict = Legion::LLM::Hooks.run_before(messages: normalized, model: effective_model)
    return verdict[:response] if verdict
  end

  outcome =
    if gateway_loaded? && message
      gateway_chat(model: model, provider: provider, intent: intent,
                   tier: tier, message: message, escalate: escalate,
                   max_escalations: max_escalations, quality_check: quality_check, **rest)
    else
      chat_direct(model: model, provider: provider, intent: intent, tier: tier,
                  escalate: escalate, max_escalations: max_escalations,
                  quality_check: quality_check, message: message, **rest)
    end

  if defined?(Legion::LLM::Hooks)
    verdict = Legion::LLM::Hooks.run_after(response: outcome, messages: normalized, model: effective_model)
    return verdict[:response] if verdict
  end

  outcome
end
|
|
190
|
+
|
|
191
|
+
# Embedding dispatch: gateway runner when the gateway extension is loaded,
# direct RubyLLM embedding otherwise.
def _dispatch_embed(text, **opts)
  if gateway_loaded?
    Legion::Extensions::LLM::Gateway::Runners::Inference.embed(text: text, **opts)
  else
    embed_direct(text, **opts)
  end
end
|
|
196
|
+
|
|
197
|
+
# Structured-output dispatch: gateway runner when loaded, direct otherwise.
def _dispatch_structured(messages:, schema:, **opts)
  unless gateway_loaded?
    return structured_direct(messages: messages, schema: schema, **opts)
  end

  Legion::Extensions::LLM::Gateway::Runners::Inference.structured(
    messages: messages, schema: schema, **opts
  )
end
|
|
206
|
+
|
|
155
207
|
def daemon_ask(message:, model: nil, provider: nil, context: {}, tier: nil, identity: nil) # rubocop:disable Lint/UnusedMethodArgument
|
|
156
208
|
result = DaemonClient.chat(
|
|
157
209
|
message: message, model: model, provider: provider,
|
|
@@ -172,6 +224,7 @@ module Legion
|
|
|
172
224
|
end
|
|
173
225
|
|
|
174
226
|
def ask_direct(message:, model: nil, provider: nil, intent: nil, tier: nil, &block)
|
|
227
|
+
assert_cloud_allowed! if effective_tier_is_cloud?(tier, provider)
|
|
175
228
|
session = chat_direct(model: model, provider: provider, intent: intent, tier: tier)
|
|
176
229
|
response = block ? session.ask(message, &block) : session.ask(message)
|
|
177
230
|
|
|
@@ -202,7 +255,10 @@ module Legion
|
|
|
202
255
|
resolution = Router::GatewayInterceptor.intercept(resolution, context: kwargs.fetch(:context, {}))
|
|
203
256
|
model = resolution.model
|
|
204
257
|
provider = resolution.provider
|
|
258
|
+
assert_cloud_allowed! if resolution.tier.to_sym == :cloud
|
|
205
259
|
end
|
|
260
|
+
elsif tier
|
|
261
|
+
assert_cloud_allowed! if tier.to_sym == :cloud
|
|
206
262
|
end
|
|
207
263
|
|
|
208
264
|
model ||= settings[:default_model]
|
|
@@ -304,6 +360,31 @@ module Legion
|
|
|
304
360
|
esc.fetch(:quality_threshold, 50)
|
|
305
361
|
end
|
|
306
362
|
|
|
363
|
+
# True when enterprise data-privacy mode is active.
# Prefers Legion::Settings.enterprise_privacy? when that API exists;
# otherwise falls back to the LEGION_ENTERPRISE_PRIVACY env var.
# NOTE(review): duplicates Router#privacy_mode? — consider one shared helper.
def enterprise_privacy?
  settings_api = Legion.const_defined?('Settings') &&
                 Legion::Settings.respond_to?(:enterprise_privacy?)
  return Legion::Settings.enterprise_privacy? if settings_api

  ENV['LEGION_ENTERPRISE_PRIVACY'] == 'true'
end
|
|
370
|
+
|
|
371
|
+
# Guard: raises PrivacyModeError when enterprise privacy mode forbids
# dispatching to the cloud tier. No-op otherwise.
def assert_cloud_allowed!
  return unless enterprise_privacy?

  detail = 'Cloud LLM tier is disabled: enterprise_data_privacy is enabled. ' \
           'Only Tier 0 (cache) and Tier 1 (local Ollama) are permitted.'
  raise PrivacyModeError, detail
end
|
|
378
|
+
|
|
379
|
+
# Does this request effectively target the cloud tier?
# An explicit tier wins; otherwise (only when privacy mode is active,
# since the answer is only needed for the guard) the resolved provider
# is checked against the known cloud providers.
def effective_tier_is_cloud?(tier, provider)
  return tier.to_sym == :cloud if tier
  return false unless enterprise_privacy?

  candidate = (provider || settings[:default_provider])&.to_sym
  %i[anthropic bedrock openai gemini azure].include?(candidate)
end
|
|
387
|
+
|
|
307
388
|
def set_defaults
|
|
308
389
|
default_model = settings[:default_model]
|
|
309
390
|
default_provider = settings[:default_provider]
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -139,6 +139,7 @@ files:
|
|
|
139
139
|
- lib/legion/llm/embeddings.rb
|
|
140
140
|
- lib/legion/llm/escalation_history.rb
|
|
141
141
|
- lib/legion/llm/helpers/llm.rb
|
|
142
|
+
- lib/legion/llm/hooks.rb
|
|
142
143
|
- lib/legion/llm/providers.rb
|
|
143
144
|
- lib/legion/llm/quality_checker.rb
|
|
144
145
|
- lib/legion/llm/response_cache.rb
|