legion-llm 0.7.4 → 0.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/lib/legion/llm/errors.rb +2 -0
- data/lib/legion/llm/pipeline/executor.rb +34 -4
- data/lib/legion/llm/pipeline/request.rb +2 -2
- data/lib/legion/llm/pipeline/steps/rbac.rb +38 -19
- data/lib/legion/llm/prompt.rb +220 -0
- data/lib/legion/llm/router.rb +27 -8
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +12 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2856c4fbdb896cc947ca9e64e1f16240db499ce49edb92d1a775009de9405a7b
|
|
4
|
+
data.tar.gz: dde486aca3ebfefec431e4ac4578db846a2fcd597a118657ba44cfca05245cc6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 96435d5a198f879241b1c04c2c17cd0caf2afd2ae3751c2efd6c416e19c3513bbfe3c3612289a4014fed6f461e9c801451e6fd767fa23c62075cd908cdb6f676
|
|
7
|
+
data.tar.gz: 4978d31e8be17eadb107ac10481f1089ed32bb5403e60e1f5320510b6d93c7f73a1880d063ac55e05144dab64998839f7280c8e9271378adceaf08b41b4a35d8
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,36 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.7.6] - 2026-04-14
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- `DaemonUnavailableError < LLMError` error class for fleet fail-closed semantics
|
|
9
|
+
- Metering wired as explicit pipeline executor step (`:metering` in STEPS)
|
|
10
|
+
- Router `exclude:` parameter — provider-keyed hash for anti-bias model exclusion (step 4.6 in `select_candidates`)
|
|
11
|
+
- `thinking:` forwarded to RubyLLM provider via `ruby_llm_chat_options`
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
- Tool injection suppression: `tools: []` (explicit empty array) now skips `inject_registry_tools` — prevents 60+ MCP tools from being injected into fleet LLM calls
|
|
15
|
+
- RBAC fail-closed for fleet: callers with `agent.id` starting with `fleet:` are blocked when RBAC is unavailable (scoped, does not affect non-fleet callers)
|
|
16
|
+
- `exclude:` normalized defensively — `nil` or non-Hash values treated as empty
|
|
17
|
+
|
|
18
|
+
## [0.7.5] - 2026-04-14
|
|
19
|
+
|
|
20
|
+
### Added
|
|
21
|
+
- `Legion::LLM::Prompt` module — clean API replacing `chat`/`ask`/`chat_direct` surface
|
|
22
|
+
- `Prompt.dispatch(message, intent:, exclude:, tier:, tools:, ...)` — auto-routed via Router
|
|
23
|
+
- `Prompt.request(message, provider:, model:, ...)` — pinned dispatch, full pipeline
|
|
24
|
+
- `Prompt.summarize`, `Prompt.extract`, `Prompt.decide` — convenience methods (default `tools: []`)
|
|
25
|
+
- Nil provider/model guard raises `LLMError` with actionable message
|
|
26
|
+
- In-process pipeline execution (no DaemonClient HTTP roundtrip)
|
|
27
|
+
- Backward compat: `Legion::LLM.chat` delegates to `Prompt.dispatch` for non-streaming calls
|
|
28
|
+
- `build_pipeline_request` uses `Pipeline::Request.from_chat_args` as base, preserving all pipeline kwargs
|
|
29
|
+
|
|
30
|
+
## [0.7.4] - 2026-04-14
|
|
31
|
+
|
|
32
|
+
### Fixed
|
|
33
|
+
- PHI/PII classification hard gate: `TierAssigner` now routes `contains_phi`/`contains_pii`/`:restricted` to `tier: :local` (fail closed). Previously routed to `:cloud`.
|
|
34
|
+
|
|
5
35
|
## [0.7.3] - 2026-04-13
|
|
6
36
|
|
|
7
37
|
### Added
|
data/lib/legion/llm/errors.rb
CHANGED
data/lib/legion/llm/pipeline/executor.rb
CHANGED
|
@@ -28,11 +28,12 @@ module Legion
|
|
|
28
28
|
include Steps::TokenBudget
|
|
29
29
|
include Steps::PromptCache
|
|
30
30
|
include Steps::Debate
|
|
31
|
+
include Steps::Metering
|
|
31
32
|
|
|
32
33
|
STEPS = %i[
|
|
33
34
|
tracing_init idempotency conversation_uuid context_load
|
|
34
35
|
rbac classification billing gaia_advisory tier_assignment rag_context trigger_match skill_injector tool_discovery
|
|
35
|
-
routing request_normalization token_budget provider_call response_normalization
|
|
36
|
+
routing request_normalization token_budget provider_call response_normalization metering
|
|
36
37
|
debate confidence_scoring tool_calls context_store post_response knowledge_capture response_return
|
|
37
38
|
].freeze
|
|
38
39
|
|
|
@@ -43,7 +44,7 @@ module Legion
|
|
|
43
44
|
].freeze
|
|
44
45
|
|
|
45
46
|
POST_PROVIDER_STEPS = %i[
|
|
46
|
-
response_normalization debate confidence_scoring tool_calls context_store post_response knowledge_capture response_return
|
|
47
|
+
response_normalization metering debate confidence_scoring tool_calls context_store post_response knowledge_capture response_return
|
|
47
48
|
].freeze
|
|
48
49
|
|
|
49
50
|
ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
|
|
@@ -724,10 +725,12 @@ module Legion
|
|
|
724
725
|
end
|
|
725
726
|
|
|
726
727
|
def ruby_llm_chat_options
|
|
727
|
-
{
|
|
728
|
+
opts = {
|
|
728
729
|
model: @resolved_model,
|
|
729
730
|
provider: @resolved_provider
|
|
730
|
-
}
|
|
731
|
+
}
|
|
732
|
+
opts[:thinking] = @request.thinking if @request.thinking
|
|
733
|
+
opts.compact
|
|
731
734
|
end
|
|
732
735
|
|
|
733
736
|
def inject_ruby_llm_tools(session)
|
|
@@ -735,6 +738,10 @@ module Legion
|
|
|
735
738
|
session.with_tool(tool)
|
|
736
739
|
end
|
|
737
740
|
|
|
741
|
+
# nil means caller did not specify tools — inject registry tools as normal.
|
|
742
|
+
# An explicit empty array [] means caller opted out of registry injection.
|
|
743
|
+
return if @request.tools.is_a?(Array) && @request.tools.empty?
|
|
744
|
+
|
|
738
745
|
inject_registry_tools(session)
|
|
739
746
|
end
|
|
740
747
|
|
|
@@ -923,6 +930,29 @@ module Legion
|
|
|
923
930
|
@enrichments = normalized
|
|
924
931
|
end
|
|
925
932
|
|
|
933
|
+
def step_metering
|
|
934
|
+
input_tokens = @raw_response.respond_to?(:input_tokens) ? @raw_response.input_tokens.to_i : 0
|
|
935
|
+
output_tokens = @raw_response.respond_to?(:output_tokens) ? @raw_response.output_tokens.to_i : 0
|
|
936
|
+
tier = @audit.dig(:'routing:provider_selection', :data, :tier)
|
|
937
|
+
latency_ms = if @timestamps[:provider_start] && @timestamps[:provider_end]
|
|
938
|
+
((@timestamps[:provider_end] - @timestamps[:provider_start]) * 1000).round
|
|
939
|
+
else
|
|
940
|
+
0
|
|
941
|
+
end
|
|
942
|
+
event = Steps::Metering.build_event(
|
|
943
|
+
provider: @resolved_provider,
|
|
944
|
+
model_id: @resolved_model,
|
|
945
|
+
tier: tier,
|
|
946
|
+
input_tokens: input_tokens,
|
|
947
|
+
output_tokens: output_tokens,
|
|
948
|
+
latency_ms: latency_ms
|
|
949
|
+
)
|
|
950
|
+
Steps::Metering.publish_or_spool(event)
|
|
951
|
+
rescue StandardError => e
|
|
952
|
+
@warnings << "metering error: #{e.message}"
|
|
953
|
+
handle_exception(e, level: :warn, operation: 'llm.pipeline.step_metering')
|
|
954
|
+
end
|
|
955
|
+
|
|
926
956
|
def step_context_store
|
|
927
957
|
conv_id = @request.conversation_id
|
|
928
958
|
return unless conv_id
|
|
data/lib/legion/llm/pipeline/request.rb
CHANGED
|
@@ -21,7 +21,7 @@ module Legion
|
|
|
21
21
|
schema_version: kwargs.fetch(:schema_version, '1.0.0'),
|
|
22
22
|
system: kwargs[:system],
|
|
23
23
|
messages: kwargs.fetch(:messages, []),
|
|
24
|
-
tools: kwargs.
|
|
24
|
+
tools: kwargs.key?(:tools) ? kwargs[:tools] : nil,
|
|
25
25
|
tool_choice: kwargs.fetch(:tool_choice, { mode: :auto }),
|
|
26
26
|
routing: kwargs.fetch(:routing, { provider: nil, model: nil }),
|
|
27
27
|
tokens: kwargs.fetch(:tokens, { max: 4096 }),
|
|
@@ -79,7 +79,7 @@ module Legion
|
|
|
79
79
|
messages: messages,
|
|
80
80
|
system: kwargs[:system],
|
|
81
81
|
routing: routing,
|
|
82
|
-
tools: kwargs.
|
|
82
|
+
tools: kwargs.key?(:tools) ? kwargs[:tools] : nil,
|
|
83
83
|
tool_choice: kwargs[:tool_choice] || { mode: :auto },
|
|
84
84
|
stream: kwargs.fetch(:stream, false),
|
|
85
85
|
generation: kwargs[:generation] || {},
|
|
data/lib/legion/llm/pipeline/steps/rbac.rb
CHANGED
|
@@ -13,6 +13,14 @@ module Legion
|
|
|
13
13
|
start_time = Time.now
|
|
14
14
|
|
|
15
15
|
unless defined?(::Legion::Rbac)
|
|
16
|
+
if fleet_caller?
|
|
17
|
+
msg = 'RBAC unavailable: fleet callers require RBAC enforcement (fail-closed)'
|
|
18
|
+
log.error("[llm][rbac] fleet_blocked request_id=#{@request.id} reason=rbac_unavailable")
|
|
19
|
+
record_rbac_audit(:failure, msg, start_time)
|
|
20
|
+
record_rbac_timeline("denied: #{msg}")
|
|
21
|
+
raise Legion::LLM::PipelineError.new("403 Forbidden: #{msg}", step: :rbac)
|
|
22
|
+
end
|
|
23
|
+
|
|
16
24
|
@warnings << 'RBAC unavailable, permitting request without enforcement'
|
|
17
25
|
log.info("[llm][rbac] unavailable request_id=#{@request.id} action=permit_without_enforcement")
|
|
18
26
|
record_rbac_audit(:success, 'permitted (rbac unavailable)', start_time)
|
|
@@ -20,26 +28,28 @@ module Legion
|
|
|
20
28
|
return
|
|
21
29
|
end
|
|
22
30
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
31
|
+
begin
|
|
32
|
+
principal = build_rbac_principal
|
|
33
|
+
caller_id = extract_rbac_caller_id
|
|
34
|
+
log.info("[llm][rbac] authorize request_id=#{@request.id} caller=#{caller_id}")
|
|
35
|
+
::Legion::Rbac.authorize!(principal: principal, action: :use, resource: 'llm/pipeline')
|
|
27
36
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
37
|
+
log.info("[llm][rbac] permitted request_id=#{@request.id} caller=#{caller_id}")
|
|
38
|
+
record_rbac_audit(:success, "permitted caller=#{caller_id}", start_time)
|
|
39
|
+
record_rbac_timeline("permitted caller=#{caller_id}")
|
|
40
|
+
rescue ::Legion::Rbac::AccessDenied => e
|
|
41
|
+
log.warn("[llm][rbac] denied request_id=#{@request.id} error=#{e.message}")
|
|
42
|
+
record_rbac_audit(:failure, e.message, start_time)
|
|
43
|
+
record_rbac_timeline("denied: #{e.message}")
|
|
44
|
+
handle_exception(e, level: :warn, operation: 'llm.pipeline.steps.rbac.denied', request_id: @request.id)
|
|
45
|
+
raise Legion::LLM::PipelineError.new("403 Forbidden: #{e.message}", step: :rbac)
|
|
46
|
+
rescue StandardError => e
|
|
47
|
+
log.error("[llm][rbac] failed request_id=#{@request.id} error=#{e.message}")
|
|
48
|
+
record_rbac_audit(:failure, "error: #{e.message}", start_time)
|
|
49
|
+
record_rbac_timeline("error: #{e.message}")
|
|
50
|
+
handle_exception(e, level: :error, operation: 'llm.pipeline.steps.rbac', request_id: @request.id)
|
|
51
|
+
raise Legion::LLM::PipelineError.new("rbac error: #{e.message}", step: :rbac)
|
|
52
|
+
end
|
|
43
53
|
end
|
|
44
54
|
|
|
45
55
|
private
|
|
@@ -54,6 +64,15 @@ module Legion
|
|
|
54
64
|
)
|
|
55
65
|
end
|
|
56
66
|
|
|
67
|
+
def fleet_caller?
|
|
68
|
+
agent_ids = [
|
|
69
|
+
@request.agent&.dig(:id),
|
|
70
|
+
@request.caller&.dig(:agent, :id)
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
agent_ids.any? { |agent_id| agent_id.is_a?(String) && agent_id.start_with?('fleet:') }
|
|
74
|
+
end
|
|
75
|
+
|
|
57
76
|
def extract_rbac_caller_id
|
|
58
77
|
@request.caller&.dig(:requested_by, :id) ||
|
|
59
78
|
@request.caller&.dig(:requested_by, :identity) ||
|
|
data/lib/legion/llm/prompt.rb
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module LLM
|
|
5
|
+
module Prompt
|
|
6
|
+
module_function
|
|
7
|
+
|
|
8
|
+
# Auto-routed: Router picks the best provider+model based on intent.
|
|
9
|
+
# Primary entry point for most LLM calls.
|
|
10
|
+
# When provider/model are passed explicitly, they take precedence over routing.
|
|
11
|
+
def dispatch(message, # rubocop:disable Metrics/ParameterLists
|
|
12
|
+
intent: nil,
|
|
13
|
+
tier: nil,
|
|
14
|
+
exclude: {},
|
|
15
|
+
provider: nil,
|
|
16
|
+
model: nil,
|
|
17
|
+
schema: nil,
|
|
18
|
+
tools: nil,
|
|
19
|
+
escalate: nil,
|
|
20
|
+
max_escalations: 3,
|
|
21
|
+
thinking: nil,
|
|
22
|
+
temperature: nil,
|
|
23
|
+
max_tokens: nil,
|
|
24
|
+
tracing: nil,
|
|
25
|
+
agent: nil,
|
|
26
|
+
caller: nil,
|
|
27
|
+
cache: nil,
|
|
28
|
+
quality_check: nil,
|
|
29
|
+
**)
|
|
30
|
+
resolved_provider = provider
|
|
31
|
+
resolved_model = model
|
|
32
|
+
|
|
33
|
+
if resolved_provider.nil? && resolved_model.nil? && defined?(Router) && Router.routing_enabled? && (intent || tier)
|
|
34
|
+
resolution = Router.resolve(intent: intent, tier: tier, exclude: exclude)
|
|
35
|
+
resolved_provider = resolution&.provider
|
|
36
|
+
resolved_model = resolution&.model
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
resolved_provider ||= Legion::LLM.settings[:default_provider]
|
|
40
|
+
resolved_model ||= Legion::LLM.settings[:default_model]
|
|
41
|
+
|
|
42
|
+
request(message,
|
|
43
|
+
provider: resolved_provider,
|
|
44
|
+
model: resolved_model,
|
|
45
|
+
intent: intent,
|
|
46
|
+
tier: tier,
|
|
47
|
+
schema: schema,
|
|
48
|
+
tools: tools,
|
|
49
|
+
escalate: escalate,
|
|
50
|
+
max_escalations: max_escalations,
|
|
51
|
+
thinking: thinking,
|
|
52
|
+
temperature: temperature,
|
|
53
|
+
max_tokens: max_tokens,
|
|
54
|
+
tracing: tracing,
|
|
55
|
+
agent: agent,
|
|
56
|
+
caller: caller,
|
|
57
|
+
cache: cache,
|
|
58
|
+
quality_check: quality_check,
|
|
59
|
+
**)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Pinned: caller specifies exact provider+model. Full pipeline runs in-process.
|
|
63
|
+
def request(message, # rubocop:disable Metrics/ParameterLists
|
|
64
|
+
provider:,
|
|
65
|
+
model:,
|
|
66
|
+
intent: nil,
|
|
67
|
+
tier: nil,
|
|
68
|
+
schema: nil,
|
|
69
|
+
tools: nil,
|
|
70
|
+
escalate: nil,
|
|
71
|
+
max_escalations: 3,
|
|
72
|
+
thinking: nil,
|
|
73
|
+
temperature: nil,
|
|
74
|
+
max_tokens: nil,
|
|
75
|
+
tracing: nil,
|
|
76
|
+
agent: nil,
|
|
77
|
+
caller: nil,
|
|
78
|
+
cache: nil,
|
|
79
|
+
quality_check: nil,
|
|
80
|
+
**)
|
|
81
|
+
if provider.nil? || model.nil?
|
|
82
|
+
raise LLMError, "Prompt.request: provider and model must be set (got provider=#{provider.inspect}, model=#{model.inspect}). " \
|
|
83
|
+
'Configure Legion::Settings[:llm][:default_provider] and [:default_model], or pass them explicitly.'
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
pipeline_request = build_pipeline_request(
|
|
87
|
+
message, provider: provider, model: model, intent: intent, tier: tier,
|
|
88
|
+
schema: schema, tools: tools,
|
|
89
|
+
escalate: escalate, max_escalations: max_escalations,
|
|
90
|
+
thinking: thinking, temperature: temperature, max_tokens: max_tokens,
|
|
91
|
+
tracing: tracing, agent: agent, caller: caller, cache: cache,
|
|
92
|
+
quality_check: quality_check, **
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
executor = Pipeline::Executor.new(pipeline_request)
|
|
96
|
+
executor.call
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Condense a conversation or feedback history into a shorter form.
|
|
100
|
+
def summarize(messages, tools: [], **)
|
|
101
|
+
prompt = build_summarize_prompt(messages)
|
|
102
|
+
dispatch(prompt, tools: tools, **)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Extract structured data from unstructured text.
|
|
106
|
+
def extract(text, schema:, tools: [], **)
|
|
107
|
+
prompt = build_extract_prompt(text)
|
|
108
|
+
dispatch(prompt, schema: schema, tools: tools, **)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Pick from a set of options with reasoning.
|
|
112
|
+
def decide(question, options:, tools: [], **)
|
|
113
|
+
prompt = build_decide_prompt(question, options)
|
|
114
|
+
dispatch(prompt, tools: tools, **)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# --- Private helpers ---
|
|
118
|
+
|
|
119
|
+
def build_pipeline_request(message, provider:, model:, intent:, tier:, schema:, tools:, # rubocop:disable Metrics/ParameterLists, Metrics/MethodLength
|
|
120
|
+
escalate:, max_escalations:, thinking:, temperature:,
|
|
121
|
+
max_tokens:, tracing:, agent:, caller:, cache:,
|
|
122
|
+
quality_check:, **rest)
|
|
123
|
+
# Build base request via from_chat_args to preserve full pipeline kwargs
|
|
124
|
+
# (context_strategy, tool_choice, idempotency_key, ttl, enrichments, predictions, etc.)
|
|
125
|
+
chat_message = message.is_a?(Array) ? nil : message.to_s
|
|
126
|
+
chat_messages = message.is_a?(Array) ? message : nil
|
|
127
|
+
|
|
128
|
+
base = Pipeline::Request.from_chat_args(
|
|
129
|
+
message: chat_message,
|
|
130
|
+
messages: chat_messages,
|
|
131
|
+
model: model,
|
|
132
|
+
provider: provider,
|
|
133
|
+
intent: intent,
|
|
134
|
+
tier: tier,
|
|
135
|
+
tools: tools,
|
|
136
|
+
thinking: thinking,
|
|
137
|
+
tracing: tracing,
|
|
138
|
+
agent: agent,
|
|
139
|
+
caller: caller,
|
|
140
|
+
cache: cache,
|
|
141
|
+
escalate: escalate,
|
|
142
|
+
max_escalations: max_escalations,
|
|
143
|
+
quality_check: quality_check,
|
|
144
|
+
**rest
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
# Overlay Prompt-specific translations on top of the base request
|
|
148
|
+
generation = (base.generation || {}).dup
|
|
149
|
+
generation[:temperature] = temperature if temperature
|
|
150
|
+
|
|
151
|
+
tokens = (base.tokens || {}).dup
|
|
152
|
+
tokens[:max] = max_tokens if max_tokens
|
|
153
|
+
|
|
154
|
+
response_format = if schema
|
|
155
|
+
{ type: :json_schema, schema: schema }
|
|
156
|
+
elsif base.response_format
|
|
157
|
+
base.response_format
|
|
158
|
+
else
|
|
159
|
+
{ type: :text }
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
Pipeline::Request.build(
|
|
163
|
+
messages: base.messages,
|
|
164
|
+
system: base.system,
|
|
165
|
+
routing: base.routing || { provider: provider, model: model },
|
|
166
|
+
tools: base.tools || tools || [],
|
|
167
|
+
tool_choice: base.tool_choice,
|
|
168
|
+
thinking: base.thinking || thinking,
|
|
169
|
+
generation: generation,
|
|
170
|
+
tokens: tokens,
|
|
171
|
+
stop: base.stop,
|
|
172
|
+
response_format: response_format,
|
|
173
|
+
stream: base.stream || false,
|
|
174
|
+
fork: base.fork,
|
|
175
|
+
cache: base.cache || cache || { strategy: :default, cacheable: true },
|
|
176
|
+
priority: base.priority || :normal,
|
|
177
|
+
tracing: base.tracing || tracing,
|
|
178
|
+
classification: base.classification,
|
|
179
|
+
caller: base.caller || caller,
|
|
180
|
+
agent: base.agent || agent,
|
|
181
|
+
billing: base.billing,
|
|
182
|
+
test: base.test,
|
|
183
|
+
modality: base.modality,
|
|
184
|
+
hooks: base.hooks,
|
|
185
|
+
conversation_id: base.conversation_id,
|
|
186
|
+
idempotency_key: base.idempotency_key,
|
|
187
|
+
schema_version: base.schema_version,
|
|
188
|
+
id: base.id,
|
|
189
|
+
ttl: base.ttl,
|
|
190
|
+
metadata: base.metadata || {},
|
|
191
|
+
enrichments: base.enrichments || {},
|
|
192
|
+
predictions: base.predictions || {},
|
|
193
|
+
context_strategy: base.context_strategy,
|
|
194
|
+
extra: base.extra || {}
|
|
195
|
+
)
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def build_summarize_prompt(messages)
|
|
199
|
+
text = if messages.is_a?(Array)
|
|
200
|
+
messages.map { |m| m.is_a?(Hash) ? m[:content] : m.to_s }.join("\n")
|
|
201
|
+
else
|
|
202
|
+
messages.to_s
|
|
203
|
+
end
|
|
204
|
+
"Summarize the following content concisely, preserving key points:\n\n#{text}"
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def build_extract_prompt(text)
|
|
208
|
+
"Extract structured data from the following text. Return only the JSON matching the provided schema.\n\n#{text}"
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def build_decide_prompt(question, options)
|
|
212
|
+
options_text = options.each_with_index.map { |opt, i| "#{i + 1}. #{opt}" }.join("\n")
|
|
213
|
+
"#{question}\n\nOptions:\n#{options_text}\n\nPick the best option and explain your reasoning."
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
private_class_method :build_pipeline_request,
|
|
217
|
+
:build_summarize_prompt, :build_extract_prompt, :build_decide_prompt
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
end
|
data/lib/legion/llm/router.rb
CHANGED
|
@@ -22,14 +22,14 @@ module Legion
|
|
|
22
22
|
# @param model [String, nil] explicit model override
|
|
23
23
|
# @param provider [Symbol, nil] explicit provider override
|
|
24
24
|
# @return [Resolution, nil]
|
|
25
|
-
def resolve(intent: nil, tier: nil, model: nil, provider: nil)
|
|
25
|
+
def resolve(intent: nil, tier: nil, model: nil, provider: nil, exclude: {})
|
|
26
26
|
return explicit_resolution(tier, provider, model) if tier
|
|
27
27
|
|
|
28
28
|
return nil unless routing_enabled? && intent
|
|
29
29
|
|
|
30
30
|
merged = merge_defaults(intent)
|
|
31
31
|
rules = load_rules
|
|
32
|
-
candidates = select_candidates(rules, merged)
|
|
32
|
+
candidates = select_candidates(rules, merged, exclude: exclude)
|
|
33
33
|
best = pick_best(candidates)
|
|
34
34
|
resolution = best&.to_resolution
|
|
35
35
|
|
|
@@ -42,12 +42,12 @@ module Legion
|
|
|
42
42
|
resolution || arbitrage_fallback(intent)
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
-
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil)
|
|
45
|
+
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil, exclude: {})
|
|
46
46
|
max = max_escalations || escalation_max_attempts
|
|
47
47
|
return chain_from_defaults(model, provider, max) unless routing_enabled? && (intent || tier)
|
|
48
48
|
return EscalationChain.new(resolutions: [explicit_resolution(tier, provider, model)], max_attempts: max) if tier
|
|
49
49
|
|
|
50
|
-
chain_from_intent(intent, max)
|
|
50
|
+
chain_from_intent(intent, max, exclude: exclude)
|
|
51
51
|
end
|
|
52
52
|
|
|
53
53
|
def health_tracker
|
|
@@ -131,7 +131,7 @@ module Legion
|
|
|
131
131
|
raw.map { |h| Rule.from_hash(h.transform_keys(&:to_sym)) }
|
|
132
132
|
end
|
|
133
133
|
|
|
134
|
-
def select_candidates(rules, intent)
|
|
134
|
+
def select_candidates(rules, intent, exclude: {})
|
|
135
135
|
log.debug("Router: selecting candidates from #{rules.size} rules")
|
|
136
136
|
|
|
137
137
|
# 1. Collect constraints from constraint rules that match the intent
|
|
@@ -151,8 +151,12 @@ module Legion
|
|
|
151
151
|
# 4.5 Reject Ollama rules where model is not pulled or doesn't fit
|
|
152
152
|
discovered = unconstrained.reject { |r| excluded_by_discovery?(r) }
|
|
153
153
|
|
|
154
|
+
# 4.6 Reject rules matching caller-provided exclude list
|
|
155
|
+
normalized_exclude = exclude.is_a?(Hash) ? exclude : {}
|
|
156
|
+
not_excluded = normalized_exclude.empty? ? discovered : discovered.reject { |r| excluded_by_caller?(r, normalized_exclude) }
|
|
157
|
+
|
|
154
158
|
# 5. Filter by tier availability
|
|
155
|
-
final =
|
|
159
|
+
final = not_excluded.select { |r| tier_available?(r.target[:tier] || r.target['tier']) }
|
|
156
160
|
|
|
157
161
|
log.debug("Router: #{final.size} candidates after filtering (started with #{rules.size})")
|
|
158
162
|
|
|
@@ -204,6 +208,21 @@ module Legion
|
|
|
204
208
|
{}
|
|
205
209
|
end
|
|
206
210
|
|
|
211
|
+
def excluded_by_caller?(rule, exclude)
|
|
212
|
+
return false if exclude.nil? || exclude.empty?
|
|
213
|
+
|
|
214
|
+
target = rule.target || {}
|
|
215
|
+
provider = (target[:provider] || target['provider'])&.to_sym
|
|
216
|
+
model = target[:model] || target['model']
|
|
217
|
+
tier = (target[:tier] || target['tier'])&.to_sym
|
|
218
|
+
|
|
219
|
+
return true if exclude[:provider] && provider == exclude[:provider].to_sym
|
|
220
|
+
return true if exclude[:model] && model == exclude[:model]
|
|
221
|
+
return true if exclude[:tier] && tier == exclude[:tier].to_sym
|
|
222
|
+
|
|
223
|
+
false
|
|
224
|
+
end
|
|
225
|
+
|
|
207
226
|
def privacy_mode?
|
|
208
227
|
if Legion.const_defined?('Settings', false) && Legion::Settings.respond_to?(:enterprise_privacy?)
|
|
209
228
|
Legion::Settings.enterprise_privacy?
|
|
@@ -272,10 +291,10 @@ module Legion
|
|
|
272
291
|
EscalationChain.new(resolutions: [res], max_attempts: max)
|
|
273
292
|
end
|
|
274
293
|
|
|
275
|
-
def chain_from_intent(intent, max)
|
|
294
|
+
def chain_from_intent(intent, max, exclude: {})
|
|
276
295
|
merged = intent ? merge_defaults(intent) : {}
|
|
277
296
|
rules = load_rules
|
|
278
|
-
candidates = select_candidates(rules, merged)
|
|
297
|
+
candidates = select_candidates(rules, merged, exclude: exclude)
|
|
279
298
|
sorted = candidates.sort_by { |r| -effective_priority(r) }
|
|
280
299
|
resolutions = sorted.map(&:to_resolution)
|
|
281
300
|
resolutions = build_fallback_chain(sorted.first, sorted, resolutions) if sorted.first&.fallback
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -37,6 +37,7 @@ require_relative 'llm/cost_tracker'
|
|
|
37
37
|
require_relative 'llm/token_tracker'
|
|
38
38
|
require_relative 'llm/override_confidence'
|
|
39
39
|
require_relative 'llm/routes'
|
|
40
|
+
require_relative 'llm/prompt'
|
|
40
41
|
|
|
41
42
|
begin
|
|
42
43
|
require_relative 'llm/skills'
|
|
@@ -543,7 +544,17 @@ module Legion
|
|
|
543
544
|
"tier=#{tier} escalate=#{escalate} max_escalations=#{max_escalations} " \
|
|
544
545
|
"quality_check=#{quality_check} message_present=#{!message.nil?} kwargs=#{kwargs.keys.sort}"
|
|
545
546
|
)
|
|
546
|
-
if pipeline_enabled? && (message || kwargs[:messages])
|
|
547
|
+
if pipeline_enabled? && (message || kwargs[:messages]) && !block_given?
|
|
548
|
+
return Prompt.dispatch(
|
|
549
|
+
message || kwargs[:messages],
|
|
550
|
+
intent: intent, tier: tier, provider: provider, model: model,
|
|
551
|
+
escalate: escalate, max_escalations: max_escalations,
|
|
552
|
+
quality_check: quality_check, **kwargs.except(:messages)
|
|
553
|
+
)
|
|
554
|
+
end
|
|
555
|
+
|
|
556
|
+
# Streaming with pipeline — old path (Prompt does not handle streaming yet)
|
|
557
|
+
if pipeline_enabled? && (message || kwargs[:messages]) && block_given?
|
|
547
558
|
return chat_via_pipeline(model: model, provider: provider, intent: intent, tier: tier,
|
|
548
559
|
message: message, escalate: escalate, max_escalations: max_escalations,
|
|
549
560
|
quality_check: quality_check, **kwargs, &)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.4
|
|
4
|
+
version: 0.7.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -307,6 +307,7 @@ files:
|
|
|
307
307
|
- lib/legion/llm/pipeline/tool_adapter.rb
|
|
308
308
|
- lib/legion/llm/pipeline/tool_dispatcher.rb
|
|
309
309
|
- lib/legion/llm/pipeline/tracing.rb
|
|
310
|
+
- lib/legion/llm/prompt.rb
|
|
310
311
|
- lib/legion/llm/provider_registry.rb
|
|
311
312
|
- lib/legion/llm/providers.rb
|
|
312
313
|
- lib/legion/llm/quality_checker.rb
|