legion-llm 0.7.5 → 0.7.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14ea97502e51c6baf291165f3e36bbf37b28b93eb119ad750e6157089b0058c6
4
- data.tar.gz: 6881a3b870cbdb2b7f7bbc2ce3af39b7331f27aefd48e742073a5c1149a8d0bf
3
+ metadata.gz: 2856c4fbdb896cc947ca9e64e1f16240db499ce49edb92d1a775009de9405a7b
4
+ data.tar.gz: dde486aca3ebfefec431e4ac4578db846a2fcd597a118657ba44cfca05245cc6
5
5
  SHA512:
6
- metadata.gz: 83d59e98b5fe417f762bdf7aec41c8bdbe10246db47d9f9dcf620a9c5b7bc97768a0f05a13e5865a30400fb37c018ee019047fd68ddf717d760b213dd36c58e2
7
- data.tar.gz: 43897c5332b7c641812f3e75981b1712a5d5262f607ce55bf8cd49f27c04a6a62f4bddd3d5f6abd8c0adc634a41b60e9b894383a211955ae1197d09d9c16bfb3
6
+ metadata.gz: 96435d5a198f879241b1c04c2c17cd0caf2afd2ae3751c2efd6c416e19c3513bbfe3c3612289a4014fed6f461e9c801451e6fd767fa23c62075cd908cdb6f676
7
+ data.tar.gz: 4978d31e8be17eadb107ac10481f1089ed32bb5403e60e1f5320510b6d93c7f73a1880d063ac55e05144dab64998839f7280c8e9271378adceaf08b41b4a35d8
data/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.7.6] - 2026-04-14
6
+
7
+ ### Added
8
+ - `DaemonUnavailableError < LLMError` error class for fleet fail-closed semantics
9
+ - Metering wired as explicit pipeline executor step (`:metering` in STEPS)
10
+ - Router `exclude:` parameter — provider-keyed hash for anti-bias model exclusion (step 4.6 in `select_candidates`)
11
+ - `thinking:` forwarded to RubyLLM provider via `ruby_llm_chat_options`
12
+
13
+ ### Fixed
14
+ - Tool injection suppression: `tools: []` (explicit empty array) now skips `inject_registry_tools` — prevents 60+ MCP tools from being injected into fleet LLM calls
15
+ - RBAC fail-closed for fleet: callers with `agent.id` starting with `fleet:` are blocked when RBAC is unavailable (scoped, does not affect non-fleet callers)
16
+ - `exclude:` normalized defensively — `nil` or non-Hash values treated as empty
17
+
5
18
  ## [0.7.5] - 2026-04-14
6
19
 
7
20
  ### Added
@@ -41,5 +41,7 @@ module Legion
41
41
  end
42
42
 
43
43
  class TokenBudgetExceeded < LLMError; end
44
+
45
+ class DaemonUnavailableError < LLMError; end
44
46
  end
45
47
  end
@@ -28,11 +28,12 @@ module Legion
28
28
  include Steps::TokenBudget
29
29
  include Steps::PromptCache
30
30
  include Steps::Debate
31
+ include Steps::Metering
31
32
 
32
33
  STEPS = %i[
33
34
  tracing_init idempotency conversation_uuid context_load
34
35
  rbac classification billing gaia_advisory tier_assignment rag_context trigger_match skill_injector tool_discovery
35
- routing request_normalization token_budget provider_call response_normalization
36
+ routing request_normalization token_budget provider_call response_normalization metering
36
37
  debate confidence_scoring tool_calls context_store post_response knowledge_capture response_return
37
38
  ].freeze
38
39
 
@@ -43,7 +44,7 @@ module Legion
43
44
  ].freeze
44
45
 
45
46
  POST_PROVIDER_STEPS = %i[
46
- response_normalization debate confidence_scoring tool_calls context_store post_response knowledge_capture response_return
47
+ response_normalization metering debate confidence_scoring tool_calls context_store post_response knowledge_capture response_return
47
48
  ].freeze
48
49
 
49
50
  ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
@@ -724,10 +725,12 @@ module Legion
724
725
  end
725
726
 
726
727
  def ruby_llm_chat_options
727
- {
728
+ opts = {
728
729
  model: @resolved_model,
729
730
  provider: @resolved_provider
730
- }.compact
731
+ }
732
+ opts[:thinking] = @request.thinking if @request.thinking
733
+ opts.compact
731
734
  end
732
735
 
733
736
  def inject_ruby_llm_tools(session)
@@ -735,6 +738,10 @@ module Legion
735
738
  session.with_tool(tool)
736
739
  end
737
740
 
741
+ # nil means caller did not specify tools — inject registry tools as normal.
742
+ # An explicit empty array [] means caller opted out of registry injection.
743
+ return if @request.tools.is_a?(Array) && @request.tools.empty?
744
+
738
745
  inject_registry_tools(session)
739
746
  end
740
747
 
@@ -923,6 +930,29 @@ module Legion
923
930
  @enrichments = normalized
924
931
  end
925
932
 
933
+ def step_metering
934
+ input_tokens = @raw_response.respond_to?(:input_tokens) ? @raw_response.input_tokens.to_i : 0
935
+ output_tokens = @raw_response.respond_to?(:output_tokens) ? @raw_response.output_tokens.to_i : 0
936
+ tier = @audit.dig(:'routing:provider_selection', :data, :tier)
937
+ latency_ms = if @timestamps[:provider_start] && @timestamps[:provider_end]
938
+ ((@timestamps[:provider_end] - @timestamps[:provider_start]) * 1000).round
939
+ else
940
+ 0
941
+ end
942
+ event = Steps::Metering.build_event(
943
+ provider: @resolved_provider,
944
+ model_id: @resolved_model,
945
+ tier: tier,
946
+ input_tokens: input_tokens,
947
+ output_tokens: output_tokens,
948
+ latency_ms: latency_ms
949
+ )
950
+ Steps::Metering.publish_or_spool(event)
951
+ rescue StandardError => e
952
+ @warnings << "metering error: #{e.message}"
953
+ handle_exception(e, level: :warn, operation: 'llm.pipeline.step_metering')
954
+ end
955
+
926
956
  def step_context_store
927
957
  conv_id = @request.conversation_id
928
958
  return unless conv_id
@@ -21,7 +21,7 @@ module Legion
21
21
  schema_version: kwargs.fetch(:schema_version, '1.0.0'),
22
22
  system: kwargs[:system],
23
23
  messages: kwargs.fetch(:messages, []),
24
- tools: kwargs.fetch(:tools, []),
24
+ tools: kwargs.key?(:tools) ? kwargs[:tools] : nil,
25
25
  tool_choice: kwargs.fetch(:tool_choice, { mode: :auto }),
26
26
  routing: kwargs.fetch(:routing, { provider: nil, model: nil }),
27
27
  tokens: kwargs.fetch(:tokens, { max: 4096 }),
@@ -79,7 +79,7 @@ module Legion
79
79
  messages: messages,
80
80
  system: kwargs[:system],
81
81
  routing: routing,
82
- tools: kwargs.fetch(:tools, []),
82
+ tools: kwargs.key?(:tools) ? kwargs[:tools] : nil,
83
83
  tool_choice: kwargs[:tool_choice] || { mode: :auto },
84
84
  stream: kwargs.fetch(:stream, false),
85
85
  generation: kwargs[:generation] || {},
@@ -13,6 +13,14 @@ module Legion
13
13
  start_time = Time.now
14
14
 
15
15
  unless defined?(::Legion::Rbac)
16
+ if fleet_caller?
17
+ msg = 'RBAC unavailable: fleet callers require RBAC enforcement (fail-closed)'
18
+ log.error("[llm][rbac] fleet_blocked request_id=#{@request.id} reason=rbac_unavailable")
19
+ record_rbac_audit(:failure, msg, start_time)
20
+ record_rbac_timeline("denied: #{msg}")
21
+ raise Legion::LLM::PipelineError.new("403 Forbidden: #{msg}", step: :rbac)
22
+ end
23
+
16
24
  @warnings << 'RBAC unavailable, permitting request without enforcement'
17
25
  log.info("[llm][rbac] unavailable request_id=#{@request.id} action=permit_without_enforcement")
18
26
  record_rbac_audit(:success, 'permitted (rbac unavailable)', start_time)
@@ -20,26 +28,28 @@ module Legion
20
28
  return
21
29
  end
22
30
 
23
- principal = build_rbac_principal
24
- caller_id = extract_rbac_caller_id
25
- log.info("[llm][rbac] authorize request_id=#{@request.id} caller=#{caller_id}")
26
- ::Legion::Rbac.authorize!(principal: principal, action: :use, resource: 'llm/pipeline')
31
+ begin
32
+ principal = build_rbac_principal
33
+ caller_id = extract_rbac_caller_id
34
+ log.info("[llm][rbac] authorize request_id=#{@request.id} caller=#{caller_id}")
35
+ ::Legion::Rbac.authorize!(principal: principal, action: :use, resource: 'llm/pipeline')
27
36
 
28
- log.info("[llm][rbac] permitted request_id=#{@request.id} caller=#{caller_id}")
29
- record_rbac_audit(:success, "permitted caller=#{caller_id}", start_time)
30
- record_rbac_timeline("permitted caller=#{caller_id}")
31
- rescue ::Legion::Rbac::AccessDenied => e
32
- log.warn("[llm][rbac] denied request_id=#{@request.id} error=#{e.message}")
33
- record_rbac_audit(:failure, e.message, start_time)
34
- record_rbac_timeline("denied: #{e.message}")
35
- handle_exception(e, level: :warn, operation: 'llm.pipeline.steps.rbac.denied', request_id: @request.id)
36
- raise Legion::LLM::PipelineError.new("403 Forbidden: #{e.message}", step: :rbac)
37
- rescue StandardError => e
38
- log.error("[llm][rbac] failed request_id=#{@request.id} error=#{e.message}")
39
- record_rbac_audit(:failure, "error: #{e.message}", start_time)
40
- record_rbac_timeline("error: #{e.message}")
41
- handle_exception(e, level: :error, operation: 'llm.pipeline.steps.rbac', request_id: @request.id)
42
- raise Legion::LLM::PipelineError.new("rbac error: #{e.message}", step: :rbac)
37
+ log.info("[llm][rbac] permitted request_id=#{@request.id} caller=#{caller_id}")
38
+ record_rbac_audit(:success, "permitted caller=#{caller_id}", start_time)
39
+ record_rbac_timeline("permitted caller=#{caller_id}")
40
+ rescue ::Legion::Rbac::AccessDenied => e
41
+ log.warn("[llm][rbac] denied request_id=#{@request.id} error=#{e.message}")
42
+ record_rbac_audit(:failure, e.message, start_time)
43
+ record_rbac_timeline("denied: #{e.message}")
44
+ handle_exception(e, level: :warn, operation: 'llm.pipeline.steps.rbac.denied', request_id: @request.id)
45
+ raise Legion::LLM::PipelineError.new("403 Forbidden: #{e.message}", step: :rbac)
46
+ rescue StandardError => e
47
+ log.error("[llm][rbac] failed request_id=#{@request.id} error=#{e.message}")
48
+ record_rbac_audit(:failure, "error: #{e.message}", start_time)
49
+ record_rbac_timeline("error: #{e.message}")
50
+ handle_exception(e, level: :error, operation: 'llm.pipeline.steps.rbac', request_id: @request.id)
51
+ raise Legion::LLM::PipelineError.new("rbac error: #{e.message}", step: :rbac)
52
+ end
43
53
  end
44
54
 
45
55
  private
@@ -54,6 +64,15 @@ module Legion
54
64
  )
55
65
  end
56
66
 
67
+ def fleet_caller?
68
+ agent_ids = [
69
+ @request.agent&.dig(:id),
70
+ @request.caller&.dig(:agent, :id)
71
+ ]
72
+
73
+ agent_ids.any? { |agent_id| agent_id.is_a?(String) && agent_id.start_with?('fleet:') }
74
+ end
75
+
57
76
  def extract_rbac_caller_id
58
77
  @request.caller&.dig(:requested_by, :id) ||
59
78
  @request.caller&.dig(:requested_by, :identity) ||
@@ -10,8 +10,8 @@ module Legion
10
10
  # When provider/model are passed explicitly, they take precedence over routing.
11
11
  def dispatch(message, # rubocop:disable Metrics/ParameterLists
12
12
  intent: nil,
13
- exclude: {}, # rubocop:disable Lint/UnusedMethodArgument -- forwarded to Router.resolve in WS-00E
14
13
  tier: nil,
14
+ exclude: {},
15
15
  provider: nil,
16
16
  model: nil,
17
17
  schema: nil,
@@ -30,8 +30,8 @@ module Legion
30
30
  resolved_provider = provider
31
31
  resolved_model = model
32
32
 
33
- if resolved_provider.nil? && resolved_model.nil? && defined?(Router) && Router.routing_enabled?
34
- resolution = Router.resolve(intent: intent, tier: tier)
33
+ if resolved_provider.nil? && resolved_model.nil? && defined?(Router) && Router.routing_enabled? && (intent || tier)
34
+ resolution = Router.resolve(intent: intent, tier: tier, exclude: exclude)
35
35
  resolved_provider = resolution&.provider
36
36
  resolved_model = resolution&.model
37
37
  end
@@ -22,14 +22,14 @@ module Legion
22
22
  # @param model [String, nil] explicit model override
23
23
  # @param provider [Symbol, nil] explicit provider override
24
24
  # @return [Resolution, nil]
25
- def resolve(intent: nil, tier: nil, model: nil, provider: nil)
25
+ def resolve(intent: nil, tier: nil, model: nil, provider: nil, exclude: {})
26
26
  return explicit_resolution(tier, provider, model) if tier
27
27
 
28
28
  return nil unless routing_enabled? && intent
29
29
 
30
30
  merged = merge_defaults(intent)
31
31
  rules = load_rules
32
- candidates = select_candidates(rules, merged)
32
+ candidates = select_candidates(rules, merged, exclude: exclude)
33
33
  best = pick_best(candidates)
34
34
  resolution = best&.to_resolution
35
35
 
@@ -42,12 +42,12 @@ module Legion
42
42
  resolution || arbitrage_fallback(intent)
43
43
  end
44
44
 
45
- def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil)
45
+ def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil, exclude: {})
46
46
  max = max_escalations || escalation_max_attempts
47
47
  return chain_from_defaults(model, provider, max) unless routing_enabled? && (intent || tier)
48
48
  return EscalationChain.new(resolutions: [explicit_resolution(tier, provider, model)], max_attempts: max) if tier
49
49
 
50
- chain_from_intent(intent, max)
50
+ chain_from_intent(intent, max, exclude: exclude)
51
51
  end
52
52
 
53
53
  def health_tracker
@@ -131,7 +131,7 @@ module Legion
131
131
  raw.map { |h| Rule.from_hash(h.transform_keys(&:to_sym)) }
132
132
  end
133
133
 
134
- def select_candidates(rules, intent)
134
+ def select_candidates(rules, intent, exclude: {})
135
135
  log.debug("Router: selecting candidates from #{rules.size} rules")
136
136
 
137
137
  # 1. Collect constraints from constraint rules that match the intent
@@ -151,8 +151,12 @@ module Legion
151
151
  # 4.5 Reject Ollama rules where model is not pulled or doesn't fit
152
152
  discovered = unconstrained.reject { |r| excluded_by_discovery?(r) }
153
153
 
154
+ # 4.6 Reject rules matching caller-provided exclude list
155
+ normalized_exclude = exclude.is_a?(Hash) ? exclude : {}
156
+ not_excluded = normalized_exclude.empty? ? discovered : discovered.reject { |r| excluded_by_caller?(r, normalized_exclude) }
157
+
154
158
  # 5. Filter by tier availability
155
- final = discovered.select { |r| tier_available?(r.target[:tier] || r.target['tier']) }
159
+ final = not_excluded.select { |r| tier_available?(r.target[:tier] || r.target['tier']) }
156
160
 
157
161
  log.debug("Router: #{final.size} candidates after filtering (started with #{rules.size})")
158
162
 
@@ -204,6 +208,21 @@ module Legion
204
208
  {}
205
209
  end
206
210
 
211
+ def excluded_by_caller?(rule, exclude)
212
+ return false if exclude.nil? || exclude.empty?
213
+
214
+ target = rule.target || {}
215
+ provider = (target[:provider] || target['provider'])&.to_sym
216
+ model = target[:model] || target['model']
217
+ tier = (target[:tier] || target['tier'])&.to_sym
218
+
219
+ return true if exclude[:provider] && provider == exclude[:provider].to_sym
220
+ return true if exclude[:model] && model == exclude[:model]
221
+ return true if exclude[:tier] && tier == exclude[:tier].to_sym
222
+
223
+ false
224
+ end
225
+
207
226
  def privacy_mode?
208
227
  if Legion.const_defined?('Settings', false) && Legion::Settings.respond_to?(:enterprise_privacy?)
209
228
  Legion::Settings.enterprise_privacy?
@@ -272,10 +291,10 @@ module Legion
272
291
  EscalationChain.new(resolutions: [res], max_attempts: max)
273
292
  end
274
293
 
275
- def chain_from_intent(intent, max)
294
+ def chain_from_intent(intent, max, exclude: {})
276
295
  merged = intent ? merge_defaults(intent) : {}
277
296
  rules = load_rules
278
- candidates = select_candidates(rules, merged)
297
+ candidates = select_candidates(rules, merged, exclude: exclude)
279
298
  sorted = candidates.sort_by { |r| -effective_priority(r) }
280
299
  resolutions = sorted.map(&:to_resolution)
281
300
  resolutions = build_fallback_chain(sorted.first, sorted, resolutions) if sorted.first&.fallback
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.7.5'
5
+ VERSION = '0.7.6'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5
4
+ version: 0.7.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity