legion-llm 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 61c5173e4490643fbecb7977697159ffdeb082b63ec4140289128c69efd14806
4
- data.tar.gz: c42e9f24e2ecc387c1076fe32f05b4636e17e9fe92b16bf7ed438198caaa3187
3
+ metadata.gz: f6dc45bc6e985a3a6399ba3ed860bfb1ac9d3d9a0f31dda55a2f812d3c46e7cb
4
+ data.tar.gz: c3db21154b0b43de08e3e23b24416d9a7dc26a58eb10beb19835845b6ad83500
5
5
  SHA512:
6
- metadata.gz: b05c1c69d88184ef4ceea383523c0b6538fd363a1cee9d12bf849ab2885bfdfe7fd59170fe059458c89378f66b93851ec69803ae03af6df1a3bbb55ab1aa432c
7
- data.tar.gz: 57eab8217bdbc614a8602b45836f4b38bf099d9fc2e6b85cfbd92813856b837387e0901ac06af43085cfe0cd034146d160b05cd358afeb45c03ac628280ce9d6
6
+ metadata.gz: 6bd0700aee69aab3d7dad4e3266855d6ddf28de1574a9b1e48e972b653f4af509720e53b2d8c34e84ac9599a325b539c5fc6c7ac765e6c62a846a40e2b6b9519
7
+ data.tar.gz: c2ffe0842728637165668508a68a690eb0a00596710108b4685f47e4fa8b78f24e634ec652e11d7f86ace856f0166299c6827e7bb7a4f1e9ed6e491ed97ca559
data/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.3.13] - 2026-03-21
4
+
5
+ ### Added
6
+ - `Legion::LLM::Hooks::RagGuard` module with `check_rag_faithfulness` for post-generation RAG faithfulness evaluation via lex-eval
7
+ - `Legion::LLM::Hooks::ResponseGuard` module with `guard_response` as the central dispatch point for post-generation safety checks
8
+ - Response guard wired into `_dispatch_chat`: fires when `Legion::Settings[:llm][:response_guards][:enabled]` is true, attaches `_guard_result` metadata to the response hash without blocking
9
+ - RAG guard skips gracefully when lex-eval is unavailable (returns `reason: :eval_unavailable`) or context is not provided (returns `reason: :no_context`)
10
+ - Settings keys: `llm.rag_guard.enabled`, `llm.rag_guard.threshold` (default 0.7), `llm.rag_guard.evaluators` (default `[:faithfulness, :rag_relevancy]`)
11
+ - 19 new specs in `spec/legion/llm/hooks/rag_guard_spec.rb` and `spec/legion/llm/hooks/response_guard_spec.rb`
12
+
13
+ ## [0.3.12] - 2026-03-19
14
+
15
+ ### Added
16
+ - `Legion::LLM::Cache` module with deterministic SHA256 key generation, guarded `get`/`set`, and `enabled?` check
17
+ - Application-level response caching in `chat_direct` via `legion-cache` (Legion::Cache guard required)
18
+ - Cache skip conditions: `cache: false` option, `temperature > 0`, nil message, or cache disabled
19
+ - Cache hits return `{ cached: true }` merged into response metadata
20
+ - Anthropic prompt caching support: injects `cache_control: { type: "ephemeral" }` into system messages longer than `min_tokens` when provider is anthropic
21
+ - `prompt_caching` settings section with `enabled`, `min_tokens`, `response_cache.enabled`, `response_cache.ttl_seconds` defaults
22
+ - 25 new specs in `spec/legion/llm/cache_spec.rb` covering key determinism, hit/miss flows, skip conditions, and Legion::Cache unavailability guard
23
+
3
24
  ## [0.3.11] - 2026-03-20
4
25
 
5
26
  ### Added
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
require 'digest'
require 'json'
4
+
5
module Legion
  module LLM
    # Application-level response cache for LLM calls.
    #
    # Storage is delegated to Legion::Cache. Every public method degrades
    # gracefully when that backend is not loaded or raises: `get` returns nil,
    # `set` returns false and `enabled?` returns false.
    module Cache
      # TTL used by `set` when the caller does not pass one.
      DEFAULT_TTL = 300

      module_function

      # Generates a deterministic SHA256 cache key from request parameters.
      #
      # Nested hashes are serialized with their keys stringified and sorted, so
      # two requests that differ only in hash key order (or in symbol vs string
      # keys) map to the same cache entry. Array order is preserved.
      #
      # @param model [#to_s]
      # @param provider [#to_s]
      # @param messages [Array, Object] chat messages
      # @param temperature [Numeric, nil]
      # @param tools [Object, nil]
      # @param schema [Object, nil]
      # @return [String] 64-character hex digest
      def key(model:, provider:, messages:, temperature: nil, tools: nil, schema: nil)
        # Kept as a local lambda so `key` stays self-contained in both the
        # module-level and the mixed-in copy created by module_function.
        canonical = lambda do |value|
          case value
          when Hash
            value.sort_by { |k, _| k.to_s }.to_h { |k, v| [k.to_s, canonical.call(v)] }
          when Array
            value.map { |item| canonical.call(item) }
          else
            value
          end
        end

        payload = ::JSON.dump({
                                model: model.to_s,
                                provider: provider.to_s,
                                messages: canonical.call(messages),
                                temperature: temperature,
                                tools: canonical.call(tools),
                                schema: canonical.call(schema)
                              })
        Digest::SHA256.hexdigest(payload)
      end

      # Returns the cached response hash (symbolized keys), or nil on a miss,
      # when the cache backend is unavailable, or when the lookup/parse raises.
      def get(cache_key)
        return nil unless available?

        raw = Legion::Cache.get(cache_key)
        return nil if raw.nil?

        ::JSON.parse(raw, symbolize_names: true)
      rescue StandardError
        nil
      end

      # Stores a response as JSON in the cache with the given TTL.
      #
      # @return [Boolean] true when the write was attempted without error,
      #   false when the backend is unavailable or raised
      def set(cache_key, response, ttl: DEFAULT_TTL)
        return false unless available?

        Legion::Cache.set(cache_key, ::JSON.dump(response), ttl)
        true
      rescue StandardError
        false
      end

      # Returns true if Legion::Cache is loaded and response caching has not
      # been explicitly disabled (prompt_caching.response_cache.enabled == false).
      def enabled?
        return false unless available?

        settings = llm_settings
        # A missing (nil) settings section means "not configured", which is
        # treated like an empty hash: caching stays on by default.
        return true unless settings.respond_to?(:dig)

        settings.dig(:prompt_caching, :response_cache, :enabled) != false
      end

      # True when the Legion::Cache backend is loaded and exposes `get`.
      private_class_method def self.available?
        defined?(Legion::Cache) ? Legion::Cache.respond_to?(:get) : false
      end

      # Resolves the :llm settings section, falling back to the bundled
      # defaults and finally to an empty hash if anything raises.
      private_class_method def self.llm_settings
        if Legion.const_defined?('Settings')
          Legion::Settings[:llm]
        else
          Legion::LLM::Settings.default
        end
      rescue StandardError
        {}
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    module Hooks
      # Post-generation RAG faithfulness check.
      #
      # Scores a response against its retrieval context with the lex-eval
      # client (Legion::Extensions::Eval::Client) and reports which evaluators
      # fell below the threshold. The check never raises: infrastructure
      # problems are reported as `faithful: true` with a `reason`.
      module RagGuard
        DEFAULT_THRESHOLD = 0.7
        DEFAULT_EVALUATORS = %i[faithfulness rag_relevancy].freeze

        class << self
          # Evaluates `response` against `context`.
          #
          # @param response [#to_s] generated text
          # @param context [#to_s] retrieval context the text should be grounded in
          # @param threshold [Numeric, nil] minimum passing score; falls back to
          #   llm.rag_guard.threshold, then 0.7
          # @param evaluators [Array<Symbol>, Symbol, nil] evaluator name(s);
          #   falls back to llm.rag_guard.evaluators, then the defaults
          # @return [Hash] `{ faithful:, scores:, flagged_evaluators:, details: }`
          #   after an evaluation, or `{ faithful: true, reason: }` with reason
          #   :eval_unavailable, :disabled or :eval_error when it was skipped
          def check_rag_faithfulness(response:, context:, threshold: nil, evaluators: nil, **)
            return { faithful: true, reason: :eval_unavailable } unless eval_available?
            # Only an explicit `false` disables the guard; an unset key keeps it on.
            return { faithful: true, reason: :disabled } if rag_setting(:enabled) == false

            resolved_threshold = threshold || settings_threshold
            # Array() lets a single evaluator name be given without wrapping it.
            resolved_evaluators = Array(evaluators || settings_evaluators)

            scores = resolved_evaluators.to_h do |evaluator_name|
              [evaluator_name, run_evaluator(evaluator_name, response: response, context: context)]
            end
            flagged = scores.select { |_, score| score < resolved_threshold }.keys

            faithful = flagged.empty?
            details = build_details(scores, resolved_threshold, faithful)

            { faithful: faithful, scores: scores, flagged_evaluators: flagged, details: details }
          rescue StandardError => e
            Legion::Logging.warn "RagGuard evaluation error: #{e.message}" if logging_available?
            { faithful: true, reason: :eval_error }
          end

          private

          def eval_available?
            defined?(Legion::Extensions::Eval::Client)
          end

          def logging_available?
            Legion.const_defined?('Logging')
          end

          # Reads llm.rag_guard.<key>; nil when Legion::Settings is not loaded.
          def rag_setting(key)
            return nil unless Legion.const_defined?('Settings')

            Legion::Settings.dig(:llm, :rag_guard, key)
          end

          def settings_threshold
            rag_setting(:threshold) || DEFAULT_THRESHOLD
          end

          def settings_evaluators
            rag_setting(:evaluators) || DEFAULT_EVALUATORS
          end

          # Runs one evaluator and returns its average score.
          #
          # Any error, or a result without summary.avg_score, yields 0.0, so a
          # broken evaluator is flagged (for any positive threshold) instead of
          # passing unnoticed.
          def run_evaluator(evaluator_name, response:, context:)
            client = Legion::Extensions::Eval::Client.new
            result = client.run_evaluation(
              evaluator_name: evaluator_name,
              inputs: [{ input: context.to_s, output: response.to_s, expected: nil }]
            )
            result.dig(:summary, :avg_score) || 0.0
          rescue StandardError
            0.0
          end

          # Builds the human-readable summary line included in the result.
          def build_details(scores, threshold, faithful)
            score_parts = scores.map { |k, v| "#{k}=#{v.round(3)}" }.join(', ')
            status = faithful ? 'passed' : 'failed'
            "RAG faithfulness check #{status} (threshold=#{threshold}): #{score_parts}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    module Hooks
      # Central dispatch point for post-generation safety checks.
      #
      # Each requested guard is looked up in GUARD_REGISTRY and run against the
      # response; unknown guard names are ignored. The overall verdict fails
      # only when some guard explicitly reports `faithful: false`.
      module ResponseGuard
        GUARD_REGISTRY = {
          rag: RagGuard
        }.freeze

        class << self
          # Runs the requested guards against a response.
          #
          # @param response [Object] generated text handed to each guard
          # @param context [Object, nil] retrieval context; the :rag guard is
          #   skipped with reason :no_context when nil
          # @param guards [Array<Symbol, String>, Symbol, String, nil] guard
          #   name(s); a single name or nil is accepted as well as an array
          # @return [Hash] `{ passed: Boolean, guards: { name => result } }`;
          #   on an unexpected error `{ passed: true, guards: {} }`
          def guard_response(response:, context: nil, guards: [:rag], **)
            # Array() keeps a scalar guard name from raising on iteration,
            # which previously failed open without running any guard.
            guard_results = Array(guards).each_with_object({}) do |guard_name, results|
              guard_mod = GUARD_REGISTRY[guard_name.to_sym]
              next unless guard_mod

              results[guard_name] = dispatch_guard(guard_mod, guard_name,
                                                   response: response, context: context)
            end

            passed = guard_results.values.none? { |r| r[:faithful] == false }

            { passed: passed, guards: guard_results }
          rescue StandardError => e
            Legion::Logging.warn "ResponseGuard error: #{e.message}" if Legion.const_defined?('Logging')
            { passed: true, guards: {} }
          end

          private

          # Calls the guard module's entry point. :rag uses
          # check_rag_faithfulness; any other registered guard is expected to
          # respond to `check(response:, context:)`.
          def dispatch_guard(guard_mod, guard_name, response:, context:)
            case guard_name.to_sym
            when :rag
              return { faithful: true, reason: :no_context } if context.nil?

              guard_mod.check_rag_faithfulness(response: response, context: context)
            else
              guard_mod.check(response: response, context: context)
            end
          end
        end
      end
    end
  end
end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/llm/hooks/rag_guard'
4
+ require 'legion/llm/hooks/response_guard'
5
+
3
6
  module Legion
4
7
  module LLM
5
8
  module Hooks
@@ -14,7 +14,8 @@ module Legion
14
14
  routing: routing_defaults,
15
15
  discovery: discovery_defaults,
16
16
  gateway: gateway_defaults,
17
- daemon: daemon_defaults
17
+ daemon: daemon_defaults,
18
+ prompt_caching: prompt_caching_defaults
18
19
  }
19
20
  end
20
21
 
@@ -25,6 +26,17 @@ module Legion
25
26
  }
26
27
  end
27
28
 
29
# Default settings for the prompt_caching section: provider-side prompt
# caching plus the application-level response cache.
def self.prompt_caching_defaults
  response_cache = { enabled: true, ttl_seconds: 300 }

  { enabled: true, min_tokens: 1024, response_cache: response_cache }
end
39
+
28
40
  def self.discovery_defaults
29
41
  {
30
42
  enabled: true,
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.3.11'
5
+ VERSION = '0.3.13'
6
6
  end
7
7
  end
data/lib/legion/llm.rb CHANGED
@@ -9,6 +9,7 @@ require 'legion/llm/compressor'
9
9
  require 'legion/llm/quality_checker'
10
10
  require 'legion/llm/escalation_history'
11
11
  require 'legion/llm/hooks'
12
+ require 'legion/llm/cache'
12
13
  require_relative 'llm/response_cache'
13
14
  require_relative 'llm/daemon_client'
14
15
 
@@ -95,18 +96,40 @@ module Legion
95
96
 
96
97
  # Direct chat bypassing gateway — used by gateway runners to avoid recursion
97
98
  def chat_direct(model: nil, provider: nil, intent: nil, tier: nil, escalate: nil,
98
- max_escalations: nil, quality_check: nil, message: nil, **)
99
+ max_escalations: nil, quality_check: nil, message: nil, **kwargs)
100
+ cache_opt = kwargs.delete(:cache) { true }
101
+ temperature = kwargs.delete(:temperature)
102
+
99
103
  escalate = escalation_enabled? if escalate.nil?
104
+ cache_key = build_cache_key(model, provider, message, temperature) if cacheable?(cache_opt, temperature, message)
105
+
106
+ if cache_key
107
+ cached = Cache.get(cache_key)
108
+ if cached
109
+ Legion::Logging.debug 'Legion::LLM cache hit'
110
+ cached_response = cached.dup
111
+ cached_response[:meta] = (cached_response[:meta] || {}).merge(cached: true)
112
+ return cached_response
113
+ end
114
+ end
100
115
 
101
- if escalate && message
102
- chat_with_escalation(
103
- model: model, provider: provider, intent: intent, tier: tier,
104
- max_escalations: max_escalations, quality_check: quality_check,
105
- message: message, **
106
- )
107
- else
108
- chat_single(model: model, provider: provider, intent: intent, tier: tier, **)
116
+ result = if escalate && message
117
+ chat_with_escalation(
118
+ model: model, provider: provider, intent: intent, tier: tier,
119
+ max_escalations: max_escalations, quality_check: quality_check,
120
+ message: message, temperature: temperature, **kwargs
121
+ )
122
+ else
123
+ chat_single(model: model, provider: provider, intent: intent, tier: tier,
124
+ temperature: temperature, **kwargs)
125
+ end
126
+
127
+ if cache_key && result.is_a?(Hash)
128
+ ttl = settings.dig(:prompt_caching, :response_cache, :ttl_seconds) || Cache::DEFAULT_TTL
129
+ Cache.set(cache_key, result, ttl: ttl)
109
130
  end
131
+
132
+ result
110
133
  end
111
134
 
112
135
  # Generate embeddings — delegates to gateway when available
@@ -161,7 +184,7 @@ module Legion
161
184
 
162
185
  private
163
186
 
164
- def _dispatch_chat(model:, provider:, intent:, tier:, escalate:, max_escalations:, quality_check:, message:, **)
187
+ def _dispatch_chat(model:, provider:, intent:, tier:, escalate:, max_escalations:, quality_check:, message:, **kwargs)
165
188
  messages = message.is_a?(Array) ? message : [{ role: 'user', content: message.to_s }]
166
189
  resolved_model = model || settings[:default_model]
167
190
 
@@ -173,11 +196,11 @@ module Legion
173
196
  result = if gateway_loaded? && message
174
197
  gateway_chat(model: model, provider: provider, intent: intent,
175
198
  tier: tier, message: message, escalate: escalate,
176
- max_escalations: max_escalations, quality_check: quality_check, **)
199
+ max_escalations: max_escalations, quality_check: quality_check, **kwargs)
177
200
  else
178
201
  chat_direct(model: model, provider: provider, intent: intent, tier: tier,
179
202
  escalate: escalate, max_escalations: max_escalations,
180
- quality_check: quality_check, message: message, **)
203
+ quality_check: quality_check, message: message, **kwargs)
181
204
  end
182
205
 
183
206
  if defined?(Legion::LLM::Hooks)
@@ -185,6 +208,8 @@ module Legion
185
208
  return blocked[:response] if blocked
186
209
  end
187
210
 
211
+ result = apply_response_guards(result, kwargs) if response_guards_enabled? && result.is_a?(Hash)
212
+
188
213
  result
189
214
  end
190
215
 
@@ -268,6 +293,9 @@ module Legion
268
293
  opts[:model] = model if model
269
294
  opts[:provider] = provider if provider
270
295
  opts.merge!(kwargs)
296
+ opts.delete(:temperature) if opts[:temperature].nil?
297
+
298
+ inject_anthropic_cache_control!(opts, provider)
271
299
 
272
300
  RubyLLM.chat(**opts)
273
301
  end
@@ -344,6 +372,55 @@ module Legion
344
372
  nil
345
373
  end
346
374
 
375
# True only when response_guards.enabled is set to exactly `true`;
# an unset key or any other value leaves the guards off.
def response_guards_enabled?
  flag = settings.dig(:response_guards, :enabled)
  flag == true
end
378
+
379
# Runs the response guards over a chat result and attaches their verdict.
#
# The text checked is result[:response], falling back to result[:content];
# the retrieval context comes from kwargs[:context]. The verdict is merged
# into a copy of the result under :_guard_result and never blocks the
# response. A failed verdict is logged as a warning when Legion::Logging
# is loaded.
#
# @param result [Hash] chat result
# @param kwargs [Hash] extra chat options; only :context is read
# @return [Hash] result plus :_guard_result, or the untouched result when
#   the guard machinery itself raises
def apply_response_guards(result, kwargs)
  response_text = result[:response] || result[:content]
  guard_result = Hooks::ResponseGuard.guard_response(
    response: response_text, context: kwargs[:context]
  )

  Legion::Logging.warn "Response guard failed: #{guard_result.inspect}" if !guard_result[:passed] && Legion.const_defined?('Logging')

  result.merge(_guard_result: guard_result)
rescue StandardError => e
  # Guards are best-effort, but a broken guard should be visible in the logs
  # rather than disappearing silently.
  Legion::Logging.warn "Response guard error: #{e.message}" if Legion.const_defined?('Logging')
  result
end
392
+
393
# Decides whether a request may use the response cache.
#
# Caching is skipped when the caller passed `cache: false`, when the
# temperature is non-zero, when there is no message, or when the cache is
# disabled. A nil temperature converts to 0.0 and therefore counts as zero.
def cacheable?(cache_opt, temperature, message)
  return false if cache_opt == false
  return false unless temperature.to_f.zero?

  message && Cache.enabled?
end
396
+
397
# Builds the response-cache key for a chat request.
#
# A non-array message is wrapped as a single user message; a missing model
# or provider falls back to the configured defaults. Only model, provider,
# messages and temperature take part in the key.
def build_cache_key(model, provider, message, temperature)
  conversation = if message.is_a?(Array)
                   message
                 else
                   [{ role: 'user', content: message.to_s }]
                 end
  effective_model = model || settings[:default_model]
  effective_provider = provider || settings[:default_provider]

  Cache.key(model: effective_model, provider: effective_provider,
            messages: conversation, temperature: temperature)
end
406
+
407
# Marks long system instructions as cacheable for Anthropic.
#
# Mutates opts in place: when the resolved provider is :anthropic, prompt
# caching is not explicitly disabled, and opts[:instructions] is a String
# longer than the configured minimum, the string is replaced by a hash
# carrying `cache_control: { type: 'ephemeral' }`. Otherwise opts is left
# untouched and nil is returned.
def inject_anthropic_cache_control!(opts, provider)
  target = (provider || settings[:default_provider])&.to_sym
  return unless target == :anthropic

  config = settings[:prompt_caching] || {}
  return if config[:enabled] == false

  system_prompt = opts[:instructions]
  return unless system_prompt.is_a?(String)

  # NOTE(review): this compares a character count against a setting named
  # min_tokens — confirm whether a token estimate was intended.
  limit = config[:min_tokens] || 1024
  return unless system_prompt.length > limit

  opts[:instructions] = { content: system_prompt, cache_control: { type: 'ephemeral' } }
end
423
+
347
424
  def escalation_enabled?
348
425
  routing = settings[:routing]
349
426
  return false unless routing.is_a?(Hash)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -131,6 +131,7 @@ files:
131
131
  - legion-llm.gemspec
132
132
  - lib/legion/llm.rb
133
133
  - lib/legion/llm/bedrock_bearer_auth.rb
134
+ - lib/legion/llm/cache.rb
134
135
  - lib/legion/llm/claude_config_loader.rb
135
136
  - lib/legion/llm/compressor.rb
136
137
  - lib/legion/llm/daemon_client.rb
@@ -140,6 +141,8 @@ files:
140
141
  - lib/legion/llm/escalation_history.rb
141
142
  - lib/legion/llm/helpers/llm.rb
142
143
  - lib/legion/llm/hooks.rb
144
+ - lib/legion/llm/hooks/rag_guard.rb
145
+ - lib/legion/llm/hooks/response_guard.rb
143
146
  - lib/legion/llm/providers.rb
144
147
  - lib/legion/llm/quality_checker.rb
145
148
  - lib/legion/llm/response_cache.rb