legion-llm 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 61c5173e4490643fbecb7977697159ffdeb082b63ec4140289128c69efd14806
- data.tar.gz: c42e9f24e2ecc387c1076fe32f05b4636e17e9fe92b16bf7ed438198caaa3187
+ metadata.gz: 0551af82013a885240cd8d38ba1f991d470110d308925a8f4848b8650376d252
+ data.tar.gz: fc2da425ddafa426f89375dbffd9afccc2c5d318207ed2bff0513dc57cf7dc07
  SHA512:
- metadata.gz: b05c1c69d88184ef4ceea383523c0b6538fd363a1cee9d12bf849ab2885bfdfe7fd59170fe059458c89378f66b93851ec69803ae03af6df1a3bbb55ab1aa432c
- data.tar.gz: 57eab8217bdbc614a8602b45836f4b38bf099d9fc2e6b85cfbd92813856b837387e0901ac06af43085cfe0cd034146d160b05cd358afeb45c03ac628280ce9d6
+ metadata.gz: 3ebfd45a16cd899050c44c0e53b0ae9952c8c87f46381b0af4356cda6d03ebff05084c3d66923ffd9f3012674b41005e080fa0350cdb4a2f799cbaa83e1cd4dc
+ data.tar.gz: bfb977400e5c78caa90012af604ec47c7b4be94e1d268cfceb3b240b1e9731f678b4a0f6dc30de4df6e014e4714701b15f554e9a5858b3a94754aedaaa67da84
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
  # Legion LLM Changelog
 
+ ## [0.3.12] - 2026-03-19
+
+ ### Added
+ - `Legion::LLM::Cache` module with deterministic SHA256 key generation, guarded `get`/`set`, and `enabled?` check
+ - Application-level response caching in `chat_direct` via `legion-cache` (Legion::Cache guard required)
+ - Cache skip conditions: `cache: false` option, `temperature > 0`, nil message, or cache disabled
+ - Cache hits return `{ cached: true }` merged into response metadata
+ - Anthropic prompt caching support: injects `cache_control: { type: "ephemeral" }` into system messages longer than `min_tokens` when provider is anthropic
+ - `prompt_caching` settings section with `enabled`, `min_tokens`, `response_cache.enabled`, `response_cache.ttl_seconds` defaults
+ - 25 new specs in `spec/legion/llm/cache_spec.rb` covering key determinism, hit/miss flows, skip conditions, and Legion::Cache unavailability guard
+
  ## [0.3.11] - 2026-03-20
 
  ### Added
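For orientation, here is a rough sketch of the response-caching flow these changelog entries describe, as seen from a caller. The model name and message text are placeholders, it assumes `legion-cache` is loaded so `Legion::Cache` is defined, and it assumes `chat_direct` is exposed as a module-level method on `Legion::LLM`:

```ruby
require 'legion/llm'

# First call reaches the provider; the response hash is stored in legion-cache
# under a deterministic SHA256 key built from model, provider, messages and temperature.
first = Legion::LLM.chat_direct(model: 'claude-sonnet', message: 'Summarize the release notes')

# An identical request (temperature unset or 0) is answered from the cache and
# carries the cached marker merged into its metadata.
second = Legion::LLM.chat_direct(model: 'claude-sonnet', message: 'Summarize the release notes')
second[:meta][:cached] # => true

# Either documented opt-out bypasses the cache entirely.
Legion::LLM.chat_direct(model: 'claude-sonnet', message: 'Be creative', temperature: 0.8)
Legion::LLM.chat_direct(model: 'claude-sonnet', message: 'Summarize again', cache: false)
```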
data/lib/legion/llm/cache.rb ADDED
@@ -0,0 +1,70 @@
+ # frozen_string_literal: true
+
+ require 'digest'
+
+ module Legion
+   module LLM
+     module Cache
+       DEFAULT_TTL = 300
+
+       module_function
+
+       # Generates a deterministic SHA256 cache key from request parameters.
+       def key(model:, provider:, messages:, temperature: nil, tools: nil, schema: nil)
+         payload = ::JSON.dump({
+                                 model: model.to_s,
+                                 provider: provider.to_s,
+                                 messages: messages,
+                                 temperature: temperature,
+                                 tools: tools,
+                                 schema: schema
+                               })
+         Digest::SHA256.hexdigest(payload)
+       end
+
+       # Returns the cached response hash, or nil on miss / cache unavailable.
+       def get(cache_key)
+         return nil unless available?
+
+         raw = Legion::Cache.get(cache_key)
+         return nil if raw.nil?
+
+         ::JSON.parse(raw, symbolize_names: true)
+       rescue StandardError
+         nil
+       end
+
+       # Stores a response in the cache with the given TTL.
+       def set(cache_key, response, ttl: DEFAULT_TTL)
+         return false unless available?
+
+         Legion::Cache.set(cache_key, ::JSON.dump(response), ttl)
+         true
+       rescue StandardError
+         false
+       end
+
+       # Returns true if response caching is enabled in settings and Legion::Cache is loaded.
+       def enabled?
+         return false unless available?
+
+         settings = llm_settings
+         settings.dig(:prompt_caching, :response_cache, :enabled) != false
+       end
+
+       private_class_method def self.available?
+         defined?(Legion::Cache) && Legion::Cache.respond_to?(:get)
+       end
+
+       private_class_method def self.llm_settings
+         if Legion.const_defined?('Settings')
+           Legion::Settings[:llm]
+         else
+           Legion::LLM::Settings.default
+         end
+       rescue StandardError
+         {}
+       end
+     end
+   end
+ end
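Taken on its own, the module's behaviour can be sketched as follows. This is illustrative only: the model and provider values are placeholders, `json` is required explicitly because `cache.rb` itself only requires `digest`, and the last three calls show the fail-closed guards when `legion-cache` is not loaded:

```ruby
require 'json'
require 'legion/llm/cache'

args = { model: 'claude-sonnet', provider: :anthropic,
         messages: [{ role: 'user', content: 'hello' }], temperature: 0 }

# Identical request parameters always hash to the same 64-character hex key.
Legion::LLM::Cache.key(**args) == Legion::LLM::Cache.key(**args) # => true

# Without Legion::Cache defined, the guards return neutral values instead of raising.
key = Legion::LLM::Cache.key(**args)
Legion::LLM::Cache.get(key)                      # => nil
Legion::LLM::Cache.set(key, { content: 'hi' })   # => false
Legion::LLM::Cache.enabled?                      # => false
```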
@@ -14,7 +14,8 @@ module Legion
        routing: routing_defaults,
        discovery: discovery_defaults,
        gateway: gateway_defaults,
-       daemon: daemon_defaults
+       daemon: daemon_defaults,
+       prompt_caching: prompt_caching_defaults
      }
    end
 
@@ -25,6 +26,17 @@ module Legion
      }
    end
 
+   def self.prompt_caching_defaults
+     {
+       enabled: true,
+       min_tokens: 1024,
+       response_cache: {
+         enabled: true,
+         ttl_seconds: 300
+       }
+     }
+   end
+
    def self.discovery_defaults
      {
        enabled: true,
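Assuming `Legion::LLM::Settings.default` is the entry point that assembles these sections (it is the fallback `Cache` uses when the global `Legion::Settings` constant is absent), the new defaults look like this, and response caching is opt-out rather than opt-in:

```ruby
Legion::LLM::Settings.default[:prompt_caching]
# => {
#      enabled: true,
#      min_tokens: 1024,
#      response_cache: { enabled: true, ttl_seconds: 300 }
#    }

# Cache.enabled? only returns false when the flag is explicitly false, so disabling
# the response cache means setting prompt_caching: { response_cache: { enabled: false } }.
```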
@@ -2,6 +2,6 @@
 
  module Legion
    module LLM
-     VERSION = '0.3.11'
+     VERSION = '0.3.12'
    end
  end
data/lib/legion/llm.rb CHANGED
@@ -9,6 +9,7 @@ require 'legion/llm/compressor'
  require 'legion/llm/quality_checker'
  require 'legion/llm/escalation_history'
  require 'legion/llm/hooks'
+ require 'legion/llm/cache'
  require_relative 'llm/response_cache'
  require_relative 'llm/daemon_client'
 
@@ -95,18 +96,40 @@ module Legion
 
    # Direct chat bypassing gateway — used by gateway runners to avoid recursion
    def chat_direct(model: nil, provider: nil, intent: nil, tier: nil, escalate: nil,
-                   max_escalations: nil, quality_check: nil, message: nil, **)
+                   max_escalations: nil, quality_check: nil, message: nil, **kwargs)
+     cache_opt = kwargs.delete(:cache) { true }
+     temperature = kwargs.delete(:temperature)
+
      escalate = escalation_enabled? if escalate.nil?
+     cache_key = build_cache_key(model, provider, message, temperature) if cacheable?(cache_opt, temperature, message)
+
+     if cache_key
+       cached = Cache.get(cache_key)
+       if cached
+         Legion::Logging.debug 'Legion::LLM cache hit'
+         cached_response = cached.dup
+         cached_response[:meta] = (cached_response[:meta] || {}).merge(cached: true)
+         return cached_response
+       end
+     end
 
-     if escalate && message
-       chat_with_escalation(
-         model: model, provider: provider, intent: intent, tier: tier,
-         max_escalations: max_escalations, quality_check: quality_check,
-         message: message, **
-       )
-     else
-       chat_single(model: model, provider: provider, intent: intent, tier: tier, **)
+     result = if escalate && message
+                chat_with_escalation(
+                  model: model, provider: provider, intent: intent, tier: tier,
+                  max_escalations: max_escalations, quality_check: quality_check,
+                  message: message, temperature: temperature, **kwargs
+                )
+              else
+                chat_single(model: model, provider: provider, intent: intent, tier: tier,
+                            temperature: temperature, **kwargs)
+              end
+
+     if cache_key && result.is_a?(Hash)
+       ttl = settings.dig(:prompt_caching, :response_cache, :ttl_seconds) || Cache::DEFAULT_TTL
+       Cache.set(cache_key, result, ttl: ttl)
      end
+
+     result
    end
 
    # Generate embeddings — delegates to gateway when available
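In practice the cache only engages when every condition in `cacheable?` holds; a hedged sketch of how different calls fall on either side of that gate (model and messages are placeholders, and `chat_direct` is assumed reachable on `Legion::LLM`):

```ruby
# Cached: temperature omitted (coerced to 0 by cacheable?), caching not disabled.
Legion::LLM.chat_direct(model: 'claude-sonnet', message: 'ping')

# Skipped: non-zero temperature makes the response non-deterministic.
Legion::LLM.chat_direct(model: 'claude-sonnet', message: 'ping', temperature: 0.7)

# Skipped: per-request opt-out.
Legion::LLM.chat_direct(model: 'claude-sonnet', message: 'ping', cache: false)

# Skipped: no message, so there is nothing stable to key on.
Legion::LLM.chat_direct(model: 'claude-sonnet')
```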
@@ -268,6 +291,9 @@ module Legion
      opts[:model] = model if model
      opts[:provider] = provider if provider
      opts.merge!(kwargs)
+     opts.delete(:temperature) if opts[:temperature].nil?
+
+     inject_anthropic_cache_control!(opts, provider)
 
      RubyLLM.chat(**opts)
    end
@@ -344,6 +370,37 @@ module Legion
      nil
    end
 
+   def cacheable?(cache_opt, temperature, message)
+     cache_opt != false && temperature.to_f.zero? && message && Cache.enabled?
+   end
+
+   def build_cache_key(model, provider, message, temperature)
+     messages_arr = message.is_a?(Array) ? message : [{ role: 'user', content: message.to_s }]
+     Cache.key(
+       model: model || settings[:default_model],
+       provider: provider || settings[:default_provider],
+       messages: messages_arr,
+       temperature: temperature
+     )
+   end
+
+   def inject_anthropic_cache_control!(opts, provider)
+     resolved_provider = (provider || settings[:default_provider])&.to_sym
+     return unless resolved_provider == :anthropic
+
+     caching_settings = settings[:prompt_caching] || {}
+     return unless caching_settings[:enabled] != false
+
+     min_tokens = caching_settings[:min_tokens] || 1024
+     instructions = opts[:instructions]
+     return unless instructions.is_a?(String) && instructions.length > min_tokens
+
+     opts[:instructions] = {
+       content: instructions,
+       cache_control: { type: 'ephemeral' }
+     }
+   end
+
    def escalation_enabled?
      routing = settings[:routing]
      return false unless routing.is_a?(Hash)
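For the Anthropic prompt-caching path, the only mutation is to the `:instructions` option before it reaches `RubyLLM.chat`. A hedged before/after illustration (the prompt text is a placeholder, and it assumes the underlying ruby_llm call accepts the hash form of `:instructions` that this code builds):

```ruby
long_system_prompt = 'You are the Legion operations assistant. ' * 60 # ~2,500 chars, past min_tokens: 1024

opts = { model: 'claude-sonnet', instructions: long_system_prompt }

# After inject_anthropic_cache_control!(opts, :anthropic) the option is rewrapped as:
# opts[:instructions] == {
#   content: long_system_prompt,
#   cache_control: { type: 'ephemeral' }
# }
#
# Short prompts, non-Anthropic providers, or prompt_caching.enabled == false leave opts unchanged.
```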
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: legion-llm
  version: !ruby/object:Gem::Version
-   version: 0.3.11
+   version: 0.3.12
  platform: ruby
  authors:
  - Esity
@@ -131,6 +131,7 @@ files:
  - legion-llm.gemspec
  - lib/legion/llm.rb
  - lib/legion/llm/bedrock_bearer_auth.rb
+ - lib/legion/llm/cache.rb
  - lib/legion/llm/claude_config_loader.rb
  - lib/legion/llm/compressor.rb
  - lib/legion/llm/daemon_client.rb