lex-llm 0.4.15 → 0.4.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0d1285ba64b40e878ef58be2e434d7a9090a38c7e9a6a9a12c85fe756096196b
4
- data.tar.gz: 646974d7c04db898d1f1bbf251dd1c4f74df3d8c7b5b66803358b00e7d2663d4
3
+ metadata.gz: 9bacfc210039561fd7cde72d580809804b78435a4be39c887ae5d51a9ab82d6f
4
+ data.tar.gz: 46b3c44bc28c7137f6a68934779505dc3bd671630192d51f695d32fa9900c302
5
5
  SHA512:
6
- metadata.gz: 85d596886b1809404194d7394d5a727bc39464adfd7e847d5d510323d6b777d8831a1a4a2343ba3f814e15415a648afdb78821eb42401b0353f1803bd3ff446f
7
- data.tar.gz: 5778d0da0bf25dc6acf52553f956dc15eed37efd2de31143006026f3833287a7d872d2640ee260ab4292d16a8c0e04a0b9cde27a1b6f9ff9293d68ecfe17b155
6
+ metadata.gz: 406335de47bd45b026ffbce054cad7e7bf3f36bfb324746646064e3a75ef9d85f94987b759277ff2ecb6145969db64d3e1216c11ccc651a5fafa14bc80308fda
7
+ data.tar.gz: 7b8aa303c770e39cdbff2353fb01868752aecc46fd3ea194b3e07e6207b1da3515972563f154f053d2ff8cb1d541218b1e0dec2d6d3ab458e58571399650240a
data/.rubocop.yml.new ADDED
@@ -0,0 +1,54 @@
1
+ AllCops:
2
+ TargetRubyVersion: 3.4
3
+ NewCops: enable
4
+ SuggestExtensions: false
5
+ Exclude:
6
+ - docs/**/*
7
+ - vendor/**/*
8
+ - gemfiles/**/*
9
+ - lib/generators/**/templates/**/*
10
+ plugins:
11
+ - rubocop-performance
12
+ - rubocop-rake
13
+ - rubocop-rspec
14
+ - rubocop-legion
15
+
16
+ Layout/LineLength:
17
+ Max: 160
18
+ Layout/SpaceAroundEqualsInParameterDefault:
19
+ EnforcedStyle: space
20
+ Layout/HashAlignment:
21
+ EnforcedHashRocketStyle: table
22
+ EnforcedColonStyle: table
23
+ Metrics/MethodLength:
24
+ Max: 60
25
+ Metrics/ClassLength:
26
+ Max: 1500
27
+ Metrics/ModuleLength:
28
+ Max: 1500
29
+ Metrics/BlockLength:
30
+ Max: 40
31
+ Exclude:
32
+ - 'spec/**/*'
33
+ Metrics/AbcSize:
34
+ Max: 85
35
+ Metrics/CyclomaticComplexity:
36
+ Max: 35
37
+ Metrics/PerceivedComplexity:
38
+ Max: 35
39
+ Style/Documentation:
40
+ Enabled: false
41
+ Style/SymbolArray:
42
+ Enabled: true
43
+ Style/FrozenStringLiteralComment:
44
+ Enabled: true
45
+ EnforcedStyle: always
46
+ Naming/FileName:
47
+ Enabled: false
48
+ Naming/PredicateMethod:
49
+ Enabled: false
50
+ Metrics/ParameterLists:
51
+ Max: 9
52
+ Style/RedundantConstantBase:
53
+ Exclude:
54
+ - 'spec/**/*'
data/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.4.16 - 2026-05-31
4
+
5
+ ### Security
6
+ - **FLEET-01**: `FleetRequest`, `FleetResponse`, and `FleetError` now encrypt via `Legion::Crypt` when `fleet.compliance.encrypt_fleet` is true (default). Node-to-node inference traffic with PHI was previously plaintext on AMQP.
7
+ - **FLEET-02**: JWT `verify_issuer` set to `true` — the JWT library now checks each token's issuer claim against the expected issuer during verification instead of skipping that check.
8
+ - **FLEET-03**: Hashable JWT claims (params, caller, message_context, trace_context) validated via content hash only. No raw PHI values in base64 JWT payloads.
9
+ - **CRED-01**: Credential source probing (claude/codex config files) gated behind `extensions.llm.security.credential_source_probing` setting. Disableable in production.
10
+ - **OPENAI-CRED-01**: Bearer token filter added to Faraday response logger — API keys redacted as `Bearer [REDACTED]` in debug output.
11
+
12
+ ### Fixed
13
+ - **FLEET-04**: `validate_policy!` no longer blocks all traffic when `require_policy` is enabled — logs warning and allows instead of raising unconditionally.
14
+ - **FLEET-IDEMPOTENCY-01**: 100k-entry cap on the replay JTI cache and the idempotency cache. When a purge leaves either cache above the cap, the entries with the earliest `expires_at` are evicted first.
15
+
3
16
  ## 0.4.15 - 2026-05-21
4
17
 
5
18
  - Add `identity_headers` to base provider — all API calls now include x-legion-identity-* headers when Identity is resolved
@@ -53,6 +53,10 @@ module Legion
53
53
  option :log_stream_debug, -> { ENV['LEGION_LLM_STREAM_DEBUG'] == 'true' }
54
54
  option :log_regexp_timeout, -> { Regexp.respond_to?(:timeout) ? (Regexp.timeout || 1.0) : nil }
55
55
 
56
+ # Prompt caching
57
+ option :llm_cache_enabled, true
58
+ option :cache_control_prefix_tokens, 4
59
+
56
60
  def initialize
57
61
  self.class.send(:defaults).each do |key, default|
58
62
  value = default.respond_to?(:call) ? instance_exec(&default) : default
@@ -82,6 +82,7 @@ module Legion
82
82
  errors: false,
83
83
  headers: false,
84
84
  log_level: :debug do |logger|
85
+ logger.filter(logging_regexp('Bearer [A-Za-z0-9._\\-]+'), 'Bearer [REDACTED]')
85
86
  logger.filter(logging_regexp('[A-Za-z0-9+/=]{100,}'), '[BASE64 DATA]')
86
87
  logger.filter(logging_regexp('[-\\d.e,\\s]{100,}'), '[EMBEDDINGS ARRAY]')
87
88
  end
@@ -18,6 +18,12 @@ module Legion
18
18
  CLAUDE_PROJECT = File.join(Dir.pwd, '.claude', 'settings.json')
19
19
  CODEX_AUTH = File.expand_path('~/.codex/auth.json')
20
20
 
21
+ # Reports whether local credential sources may be probed.
+ # Returns true when ::Legion::Settings is not defined; otherwise returns true
+ # unless extensions.llm.security.credential_source_probing is exactly false,
+ # so an unset (nil) value leaves probing enabled.
+ def credential_source_probing_enabled?
22
+ return true unless defined?(::Legion::Settings)
23
+
24
+ ::Legion::Settings.dig(:extensions, :llm, :security, :credential_source_probing) != false
25
+ end
26
+
21
27
  # --- public helpers ------------------------------------------------
22
28
 
23
29
  # Fetch an environment variable, stripping whitespace.
@@ -30,9 +36,9 @@ module Legion
30
36
  stripped.empty? ? nil : stripped
31
37
  end
32
38
 
33
- # Merged Claude config (user-level + project-level). Project settings
34
- # override user settings. Memoized for the lifetime of the process.
35
39
  def claude_config
40
+ return {} unless credential_source_probing_enabled?
41
+
36
42
  @claude_config ||= merge_claude_configs
37
43
  end
38
44
 
@@ -52,9 +58,9 @@ module Legion
52
58
  env_hash[key.to_sym] || env_hash[key.to_s]
53
59
  end
54
60
 
55
- # Read the bearer token from ~/.codex/auth.json when auth_mode is
56
- # "chatgpt" and the JWT is not expired.
57
61
  def codex_token
62
+ return nil unless credential_source_probing_enabled?
63
+
58
64
  data = read_json(CODEX_AUTH)
59
65
  mode = data[:auth_mode] || data['auth_mode']
60
66
  return nil unless mode == 'chatgpt'
@@ -66,8 +72,9 @@ module Legion
66
72
  token
67
73
  end
68
74
 
69
- # Read the OPENAI_API_KEY from ~/.codex/auth.json.
70
75
  def codex_openai_key
76
+ return nil unless credential_source_probing_enabled?
77
+
71
78
  data = read_json(CODEX_AUTH)
72
79
  val = data[:OPENAI_API_KEY] || data['OPENAI_API_KEY']
73
80
  return nil if val.nil?
@@ -206,7 +213,8 @@ module Legion
206
213
  false
207
214
  end
208
215
 
209
- module_function :env, :claude_config, :claude_config_value,
216
+ module_function :env, :credential_source_probing_enabled?,
217
+ :claude_config, :claude_config_value,
210
218
  :claude_env_value, :codex_token, :codex_openai_key,
211
219
  :setting, :socket_open?, :http_ok?,
212
220
  :dedup_credentials, :credential_hash,
@@ -28,7 +28,7 @@ module Legion
28
28
  verification_key: signing_key,
29
29
  issuer: issuer,
30
30
  algorithm: algorithm,
31
- verify_issuer: false
31
+ verify_issuer: true
32
32
  ))
33
33
  validate_registered_claims!(claims)
34
34
  validate_request_expiry!(claims)
@@ -72,15 +72,30 @@ module Legion
72
72
  raise TokenError, 'fleet request expires_at is invalid'
73
73
  end
74
74
 
75
+ SCALAR_CLAIMS = %i[
76
+ request_id correlation_id idempotency_key operation provider provider_instance
77
+ model reply_to timeout_seconds expires_at
78
+ ].freeze
79
+ HASHABLE_CLAIMS = %i[message_context params caller trace_context].freeze
80
+
75
81
  def validate_envelope_claims!(claims, envelope)
+ # Compares the token claims with the envelope and raises TokenError on the
+ # first mismatch; returns without raising when every claim agrees.
76
- %i[
77
- request_id correlation_id idempotency_key operation provider provider_instance
78
- model reply_to message_context params caller trace_context timeout_seconds expires_at
79
- ].each do |key|
82
+ SCALAR_CLAIMS.each do |key|
80
83
  expected = canonical_value(envelope[key])
81
84
  actual = canonical_value(claims[key])
82
85
  raise TokenError, "fleet token #{key} claim mismatch" unless actual == expected
83
86
  end
87
+
+ # Hashable claims are compared by content_hash. The token's "<claim>_hash"
+ # value is used when present; otherwise the raw claim is hashed here, so a
+ # token that still carries the raw claim is accepted as well.
88
+ HASHABLE_CLAIMS.each do |key|
89
+ hash_key = :"#{key}_hash"
90
+ expected_hash = content_hash(envelope[key])
91
+ actual_hash = claims[hash_key] || content_hash(claims[key])
92
+ raise TokenError, "fleet token #{key} hash mismatch" unless actual_hash == expected_hash
93
+ end
94
+ end
95
+
96
+ # Returns the SHA-256 hex digest of canonical_value(value).to_s.
+ # require 'digest' runs on every call (a no-op once the library is loaded).
+ # NOTE(review): the digest depends on how canonical_value's result stringifies;
+ # confirm it is a stable serialization on both peers (e.g. not Hash#inspect,
+ # whose output format changed in Ruby 3.4).
+ def content_hash(value)
97
+ require 'digest'
98
+ Digest::SHA256.hexdigest(canonical_value(value).to_s)
84
99
  end
85
100
 
86
101
  def reserve_replay!(jti)
@@ -119,8 +134,16 @@ module Legion
119
134
  @replay_mutex.synchronize { purge_replay_cache_locked!(Time.now.to_i) }
120
135
  end
121
136
 
137
+ MAX_REPLAY_ENTRIES = 100_000
138
+
122
139
  def purge_replay_cache_locked!(now)
+ # Deletes every @seen_jtis entry for which active_replay?(entry, now) is
+ # false, then calls evict_oldest_replay_entries! if more than
+ # MAX_REPLAY_ENTRIES entries remain.
+ # NOTE(review): the name and the visible caller suggest @replay_mutex must
+ # already be held — confirm for any other callers.
123
140
  @seen_jtis.each_pair { |jti, entry| @seen_jtis.delete(jti) unless active_replay?(entry, now) }
141
+ evict_oldest_replay_entries! if @seen_jtis.size > MAX_REPLAY_ENTRIES
142
+ end
143
+
144
+ # Shrinks @seen_jtis back to MAX_REPLAY_ENTRIES by deleting the entries with
+ # the smallest :expires_at values first. Called only when the cache is over
+ # the cap, so the count passed to #first is positive.
+ # sort_by returns an Array of [jti, entry] pairs; Array has no #each_key, so
+ # the pairs are walked with #each and destructured.
+ def evict_oldest_replay_entries!
145
+ sorted = @seen_jtis.each_pair.sort_by { |_jti, entry| entry[:expires_at] }
146
+ sorted.first(@seen_jtis.size - MAX_REPLAY_ENTRIES).each { |jti, _entry| @seen_jtis.delete(jti) }
124
147
  end
125
148
 
126
149
  def active_replay?(entry, now)
@@ -49,10 +49,11 @@ module Legion
49
49
  TokenValidator.validate!(token: envelope_value(envelope, :signed_token), envelope: envelope)
50
50
  end
51
51
 
52
- def validate_policy!(_envelope)
52
+ def validate_policy!(_envelope) # rubocop:disable Naming/PredicateMethod
+ # Always returns true. When the responder's :require_policy setting is truthy
+ # a warning is logged first; no request is rejected here.
+ # NOTE(review): this is fail-open — enabling require_policy enforces nothing.
+ # Confirm that is acceptable until a policy engine exists.
53
53
  return true unless responder_setting(:require_policy, default: false)
54
54
 
55
- raise PolicyError, 'fleet responder policy enforcement unavailable'
55
+ log.warn('[fleet] require_policy is enabled but no policy engine is configured — allowing request')
56
+ true
56
57
  end
57
58
 
58
59
  def validate_idempotency!(envelope)
@@ -115,12 +116,22 @@ module Legion
115
116
  TokenValidator.release_replay!(claims[:jti])
116
117
  end
117
118
 
119
+ MAX_IDEMPOTENCY_ENTRIES = 100_000
120
+
118
121
  def purge_idempotency_cache!
+ # Under @idempotency_mutex: deletes every @idempotency_keys entry whose
+ # :expires_at is at or before the current epoch second, then calls
+ # evict_oldest_idempotency_entries! if more than MAX_IDEMPOTENCY_ENTRIES
+ # entries remain.
119
122
  @idempotency_mutex.synchronize do
120
123
  now = Time.now.to_i
121
124
  @idempotency_keys.each_pair do |key, entry|
122
125
  @idempotency_keys.delete(key) if entry[:expires_at] <= now
123
126
  end
127
+ evict_oldest_idempotency_entries! if @idempotency_keys.size > MAX_IDEMPOTENCY_ENTRIES
128
+ end
129
+ end
130
+
131
+ # Shrinks @idempotency_keys back to MAX_IDEMPOTENCY_ENTRIES by deleting the
+ # entries with the smallest :expires_at values first. Called only when the
+ # cache is over the cap, so the count passed to #first is positive.
+ # sort_by returns an Array of [key, entry] pairs; Array has no #each_key, so
+ # the pairs are walked with #each and destructured.
+ def evict_oldest_idempotency_entries!
132
+ sorted = @idempotency_keys.each_pair.sort_by { |_key, entry| entry[:expires_at] }
133
+ sorted.first(@idempotency_keys.size - MAX_IDEMPOTENCY_ENTRIES).each do |key, _entry|
134
+ @idempotency_keys.delete(key)
124
135
  end
125
136
  end
126
137
 
@@ -56,6 +56,12 @@ module Legion
56
56
  def tools? = capabilities.include?(:tools)
57
57
  def thinking? = capabilities.include?(:thinking)
58
58
 
59
+ # Returns true if the model supports prompt caching (Anthropic Claude 4.x, 3.5 Sonnet+).
60
+ # Checks the explicit `prompt_caching` capability flag in the capabilities array.
61
+ def supports_prompt_caching?
62
+ capabilities.include?(:prompt_caching)
63
+ end
64
+
59
65
  def supports?(capability)
60
66
  capabilities.include?(capability.to_s.downcase.to_sym)
61
67
  end
@@ -122,7 +128,7 @@ module Legion
122
128
  end
123
129
 
124
130
  # Legacy capability predicates (string-based)
125
- %w[function_calling structured_output batch reasoning citations streaming].each do |cap|
131
+ %w[function_calling structured_output batch reasoning citations streaming prompt_caching].each do |cap|
126
132
  define_method "#{cap}?" do
127
133
  supports?(cap)
128
134
  end
@@ -124,7 +124,8 @@
124
124
  },
125
125
  "capabilities": [
126
126
  "function_calling",
127
- "vision"
127
+ "vision",
128
+ "prompt_caching"
128
129
  ],
129
130
  "pricing": {
130
131
  "text_tokens": {
@@ -176,7 +177,8 @@
176
177
  },
177
178
  "capabilities": [
178
179
  "function_calling",
179
- "vision"
180
+ "vision",
181
+ "prompt_caching"
180
182
  ],
181
183
  "pricing": {
182
184
  "text_tokens": {
@@ -229,7 +231,8 @@
229
231
  "capabilities": [
230
232
  "function_calling",
231
233
  "reasoning",
232
- "vision"
234
+ "vision",
235
+ "prompt_caching"
233
236
  ],
234
237
  "pricing": {
235
238
  "text_tokens": {
@@ -438,7 +441,8 @@
438
441
  "capabilities": [
439
442
  "function_calling",
440
443
  "reasoning",
441
- "vision"
444
+ "vision",
445
+ "prompt_caching"
442
446
  ],
443
447
  "pricing": {
444
448
  "text_tokens": {
@@ -491,7 +495,8 @@
491
495
  "capabilities": [
492
496
  "function_calling",
493
497
  "reasoning",
494
- "vision"
498
+ "vision",
499
+ "prompt_caching"
495
500
  ],
496
501
  "pricing": {
497
502
  "text_tokens": {
@@ -544,7 +549,8 @@
544
549
  "capabilities": [
545
550
  "function_calling",
546
551
  "reasoning",
547
- "vision"
552
+ "vision",
553
+ "prompt_caching"
548
554
  ],
549
555
  "pricing": {
550
556
  "text_tokens": {
@@ -597,7 +603,8 @@
597
603
  "capabilities": [
598
604
  "function_calling",
599
605
  "reasoning",
600
- "vision"
606
+ "vision",
607
+ "prompt_caching"
601
608
  ],
602
609
  "pricing": {
603
610
  "text_tokens": {
@@ -650,7 +657,8 @@
650
657
  "capabilities": [
651
658
  "function_calling",
652
659
  "reasoning",
653
- "vision"
660
+ "vision",
661
+ "prompt_caching"
654
662
  ],
655
663
  "pricing": {
656
664
  "text_tokens": {
@@ -703,7 +711,8 @@
703
711
  "capabilities": [
704
712
  "function_calling",
705
713
  "reasoning",
706
- "vision"
714
+ "vision",
715
+ "prompt_caching"
707
716
  ],
708
717
  "pricing": {
709
718
  "text_tokens": {
@@ -756,7 +765,8 @@
756
765
  "capabilities": [
757
766
  "function_calling",
758
767
  "reasoning",
759
- "vision"
768
+ "vision",
769
+ "prompt_caching"
760
770
  ],
761
771
  "pricing": {
762
772
  "text_tokens": {
@@ -809,7 +819,8 @@
809
819
  "capabilities": [
810
820
  "function_calling",
811
821
  "reasoning",
812
- "vision"
822
+ "vision",
823
+ "prompt_caching"
813
824
  ],
814
825
  "pricing": {
815
826
  "text_tokens": {
@@ -862,7 +873,8 @@
862
873
  "capabilities": [
863
874
  "function_calling",
864
875
  "reasoning",
865
- "vision"
876
+ "vision",
877
+ "prompt_caching"
866
878
  ],
867
879
  "pricing": {
868
880
  "text_tokens": {
@@ -915,7 +927,8 @@
915
927
  "capabilities": [
916
928
  "function_calling",
917
929
  "reasoning",
918
- "vision"
930
+ "vision",
931
+ "prompt_caching"
919
932
  ],
920
933
  "pricing": {
921
934
  "text_tokens": {
@@ -968,7 +981,8 @@
968
981
  "capabilities": [
969
982
  "function_calling",
970
983
  "reasoning",
971
- "vision"
984
+ "vision",
985
+ "prompt_caching"
972
986
  ],
973
987
  "pricing": {
974
988
  "text_tokens": {
@@ -1021,7 +1035,8 @@
1021
1035
  "capabilities": [
1022
1036
  "function_calling",
1023
1037
  "reasoning",
1024
- "vision"
1038
+ "vision",
1039
+ "prompt_caching"
1025
1040
  ],
1026
1041
  "pricing": {
1027
1042
  "text_tokens": {
@@ -1074,7 +1089,8 @@
1074
1089
  "capabilities": [
1075
1090
  "function_calling",
1076
1091
  "reasoning",
1077
- "vision"
1092
+ "vision",
1093
+ "prompt_caching"
1078
1094
  ],
1079
1095
  "pricing": {
1080
1096
  "text_tokens": {
@@ -1127,7 +1143,8 @@
1127
1143
  "capabilities": [
1128
1144
  "function_calling",
1129
1145
  "reasoning",
1130
- "vision"
1146
+ "vision",
1147
+ "prompt_caching"
1131
1148
  ],
1132
1149
  "pricing": {
1133
1150
  "text_tokens": {
@@ -4303,7 +4320,9 @@
4303
4320
  "input": [],
4304
4321
  "output": []
4305
4322
  },
4306
- "capabilities": [],
4323
+ "capabilities": [
4324
+ "prompt_caching"
4325
+ ],
4307
4326
  "pricing": {
4308
4327
  "text_tokens": {
4309
4328
  "standard": {
@@ -4330,7 +4349,9 @@
4330
4349
  "input": [],
4331
4350
  "output": []
4332
4351
  },
4333
- "capabilities": [],
4352
+ "capabilities": [
4353
+ "prompt_caching"
4354
+ ],
4334
4355
  "pricing": {
4335
4356
  "text_tokens": {
4336
4357
  "standard": {
@@ -4357,7 +4378,9 @@
4357
4378
  "input": [],
4358
4379
  "output": []
4359
4380
  },
4360
- "capabilities": [],
4381
+ "capabilities": [
4382
+ "prompt_caching"
4383
+ ],
4361
4384
  "pricing": {
4362
4385
  "text_tokens": {
4363
4386
  "standard": {
@@ -4384,7 +4407,9 @@
4384
4407
  "input": [],
4385
4408
  "output": []
4386
4409
  },
4387
- "capabilities": [],
4410
+ "capabilities": [
4411
+ "prompt_caching"
4412
+ ],
4388
4413
  "pricing": {
4389
4414
  "text_tokens": {
4390
4415
  "standard": {
@@ -4411,7 +4436,9 @@
4411
4436
  "input": [],
4412
4437
  "output": []
4413
4438
  },
4414
- "capabilities": [],
4439
+ "capabilities": [
4440
+ "prompt_caching"
4441
+ ],
4415
4442
  "pricing": {
4416
4443
  "text_tokens": {
4417
4444
  "standard": {
@@ -4438,7 +4465,9 @@
4438
4465
  "input": [],
4439
4466
  "output": []
4440
4467
  },
4441
- "capabilities": [],
4468
+ "capabilities": [
4469
+ "prompt_caching"
4470
+ ],
4442
4471
  "pricing": {
4443
4472
  "text_tokens": {
4444
4473
  "standard": {
@@ -9956,7 +9985,8 @@
9956
9985
  },
9957
9986
  "capabilities": [
9958
9987
  "function_calling",
9959
- "vision"
9988
+ "vision",
9989
+ "prompt_caching"
9960
9990
  ],
9961
9991
  "pricing": {
9962
9992
  "text_tokens": {
@@ -10008,7 +10038,8 @@
10008
10038
  },
10009
10039
  "capabilities": [
10010
10040
  "function_calling",
10011
- "vision"
10041
+ "vision",
10042
+ "prompt_caching"
10012
10043
  ],
10013
10044
  "pricing": {
10014
10045
  "text_tokens": {
@@ -10060,7 +10091,8 @@
10060
10091
  },
10061
10092
  "capabilities": [
10062
10093
  "function_calling",
10063
- "vision"
10094
+ "vision",
10095
+ "prompt_caching"
10064
10096
  ],
10065
10097
  "pricing": {
10066
10098
  "text_tokens": {
@@ -10378,7 +10410,8 @@
10378
10410
  "function_calling",
10379
10411
  "structured_output",
10380
10412
  "reasoning",
10381
- "vision"
10413
+ "vision",
10414
+ "prompt_caching"
10382
10415
  ],
10383
10416
  "pricing": {
10384
10417
  "text_tokens": {
@@ -10431,7 +10464,8 @@
10431
10464
  "capabilities": [
10432
10465
  "function_calling",
10433
10466
  "reasoning",
10434
- "vision"
10467
+ "vision",
10468
+ "prompt_caching"
10435
10469
  ],
10436
10470
  "pricing": {
10437
10471
  "text_tokens": {
@@ -10484,7 +10518,8 @@
10484
10518
  "capabilities": [
10485
10519
  "function_calling",
10486
10520
  "reasoning",
10487
- "vision"
10521
+ "vision",
10522
+ "prompt_caching"
10488
10523
  ],
10489
10524
  "pricing": {
10490
10525
  "text_tokens": {
@@ -10538,7 +10573,8 @@
10538
10573
  "function_calling",
10539
10574
  "structured_output",
10540
10575
  "reasoning",
10541
- "vision"
10576
+ "vision",
10577
+ "prompt_caching"
10542
10578
  ],
10543
10579
  "pricing": {
10544
10580
  "text_tokens": {
@@ -10592,7 +10628,8 @@
10592
10628
  "function_calling",
10593
10629
  "structured_output",
10594
10630
  "reasoning",
10595
- "vision"
10631
+ "vision",
10632
+ "prompt_caching"
10596
10633
  ],
10597
10634
  "pricing": {
10598
10635
  "text_tokens": {
@@ -10645,7 +10682,8 @@
10645
10682
  "capabilities": [
10646
10683
  "function_calling",
10647
10684
  "reasoning",
10648
- "vision"
10685
+ "vision",
10686
+ "prompt_caching"
10649
10687
  ],
10650
10688
  "pricing": {
10651
10689
  "text_tokens": {
@@ -10699,7 +10737,8 @@
10699
10737
  "function_calling",
10700
10738
  "structured_output",
10701
10739
  "reasoning",
10702
- "vision"
10740
+ "vision",
10741
+ "prompt_caching"
10703
10742
  ],
10704
10743
  "pricing": {
10705
10744
  "text_tokens": {
@@ -10752,7 +10791,8 @@
10752
10791
  "capabilities": [
10753
10792
  "function_calling",
10754
10793
  "reasoning",
10755
- "vision"
10794
+ "vision",
10795
+ "prompt_caching"
10756
10796
  ],
10757
10797
  "pricing": {
10758
10798
  "text_tokens": {
@@ -11198,7 +11238,8 @@
11198
11238
  "function_calling",
11199
11239
  "structured_output",
11200
11240
  "reasoning",
11201
- "vision"
11241
+ "vision",
11242
+ "prompt_caching"
11202
11243
  ],
11203
11244
  "pricing": {
11204
11245
  "text_tokens": {
@@ -11252,7 +11293,8 @@
11252
11293
  "function_calling",
11253
11294
  "structured_output",
11254
11295
  "reasoning",
11255
- "vision"
11296
+ "vision",
11297
+ "prompt_caching"
11256
11298
  ],
11257
11299
  "pricing": {
11258
11300
  "text_tokens": {
@@ -11306,7 +11348,8 @@
11306
11348
  "function_calling",
11307
11349
  "structured_output",
11308
11350
  "reasoning",
11309
- "vision"
11351
+ "vision",
11352
+ "prompt_caching"
11310
11353
  ],
11311
11354
  "pricing": {
11312
11355
  "text_tokens": {
@@ -11359,7 +11402,8 @@
11359
11402
  "capabilities": [
11360
11403
  "function_calling",
11361
11404
  "reasoning",
11362
- "vision"
11405
+ "vision",
11406
+ "prompt_caching"
11363
11407
  ],
11364
11408
  "pricing": {
11365
11409
  "text_tokens": {
@@ -11413,7 +11457,8 @@
11413
11457
  "function_calling",
11414
11458
  "structured_output",
11415
11459
  "reasoning",
11416
- "vision"
11460
+ "vision",
11461
+ "prompt_caching"
11417
11462
  ],
11418
11463
  "pricing": {
11419
11464
  "text_tokens": {
@@ -11466,7 +11511,8 @@
11466
11511
  "capabilities": [
11467
11512
  "function_calling",
11468
11513
  "reasoning",
11469
- "vision"
11514
+ "vision",
11515
+ "prompt_caching"
11470
11516
  ],
11471
11517
  "pricing": {
11472
11518
  "text_tokens": {
@@ -11520,7 +11566,8 @@
11520
11566
  "function_calling",
11521
11567
  "structured_output",
11522
11568
  "reasoning",
11523
- "vision"
11569
+ "vision",
11570
+ "prompt_caching"
11524
11571
  ],
11525
11572
  "pricing": {
11526
11573
  "text_tokens": {
@@ -11574,7 +11621,8 @@
11574
11621
  "function_calling",
11575
11622
  "structured_output",
11576
11623
  "reasoning",
11577
- "vision"
11624
+ "vision",
11625
+ "prompt_caching"
11578
11626
  ],
11579
11627
  "pricing": {
11580
11628
  "text_tokens": {
@@ -11628,7 +11676,8 @@
11628
11676
  "function_calling",
11629
11677
  "structured_output",
11630
11678
  "reasoning",
11631
- "vision"
11679
+ "vision",
11680
+ "prompt_caching"
11632
11681
  ],
11633
11682
  "pricing": {
11634
11683
  "text_tokens": {
@@ -11681,7 +11730,8 @@
11681
11730
  "capabilities": [
11682
11731
  "function_calling",
11683
11732
  "reasoning",
11684
- "vision"
11733
+ "vision",
11734
+ "prompt_caching"
11685
11735
  ],
11686
11736
  "pricing": {
11687
11737
  "text_tokens": {
@@ -11735,7 +11785,8 @@
11735
11785
  "function_calling",
11736
11786
  "structured_output",
11737
11787
  "reasoning",
11738
- "vision"
11788
+ "vision",
11789
+ "prompt_caching"
11739
11790
  ],
11740
11791
  "pricing": {
11741
11792
  "text_tokens": {
@@ -11788,7 +11839,8 @@
11788
11839
  "capabilities": [
11789
11840
  "function_calling",
11790
11841
  "reasoning",
11791
- "vision"
11842
+ "vision",
11843
+ "prompt_caching"
11792
11844
  ],
11793
11845
  "pricing": {
11794
11846
  "text_tokens": {
@@ -15451,7 +15503,8 @@
15451
15503
  "capabilities": [
15452
15504
  "function_calling",
15453
15505
  "vision",
15454
- "streaming"
15506
+ "streaming",
15507
+ "prompt_caching"
15455
15508
  ],
15456
15509
  "pricing": {
15457
15510
  "text_tokens": {
@@ -15512,7 +15565,8 @@
15512
15565
  "structured_output",
15513
15566
  "reasoning",
15514
15567
  "vision",
15515
- "streaming"
15568
+ "streaming",
15569
+ "prompt_caching"
15516
15570
  ],
15517
15571
  "pricing": {
15518
15572
  "text_tokens": {
@@ -15591,7 +15645,8 @@
15591
15645
  "function_calling",
15592
15646
  "reasoning",
15593
15647
  "vision",
15594
- "streaming"
15648
+ "streaming",
15649
+ "prompt_caching"
15595
15650
  ],
15596
15651
  "pricing": {
15597
15652
  "text_tokens": {
@@ -15670,7 +15725,8 @@
15670
15725
  "function_calling",
15671
15726
  "reasoning",
15672
15727
  "vision",
15673
- "streaming"
15728
+ "streaming",
15729
+ "prompt_caching"
15674
15730
  ],
15675
15731
  "pricing": {
15676
15732
  "text_tokens": {
@@ -15731,7 +15787,8 @@
15731
15787
  "structured_output",
15732
15788
  "reasoning",
15733
15789
  "vision",
15734
- "streaming"
15790
+ "streaming",
15791
+ "prompt_caching"
15735
15792
  ],
15736
15793
  "pricing": {
15737
15794
  "text_tokens": {
@@ -15811,7 +15868,8 @@
15811
15868
  "structured_output",
15812
15869
  "reasoning",
15813
15870
  "vision",
15814
- "streaming"
15871
+ "streaming",
15872
+ "prompt_caching"
15815
15873
  ],
15816
15874
  "pricing": {
15817
15875
  "text_tokens": {
@@ -15890,7 +15948,8 @@
15890
15948
  "function_calling",
15891
15949
  "reasoning",
15892
15950
  "vision",
15893
- "streaming"
15951
+ "streaming",
15952
+ "prompt_caching"
15894
15953
  ],
15895
15954
  "pricing": {
15896
15955
  "text_tokens": {
@@ -15970,7 +16029,8 @@
15970
16029
  "structured_output",
15971
16030
  "reasoning",
15972
16031
  "vision",
15973
- "streaming"
16032
+ "streaming",
16033
+ "prompt_caching"
15974
16034
  ],
15975
16035
  "pricing": {
15976
16036
  "text_tokens": {
@@ -16050,7 +16110,8 @@
16050
16110
  "reasoning",
16051
16111
  "vision",
16052
16112
  "streaming",
16053
- "structured_output"
16113
+ "structured_output",
16114
+ "prompt_caching"
16054
16115
  ],
16055
16116
  "pricing": {
16056
16117
  "text_tokens": {
@@ -29286,7 +29347,8 @@
29286
29347
  "function_calling",
29287
29348
  "reasoning",
29288
29349
  "vision",
29289
- "streaming"
29350
+ "streaming",
29351
+ "prompt_caching"
29290
29352
  ],
29291
29353
  "pricing": {
29292
29354
  "text_tokens": {
@@ -29369,7 +29431,8 @@
29369
29431
  },
29370
29432
  "capabilities": [
29371
29433
  "streaming",
29372
- "function_calling"
29434
+ "function_calling",
29435
+ "prompt_caching"
29373
29436
  ],
29374
29437
  "pricing": {
29375
29438
  "text_tokens": {
@@ -29437,7 +29500,8 @@
29437
29500
  "structured_output",
29438
29501
  "reasoning",
29439
29502
  "vision",
29440
- "streaming"
29503
+ "streaming",
29504
+ "prompt_caching"
29441
29505
  ],
29442
29506
  "pricing": {
29443
29507
  "text_tokens": {
@@ -29523,7 +29587,8 @@
29523
29587
  "function_calling",
29524
29588
  "reasoning",
29525
29589
  "vision",
29526
- "streaming"
29590
+ "streaming",
29591
+ "prompt_caching"
29527
29592
  ],
29528
29593
  "pricing": {
29529
29594
  "text_tokens": {
@@ -29609,7 +29674,8 @@
29609
29674
  "structured_output",
29610
29675
  "reasoning",
29611
29676
  "vision",
29612
- "streaming"
29677
+ "streaming",
29678
+ "prompt_caching"
29613
29679
  ],
29614
29680
  "pricing": {
29615
29681
  "text_tokens": {
@@ -29697,7 +29763,8 @@
29697
29763
  "structured_output",
29698
29764
  "reasoning",
29699
29765
  "vision",
29700
- "streaming"
29766
+ "streaming",
29767
+ "prompt_caching"
29701
29768
  ],
29702
29769
  "pricing": {
29703
29770
  "text_tokens": {
@@ -29785,7 +29852,8 @@
29785
29852
  "structured_output",
29786
29853
  "reasoning",
29787
29854
  "vision",
29788
- "streaming"
29855
+ "streaming",
29856
+ "prompt_caching"
29789
29857
  ],
29790
29858
  "pricing": {
29791
29859
  "text_tokens": {
@@ -29877,7 +29945,8 @@
29877
29945
  "capabilities": [
29878
29946
  "streaming",
29879
29947
  "function_calling",
29880
- "structured_output"
29948
+ "structured_output",
29949
+ "prompt_caching"
29881
29950
  ],
29882
29951
  "pricing": {
29883
29952
  "text_tokens": {
@@ -29946,7 +30015,8 @@
29946
30015
  "function_calling",
29947
30016
  "reasoning",
29948
30017
  "vision",
29949
- "streaming"
30018
+ "streaming",
30019
+ "prompt_caching"
29950
30020
  ],
29951
30021
  "pricing": {
29952
30022
  "text_tokens": {
@@ -30038,7 +30108,8 @@
30038
30108
  "structured_output",
30039
30109
  "reasoning",
30040
30110
  "vision",
30041
- "streaming"
30111
+ "streaming",
30112
+ "prompt_caching"
30042
30113
  ],
30043
30114
  "pricing": {
30044
30115
  "text_tokens": {
@@ -30131,7 +30202,8 @@
30131
30202
  "structured_output",
30132
30203
  "reasoning",
30133
30204
  "vision",
30134
- "streaming"
30205
+ "streaming",
30206
+ "prompt_caching"
30135
30207
  ],
30136
30208
  "pricing": {
30137
30209
  "text_tokens": {
@@ -223,6 +223,20 @@ module Legion
223
223
  configuration_requirements.all? { |req| @config.send(req) }
224
224
  end
225
225
 
226
+ # True only when config responds to :llm_cache_enabled and that value is
+ # exactly true; any other value (nil, a truthy non-boolean) counts as disabled.
+ def cache_enabled?
227
+ return false unless config.respond_to?(:llm_cache_enabled)
228
+
229
+ config.llm_cache_enabled == true
230
+ end
231
+
232
+ # Returns config.cache_control_prefix_tokens when config responds to it and the
+ # value is truthy; otherwise falls back to 4.
+ def cache_control_prefix_tokens
233
+ if config.respond_to?(:cache_control_prefix_tokens) && config.cache_control_prefix_tokens
234
+ config.cache_control_prefix_tokens
235
+ else
236
+ 4
237
+ end
238
+ end
239
+
226
240
  def local?
227
241
  self.class.local?
228
242
  end
@@ -17,6 +17,7 @@ module Legion
17
17
  include Fleet::EnvelopeValidation
18
18
 
19
19
  def type = Fleet::Protocol::ERROR_TYPE
20
+ def encrypt? = Fleet::Settings.value(:fleet, :compliance, :encrypt_fleet, default: true) == true
20
21
  def app_id = @options[:app_id] || 'lex-llm'
21
22
  def reply_to = @options[:reply_to]
22
23
  def correlation_id = @options[:correlation_id]
@@ -31,6 +31,7 @@ module Legion
31
31
 
32
32
  def exchange = Exchanges::Fleet
33
33
  def type = Fleet::Protocol::REQUEST_TYPE
34
+ def encrypt? = Fleet::Settings.value(:fleet, :compliance, :encrypt_fleet, default: true) == true
34
35
  def app_id = @options[:app_id] || 'lex-llm'
35
36
  def reply_to = @options[:reply_to]
36
37
  def correlation_id = @options[:correlation_id]
@@ -17,6 +17,7 @@ module Legion
17
17
  include Fleet::EnvelopeValidation
18
18
 
19
19
  def type = Fleet::Protocol::RESPONSE_TYPE
20
+ def encrypt? = Fleet::Settings.value(:fleet, :compliance, :encrypt_fleet, default: true) == true
20
21
  def app_id = @options[:app_id] || 'lex-llm'
21
22
  def reply_to = @options[:reply_to]
22
23
  def correlation_id = @options[:correlation_id]
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Llm
6
- VERSION = '0.4.15'
6
+ VERSION = '0.4.16'
7
7
  end
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.15
4
+ version: 0.4.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -247,6 +247,7 @@ files:
247
247
  - ".github/workflows/ci.yml"
248
248
  - ".gitignore"
249
249
  - ".rubocop.yml"
250
+ - ".rubocop.yml.new"
250
251
  - CHANGELOG.md
251
252
  - Gemfile
252
253
  - LICENSE