lex-llm 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6f3fc1bac35781a8134a6d24d7467a790cdd506244cfd4f0e66955a4fa82ceb9
4
- data.tar.gz: 0c1cdfe9dee8e21c5b9bba0a01b12f5ef41e30b46c73ff8d22ccc35d621818a9
3
+ metadata.gz: b266813f29f9a144b2a57408f39fe98bc27b7a53e59b13871ca22c0dc8cf6127
4
+ data.tar.gz: 80cb7a8866d4cd2b9c150dd4567f99aa09d12208f8acfa38df3dd578c7c93831
5
5
  SHA512:
6
- metadata.gz: 4592bdc8998415754bfce42444be4168fc05eacd3d20be7872c3f5ed2ef3384cd44a9027cb23bb7f3f0e8dda8b12451f51332b16d2a4611975e950da0a5da2af
7
- data.tar.gz: a90e2831742bc0af3c0d540f8459434d8fb287cb5504dbaf2be6c22425aceff4d929ff5c230485951d6669b46be79dd439d0f80a688272b52b8f494adab83b4f
6
+ metadata.gz: d43d28ab982b938f012a66000f73ee7cb4b9cae34ae31cbb6c11794d87845280ae919e2b91b81b594f76f6e11b95e9c57ff796c46d1ce595a74962b6d4a91800
7
+ data.tar.gz: 1976f2adfd60d698e547e92b00f7d779ab28b5c75c975a7245bc58ecc94dbb0d81b767c76552b3d1cee24a53fdd8d2bb98d1e3cb204816e27963491326daee50
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.4.3 - 2026-05-06
4
+
5
+ - Move provider-owned fleet responder execution into `lex-llm` so provider gems no longer depend on `legion-llm`.
6
+ - Add shared responder-side fleet token validation, idempotency protection, provider dispatch, and response/error publishing helpers.
7
+ - Reserve fleet replay tokens before provider dispatch, split replay TTL into auth settings, and raise explicit responder transport configuration errors.
8
+
3
9
  ## 0.4.2 - 2026-05-06
4
10
 
5
11
  - Remove the temporary settings logger wrapper and lazy-load fleet transport envelopes so `lex-llm` boot does not force `legion-transport` loading.
data/README.md CHANGED
@@ -48,7 +48,7 @@ gem 'lex-llm'
48
48
  Provider extensions should declare `lex-llm` as a gemspec dependency:
49
49
 
50
50
  ```ruby
51
- spec.add_dependency 'lex-llm', '>= 0.4.0'
51
+ spec.add_dependency 'lex-llm', '>= 0.4.3'
52
52
  ```
53
53
 
54
54
  For local development across LegionIO repos, prefer a local path override in the app or test `Gemfile`, not a permanent git dependency in the gemspec.
data/lex-llm.gemspec CHANGED
@@ -29,11 +29,13 @@ Gem::Specification.new do |spec|
29
29
 
30
30
  # Runtime dependencies
31
31
  spec.add_dependency 'base64'
32
+ spec.add_dependency 'concurrent-ruby', '>= 1.2'
32
33
  spec.add_dependency 'event_stream_parser', '~> 1'
33
34
  spec.add_dependency 'faraday', ENV['FARADAY_VERSION'] || '>= 1.10.0'
34
35
  spec.add_dependency 'faraday-multipart', '>= 1'
35
36
  spec.add_dependency 'faraday-net_http', '>= 1'
36
37
  spec.add_dependency 'faraday-retry', '>= 1'
38
+ spec.add_dependency 'legion-crypt', '>= 1.5.1'
37
39
  spec.add_dependency 'legion-json', '>= 1.2.1'
38
40
  spec.add_dependency 'legion-logging', '>= 1.3.2'
39
41
  spec.add_dependency 'legion-settings', '>= 1.3.14'
@@ -0,0 +1,304 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ require_relative 'protocol'
6
+ require_relative 'settings'
7
+ require_relative 'worker_execution'
8
+
9
+ module Legion
10
+ module Extensions
11
+ module Llm
12
+ module Transport
13
+ # Autoloads responder publish envelopes without booting legion-transport during lex-llm load.
14
+ module Messages
15
+ autoload :FleetError, File.expand_path('../transport/messages/fleet_error', __dir__) unless
16
+ autoload?(:FleetError) || const_defined?(:FleetError, false)
17
+ autoload :FleetResponse, File.expand_path('../transport/messages/fleet_response', __dir__) unless
18
+ autoload?(:FleetResponse) || const_defined?(:FleetResponse, false)
19
+ end
20
+ end
21
+
22
+ module Fleet
23
+ # Shared implementation for provider-owned fleet responder runners.
24
+ module ProviderResponder
25
+ class ConfigurationError < StandardError; end
26
+
27
+ REQUIRED_FIELDS = %i[
28
+ request_id correlation_id idempotency_key operation provider provider_instance model params reply_to
29
+ message_context caller trace_context signed_token timeout_seconds expires_at protocol_version
30
+ ].freeze
31
+ LEGACY_FIELDS = %i[schema_version request_type fleet_correlation_id].freeze
32
+
33
# Thin wrapper around the parsed fleet request hash. Lookups accept either a
# symbol or a string and try both key forms; the named readers expose the
# protocol fields that are echoed back when a response or error is published.
FleetEnvelope = Struct.new(:data, keyword_init: true) do
  # Returns the value stored under +key+, trying the symbol form first and
  # the string form second. Only +nil+ falls through to the string form, so a
  # stored +false+ is returned as-is instead of being mistaken for "missing".
  def [](key)
    value = data[key.to_sym]
    value.nil? ? data[key.to_s] : value
  end

  # True when +key+ is present under either its symbol or string form.
  def key?(key)
    data.key?(key.to_sym) || data.key?(key.to_s)
  end

  # Returns the stored value when +key+ is present, otherwise +default+.
  def fetch(key, default = nil)
    key?(key) ? self[key] : default
  end

  # Exposes the underlying hash (overrides Struct#to_h, which would wrap it under :data).
  def to_h = data
  def protocol_version = self[:protocol_version]
  def request_id = self[:request_id]
  def correlation_id = self[:correlation_id]
  def idempotency_key = self[:idempotency_key]
  def operation = self[:operation]
  def provider = self[:provider]
  def provider_instance = self[:provider_instance]
  def model = self[:model]
  # Hash-valued fields default to an empty hash when absent.
  def params = self[:params] || {}
  def reply_to = self[:reply_to]
  def message_context = self[:message_context] || {}
  def trace_context = self[:trace_context] || {}
end
60
+
61
+ module_function
62
+
63
+ # Public runner entry point mirrors AMQP delivery callbacks, which carry both delivery and property metadata.
64
+ # rubocop:disable Metrics/ParameterLists
65
+ def call(payload:, provider_family:, provider_class:, provider_instances:, delivery: nil, properties: nil)
66
+ envelope = parse_payload(payload)
67
+ check_envelope!(envelope, provider_family:)
68
+ provider = build_provider(envelope:, provider_class:, provider_instances:)
69
+ response = WorkerExecution.call(envelope: envelope, provider: provider)
70
+ publish_response(envelope, response)
71
+ ack(delivery || properties)
72
+ response
73
+ rescue StandardError => e
74
+ safe_publish_error(envelope, e) if defined?(envelope) && envelope
75
+ reject(delivery || properties, requeue: requeue_error?(e))
76
+ raise
77
+ end
78
+ # rubocop:enable Metrics/ParameterLists
79
+
80
+ def enabled_for?(provider_instances)
81
+ instances = resolve_provider_instances(provider_instances)
82
+ instances.any? do |_instance_id, settings|
83
+ truthy?(dig(settings, :fleet, :respond_to_requests))
84
+ end
85
+ end
86
+
87
# Normalizes an incoming delivery payload into a FleetEnvelope with symbolized keys.
#
# Accepts an existing FleetEnvelope, a JSON string, or any object that
# responds to +to_h+; anything else is treated as an empty payload.
#
# @raise [ArgumentError] when the decoded payload is not a key/value object
#   (for example a JSON array or scalar), since the envelope needs keyed lookups
def parse_payload(payload)
  decoded = case payload
            when FleetEnvelope then payload.to_h
            when String then parse_json(payload)
            else payload.respond_to?(:to_h) ? payload.to_h : {}
            end
  # Fail here with a clear message instead of a NoMethodError deep inside validation.
  raise ArgumentError, 'fleet payload must be a JSON object' unless decoded.respond_to?(:key?)

  FleetEnvelope.new(data: deep_symbolize(decoded))
end
98
+
99
+ def check_envelope!(envelope, provider_family:)
100
+ reject_legacy_fields!(envelope)
101
+ REQUIRED_FIELDS.each do |field|
102
+ raise ArgumentError, "#{field} is required" unless envelope.key?(field) && !envelope[field].nil?
103
+ end
104
+
105
+ validate_protocol_version!(envelope)
106
+ validate_provider_family!(envelope, provider_family)
107
+ end
108
+
109
+ def build_provider(envelope:, provider_class:, provider_instances:)
110
+ instances = resolve_provider_instances(provider_instances)
111
+ instance_id = envelope.provider_instance.to_s
112
+ instance_settings = instances[instance_id.to_sym] || instances[instance_id]
113
+ unless instance_settings
114
+ raise ConfigurationError,
115
+ "fleet provider instance is not configured: #{instance_id}"
116
+ end
117
+ unless truthy?(dig(instance_settings, :fleet, :respond_to_requests))
118
+ raise ConfigurationError, "fleet responses are disabled for provider instance: #{instance_id}"
119
+ end
120
+
121
+ provider_class.new(deep_symbolize(instance_settings))
122
+ end
123
+
124
+ def publish_response(envelope, response)
125
+ transport_message_class(:FleetResponse).new(
126
+ protocol_version: envelope.protocol_version,
127
+ request_id: envelope.request_id,
128
+ correlation_id: envelope.correlation_id,
129
+ idempotency_key: envelope.idempotency_key,
130
+ operation: envelope.operation,
131
+ provider: envelope.provider,
132
+ provider_instance: envelope.provider_instance,
133
+ model: envelope.model,
134
+ reply_to: envelope.reply_to,
135
+ message_context: envelope.message_context,
136
+ trace_context: envelope.trace_context,
137
+ content: response_content(response),
138
+ tool_calls: response_field(response, :tool_calls) || [],
139
+ usage: response_usage(response),
140
+ finish_reason: response_field(response, :finish_reason),
141
+ metadata: response_metadata(response)
142
+ ).publish
143
+ end
144
+
145
+ def publish_error(envelope, error)
146
+ transport_message_class(:FleetError).new(
147
+ protocol_version: envelope.protocol_version,
148
+ request_id: envelope.request_id,
149
+ correlation_id: envelope.correlation_id,
150
+ idempotency_key: envelope.idempotency_key,
151
+ operation: envelope.operation,
152
+ provider: envelope.provider,
153
+ provider_instance: envelope.provider_instance,
154
+ model: envelope.model,
155
+ reply_to: envelope.reply_to,
156
+ message_context: envelope.message_context,
157
+ trace_context: envelope.trace_context,
158
+ code: error_code(error),
159
+ message: error.message,
160
+ error_class: error.class.name,
161
+ retryable: retryable_error?(error),
162
+ metadata: {}
163
+ ).publish
164
+ end
165
+
166
+ def safe_publish_error(envelope, error)
167
+ publish_error(envelope, error)
168
+ rescue StandardError
169
+ nil
170
+ end
171
+
172
+ def transport_message_class(name)
173
+ ::Legion::Extensions::Llm::Transport::Messages.const_get(name)
174
+ rescue LoadError, NameError => e
175
+ raise ConfigurationError, "fleet responder transport unavailable for #{name}: #{e.message}"
176
+ end
177
+
178
# Acknowledges a delivery, either through the object's own +ack+ or through
# its channel using the delivery tag. Does nothing when no delivery is given
# or the object supports neither form.
def ack(delivery)
  return nil unless delivery
  return delivery.ack if delivery.respond_to?(:ack)
  return nil unless delivery.respond_to?(:channel) && delivery.respond_to?(:delivery_tag)

  delivery.channel.ack(delivery.delivery_tag)
end
187
+
188
# Rejects a delivery, optionally asking the broker to requeue it.
#
# The channel/delivery-tag form is tried first: delivery objects that mix in
# Enumerable answer +respond_to?(:reject)+ through Enumerable#reject, which
# takes no arguments, so calling +delivery.reject(requeue)+ on them raises
# ArgumentError and the message is never rejected.
#
# @param delivery [Object, nil] delivery metadata; nil is ignored
# @param requeue [Boolean] whether the broker should requeue the message
def reject(delivery, requeue:)
  return unless delivery

  if delivery.respond_to?(:channel) && delivery.respond_to?(:delivery_tag)
    delivery.channel.reject(delivery.delivery_tag, requeue)
  elsif delivery.respond_to?(:reject)
    delivery.reject(requeue)
  end
end
197
+
198
+ def parse_json(payload)
199
+ if defined?(::Legion::JSON)
200
+ ::Legion::JSON.parse(payload)
201
+ else
202
+ ::JSON.parse(payload)
203
+ end
204
+ end
205
+
206
+ def reject_legacy_fields!(envelope)
207
+ LEGACY_FIELDS.each do |field|
208
+ raise ArgumentError, "#{field} is not supported by fleet protocol v2" if envelope.key?(field)
209
+ end
210
+ end
211
+
212
+ def validate_protocol_version!(envelope)
213
+ return if envelope.protocol_version == Protocol::VERSION
214
+
215
+ raise ArgumentError, "protocol_version must be #{Protocol::VERSION}"
216
+ end
217
+
218
+ def validate_provider_family!(envelope, provider_family)
219
+ return if envelope.provider.to_s == provider_family.to_s
220
+
221
+ raise ArgumentError, "fleet request provider #{envelope.provider} does not match #{provider_family}"
222
+ end
223
+
224
+ def resolve_provider_instances(provider_instances)
225
+ instances = provider_instances.respond_to?(:call) ? provider_instances.call : provider_instances
226
+ deep_symbolize(instances || {})
227
+ end
228
+
229
+ def requeue_error?(error)
230
+ retryable_error?(error) &&
231
+ Settings.value(:fleet, :consumer, :requeue_transient, default: true) != false
232
+ end
233
+
234
# Decides whether a failed fleet request is worth retrying.
#
# Configuration and policy failures are permanent. Envelope validation
# failures (raised as ArgumentError) and JSON parse failures are permanent as
# well: the same payload fails the same way on every redelivery, so treating
# them as transient would requeue a malformed message indefinitely.
#
# @return [Boolean] true only for errors that may succeed on a later attempt
def retryable_error?(error)
  return false if error.is_a?(ConfigurationError)
  return false if error.is_a?(WorkerExecution::PolicyError)
  return false if error.is_a?(ArgumentError)
  return false if defined?(::JSON::ParserError) && error.is_a?(::JSON::ParserError)

  true
end
240
+
241
+ def error_code(error)
242
+ return 'configuration_error' if error.is_a?(ConfigurationError)
243
+ return 'policy_error' if error.is_a?(WorkerExecution::PolicyError)
244
+
245
+ 'provider_error'
246
+ end
247
+
248
+ def response_content(response)
249
+ response_field(response, :content) || response_field(response, :result) || response.to_s
250
+ end
251
+
252
# Extracts token usage from a provider response as a symbol-keyed hash.
#
# Prefers a +usage+ (or +tokens+) field; when neither is present, falls back
# to the individual +input_tokens+ / +output_tokens+ / +thinking_tokens+
# fields, omitting any that are nil.
def response_usage(response)
  usage = response_field(response, :usage) || response_field(response, :tokens)
  # nil responds to #to_h, so it has to be excluded explicitly or the
  # per-field fallback below can never run.
  unless usage.nil?
    return deep_symbolize(usage) if usage.is_a?(Hash) || usage.is_a?(Array)
    # Convert usage value objects (e.g. Structs) so callers always get hash data.
    return deep_symbolize(usage.to_h) if usage.respond_to?(:to_h)
  end

  {
    input_tokens: response_field(response, :input_tokens),
    output_tokens: response_field(response, :output_tokens),
    thinking_tokens: response_field(response, :thinking_tokens)
  }.compact
end
262
+
263
# Extracts response metadata as a symbol-keyed structure, defaulting to an
# empty hash when the response carries none.
def response_metadata(response)
  metadata = response_field(response, :metadata)
  # nil responds to #to_h, so without this guard a missing field came back as nil.
  return {} if metadata.nil?
  return deep_symbolize(metadata) if metadata.is_a?(Hash) || metadata.is_a?(Array)

  metadata.respond_to?(:to_h) ? deep_symbolize(metadata.to_h) : {}
end
267
+
268
# Reads +field+ from a provider response. Keyed responses are consulted
# first (symbol key, then string key); otherwise a public reader of the same
# name is called. Returns nil when the response offers neither.
def response_field(response, field)
  if response.respond_to?(:key?)
    [field, field.to_s].each do |candidate|
      return response[candidate] if response.key?(candidate)
    end
  end

  response.public_send(field) if response.respond_to?(field)
end
275
+
276
# Walks nested key/value objects along +keys+, accepting symbol or string
# keys at every level. Returns nil as soon as a level is not key/value-like.
#
# Only a nil lookup falls through from the symbol key to the string key, so a
# stored +false+ is returned rather than being replaced by the string-key result.
def dig(hash, *keys)
  keys.reduce(hash) do |current, key|
    break nil unless current.respond_to?(:key?)

    value = current[key.to_sym]
    value.nil? ? current[key.to_s] : value
  end
end
283
+
284
# True for the boolean +true+ and for any value whose string form is exactly "true".
def truthy?(value)
  return true if value == true

  value.to_s == 'true'
end
287
+
288
# Recursively converts hash keys to symbols (where the key supports +to_sym+),
# descending through nested hashes and arrays. Other values are returned unchanged.
def deep_symbolize(value)
  if value.is_a?(Hash)
    value.to_h do |key, child|
      [key.respond_to?(:to_sym) ? key.to_sym : key, deep_symbolize(child)]
    end
  elsif value.is_a?(Array)
    value.map { |child| deep_symbolize(child) }
  else
    value
  end
end
300
+ end
301
+ end
302
+ end
303
+ end
304
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Fleet
7
+ # Reads fleet settings from Legion::Settings when available, falling back to lex-llm defaults.
8
+ module Settings
9
+ module_function
10
+
11
# Resolves a fleet setting at +path+. Configured settings sources are tried in
# order and the first non-nil value wins; otherwise the packaged defaults are
# consulted, and finally the caller-supplied +default+.
def value(*path, default:)
  configured = configured_llm_settings.lazy
                                      .map { |settings| dig(settings, *path) }
                                      .find { |candidate| !candidate.nil? }
  return configured unless configured.nil?

  packaged = dig(default_settings, *path)
  packaged.nil? ? default : packaged
end
20
+
21
+ def configured_llm_settings
22
+ return [] unless defined?(::Legion::Settings) && ::Legion::Settings.respond_to?(:[])
23
+
24
+ configured = []
25
+ extensions = safe_fetch(::Legion::Settings, :extensions)
26
+ extension_llm = dig(extensions, :llm)
27
+ configured << extension_llm if extension_llm.respond_to?(:key?)
28
+
29
+ llm = safe_fetch(::Legion::Settings, :llm)
30
+ configured << llm if llm.respond_to?(:key?)
31
+ configured
32
+ rescue StandardError
33
+ []
34
+ end
35
+
36
# Walks nested key/value objects along +keys+. At each level the symbol form
# of the key is preferred and the string form is the fallback; presence is
# decided with +key?+, so stored nil/false values are returned as-is.
# Returns nil as soon as a level is not key/value-like.
def dig(hash, *keys)
  cursor = hash
  keys.each do |key|
    return nil unless cursor.respond_to?(:key?)

    as_symbol = key.respond_to?(:to_sym) ? key.to_sym : key
    as_string = key.to_s
    cursor = if cursor.key?(as_symbol)
               cursor[as_symbol]
             elsif cursor.key?(as_string)
               cursor[as_string]
             end
  end
  cursor
end
49
+
50
+ def safe_fetch(source, key)
51
+ source[key] || source[key.to_s]
52
+ rescue StandardError
53
+ nil
54
+ end
55
+
56
+ def default_settings
57
+ return ::Legion::Extensions::Llm.default_settings if
58
+ ::Legion::Extensions::Llm.respond_to?(:default_settings)
59
+
60
+ {}
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Fleet
7
+ class TokenError < StandardError; end
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+ require 'time'
5
+
6
+ require_relative 'settings'
7
+ require_relative 'token_error'
8
+
9
+ module Legion
10
+ module Extensions
11
+ module Llm
12
+ module Fleet
13
+ # Verifies responder-side fleet JWTs and prevents replay on provider nodes.
14
+ module TokenValidator
15
+ @seen_jtis = Concurrent::Map.new
16
+ @replay_mutex = Mutex.new
17
+
18
+ module_function
19
+
20
+ def validate!(token:, envelope:, record_replay: true)
21
+ raise TokenError, 'fleet token is required' if token.to_s.empty?
22
+
23
+ claims = symbolize_keys(jwt_module.verify(
24
+ token,
25
+ verification_key: signing_key,
26
+ issuer: issuer,
27
+ algorithm: algorithm,
28
+ verify_issuer: false
29
+ ))
30
+ validate_registered_claims!(claims)
31
+ validate_request_expiry!(claims)
32
+ validate_envelope_claims!(claims, symbolize_keys(envelope || {}))
33
+ record_replay ? reserve_replay!(claims[:jti]) : ensure_not_replayed!(claims[:jti])
34
+ claims
35
+ rescue TokenError
36
+ raise
37
+ rescue StandardError => e
38
+ raise TokenError, "fleet token verification failed: #{e.message}"
39
+ end
40
+
41
+ def reset_replay_cache!
42
+ @seen_jtis = Concurrent::Map.new
43
+ @replay_mutex = Mutex.new
44
+ end
45
+
46
# Checks the registered JWT claims (iss, aud, exp, nbf, jti) of a fleet token.
#
# The audience claim may be a single string or an array of strings (both are
# allowed by RFC 7519); the token is accepted when the configured audience is
# one of them. Expiry and not-before checks allow for the configured clock skew.
#
# @param claims [Hash] symbol-keyed token claims
# @raise [TokenError] when any registered claim is missing or invalid
def validate_registered_claims!(claims)
  now = Time.now.to_i
  skew = clock_skew_seconds
  audiences = Array(claims[:aud]).map(&:to_s)

  raise TokenError, 'fleet token issuer mismatch' unless accepted_issuer?(claims[:iss])
  raise TokenError, 'fleet token audience mismatch' unless audiences.include?(audience.to_s)
  raise TokenError, 'fleet token expired' if claims[:exp].nil? || claims[:exp].to_i + skew <= now
  raise TokenError, 'fleet token not yet valid' if claims[:nbf].nil? || claims[:nbf].to_i - skew > now
  raise TokenError, 'fleet token missing jti' if claims[:jti].to_s.empty?
end
60
+
61
+ def validate_request_expiry!(claims)
62
+ expires_at = claims[:expires_at]
63
+ raise TokenError, 'fleet request expires_at is required' if expires_at.to_s.empty?
64
+
65
+ expires = Time.iso8601(expires_at.to_s)
66
+ raise TokenError, 'fleet request expired' if expires + clock_skew_seconds <= Time.now.utc
67
+ rescue ArgumentError
68
+ raise TokenError, 'fleet request expires_at is invalid'
69
+ end
70
+
71
+ def validate_envelope_claims!(claims, envelope)
72
+ %i[
73
+ request_id correlation_id idempotency_key operation provider provider_instance
74
+ model reply_to message_context params caller trace_context timeout_seconds expires_at
75
+ ].each do |key|
76
+ expected = canonical_value(envelope[key])
77
+ actual = canonical_value(claims[key])
78
+ raise TokenError, "fleet token #{key} claim mismatch" unless actual == expected
79
+ end
80
+ end
81
+
82
+ def reserve_replay!(jti)
83
+ @replay_mutex.synchronize do
84
+ now = Time.now.to_i
85
+ purge_replay_cache_locked!(now)
86
+ existing = @seen_jtis[jti.to_s]
87
+ raise TokenError, 'fleet token replay detected' if active_replay?(existing, now)
88
+
89
+ @seen_jtis[jti.to_s] = replay_entry(:inflight, now)
90
+ end
91
+ end
92
+
93
+ def mark_replay!(jti)
94
+ @replay_mutex.synchronize do
95
+ @seen_jtis[jti.to_s] = replay_entry(:complete)
96
+ end
97
+ end
98
+
99
+ def release_replay!(jti)
100
+ @replay_mutex.synchronize do
101
+ entry = @seen_jtis[jti.to_s]
102
+ @seen_jtis.delete(jti.to_s) if entry.nil? || entry[:state] == :inflight
103
+ end
104
+ end
105
+
106
+ def ensure_not_replayed!(jti)
107
+ @replay_mutex.synchronize do
108
+ now = Time.now.to_i
109
+ purge_replay_cache_locked!(now)
110
+ raise TokenError, 'fleet token replay detected' if active_replay?(@seen_jtis[jti.to_s], now)
111
+ end
112
+ end
113
+
114
+ def purge_replay_cache!
115
+ @replay_mutex.synchronize { purge_replay_cache_locked!(Time.now.to_i) }
116
+ end
117
+
118
+ def purge_replay_cache_locked!(now)
119
+ @seen_jtis.each_pair { |jti, entry| @seen_jtis.delete(jti) unless active_replay?(entry, now) }
120
+ end
121
+
122
+ def active_replay?(entry, now)
123
+ entry && entry[:expires_at] > now
124
+ end
125
+
126
+ def replay_entry(state, now = Time.now.to_i)
127
+ { state: state, expires_at: now + replay_ttl_seconds }
128
+ end
129
+
130
+ def replay_ttl_seconds
131
+ ttl = Settings.value(:fleet, :auth, :replay_ttl_seconds, default: 600).to_i
132
+ ttl.positive? ? ttl : 600
133
+ end
134
+
135
+ def accepted_issuer?(value)
136
+ accepted_issuers.map(&:to_s).include?(value.to_s)
137
+ end
138
+
139
+ def accepted_issuers
140
+ issuers = Settings.value(:fleet, :auth, :accepted_issuers, default: [issuer])
141
+ issuers = [issuer] if Array(issuers).empty?
142
+ Array(issuers)
143
+ end
144
+
145
+ def clock_skew_seconds
146
+ Settings.value(:fleet, :auth, :max_clock_skew_seconds, default: 30).to_i
147
+ end
148
+
149
+ def issuer
150
+ Settings.value(:fleet, :auth, :issuer, default: 'legion-llm')
151
+ end
152
+
153
+ def audience
154
+ Settings.value(:fleet, :auth, :audience, default: 'lex-llm-fleet-worker')
155
+ end
156
+
157
+ def algorithm
158
+ Settings.value(:fleet, :auth, :algorithm, default: 'HS256')
159
+ end
160
+
161
+ def signing_key
162
+ if defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
163
+ return ::Legion::Crypt.cluster_secret
164
+ end
165
+
166
+ raise TokenError, 'no signing key available - Legion::Crypt not initialized'
167
+ rescue TokenError
168
+ raise
169
+ rescue StandardError => e
170
+ raise TokenError, "no signing key available: #{e.message}"
171
+ end
172
+
173
+ def jwt_module
174
+ return ::Legion::Crypt::JWT if defined?(::Legion::Crypt::JWT) && ::Legion::Crypt::JWT.respond_to?(:verify)
175
+
176
+ raise TokenError, 'Legion::Crypt::JWT.verify unavailable'
177
+ end
178
+
179
# Returns a shallow copy of +hash+ with keys converted to symbols where possible.
#
# Envelope-like objects are first converted with +to_h+. This matters for
# Struct-based envelopes: a Struct responds to +each+ but yields its member
# values rather than key/value pairs, so iterating it directly would produce
# a hash keyed by the payload itself and every claim comparison would fail.
# Anything that cannot be treated as key/value data yields an empty hash.
def symbolize_keys(hash)
  pairs = if hash.is_a?(Hash) || hash.is_a?(Array) || !hash.respond_to?(:to_h)
            hash
          else
            hash.to_h
          end
  return {} unless pairs.respond_to?(:each)

  pairs.each_with_object({}) do |(key, value), result|
    result[key.respond_to?(:to_sym) ? key.to_sym : key] = value
  end
end
186
+
187
# Produces an order- and key-type-independent form of +value+ for comparing
# envelope fields against token claims: hash keys become strings and are
# sorted, symbols become strings, and arrays/hashes are processed recursively.
def canonical_value(value)
  case value
  when Hash
    stringified = value.to_h { |key, child| [key.to_s, canonical_value(child)] }
    stringified.sort.to_h
  when Array then value.map { |child| canonical_value(child) }
  when Symbol then value.to_s
  else value
  end
end
201
+ end
202
+ end
203
+ end
204
+ end
205
+ end
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+
5
+ require_relative 'settings'
6
+ require_relative 'token_validator'
7
+
8
+ module Legion
9
+ module Extensions
10
+ module Llm
11
+ module Fleet
12
+ # Applies responder-side policy and dispatches a fleet request to a local lex-llm provider.
13
+ module WorkerExecution
14
+ class PolicyError < StandardError; end
15
+
16
+ @idempotency_keys = Concurrent::Map.new
17
+ @idempotency_mutex = Mutex.new
18
+
19
+ module_function
20
+
21
+ def call(envelope:, provider:)
22
+ claims = nil
23
+ idempotency_key = nil
24
+ claims = validate_identity!(envelope)
25
+ validate_policy!(envelope)
26
+ idempotency_key = validate_idempotency!(envelope)
27
+ response = dispatch_local_provider!(envelope: envelope, provider: provider)
28
+ mark_idempotency_success!(idempotency_key) if idempotency_key
29
+ TokenValidator.mark_replay!(claims[:jti]) if claims.is_a?(Hash)
30
+ response
31
+ rescue TokenError => e
32
+ release_idempotency!(idempotency_key) if idempotency_key
33
+ release_replay!(claims)
34
+ raise PolicyError, e.message
35
+ rescue StandardError
36
+ release_idempotency!(idempotency_key) if idempotency_key
37
+ release_replay!(claims)
38
+ raise
39
+ end
40
+
41
+ def validate_identity!(envelope)
42
+ return true unless responder_setting(:require_auth, default: true)
43
+
44
+ TokenValidator.validate!(token: envelope_value(envelope, :signed_token), envelope: envelope)
45
+ end
46
+
47
+ def validate_policy!(_envelope)
48
+ return true unless responder_setting(:require_policy, default: false)
49
+
50
+ raise PolicyError, 'fleet responder policy enforcement unavailable'
51
+ end
52
+
53
+ def validate_idempotency!(envelope)
54
+ return nil unless responder_setting(:require_idempotency, default: true)
55
+
56
+ key = envelope_value(envelope, :idempotency_key)
57
+ raise PolicyError, 'fleet idempotency_key is required' if key.to_s.empty?
58
+
59
+ reserve_idempotency_key!(key.to_s)
60
+ key.to_s
61
+ end
62
+
63
# Routes a fleet request to the matching method on the local provider.
#
# +provider+ may be a provider object or a callable that builds one from the
# envelope (a callable is only invoked when it does not itself respond to +chat+).
# Request params are passed as keyword arguments alongside the model.
#
# @raise [PolicyError] when the envelope names an operation that is not supported
def dispatch_local_provider!(envelope:, provider:)
  provider = provider.call(envelope) if provider.respond_to?(:call) && !provider.respond_to?(:chat)
  operation = envelope_value(envelope, :operation).to_sym
  params = normalize_hash(envelope_value(envelope, :params) || {})
  model = envelope_value(envelope, :model)

  # Operations that take a message list, mapped to the provider method that serves them.
  message_operations = { chat: :chat, stream: :stream_chat, count_tokens: :count_tokens }

  if message_operations.key?(operation)
    provider.public_send(
      message_operations[operation],
      messages: params.fetch(:messages, []),
      model: model,
      **except(params, :messages)
    )
  elsif operation == :embed
    provider.embed(text: params[:text], model: model, **except(params, :text))
  else
    raise PolicyError, "unsupported fleet operation: #{operation}"
  end
end
82
+
83
+ def reset_idempotency_cache!
84
+ @idempotency_keys = Concurrent::Map.new
85
+ @idempotency_mutex = Mutex.new
86
+ end
87
+
88
+ def mark_idempotency_success!(key)
89
+ @idempotency_mutex.synchronize do
90
+ @idempotency_keys[key.to_s] = { state: :complete, expires_at: Time.now.to_i + idempotency_ttl_seconds }
91
+ end
92
+ end
93
+
94
+ def release_idempotency!(key)
95
+ @idempotency_mutex.synchronize { @idempotency_keys.delete(key.to_s) }
96
+ end
97
+
98
+ def release_replay!(claims)
99
+ return unless claims.is_a?(Hash) && claims[:jti]
100
+
101
+ TokenValidator.release_replay!(claims[:jti])
102
+ end
103
+
104
+ def purge_idempotency_cache!
105
+ @idempotency_mutex.synchronize do
106
+ now = Time.now.to_i
107
+ @idempotency_keys.each_pair do |key, entry|
108
+ @idempotency_keys.delete(key) if entry[:expires_at] <= now
109
+ end
110
+ end
111
+ end
112
+
113
# Reserves +key+ as in-flight for the idempotency TTL.
#
# Expired entries are evicted first, while the mutex is already held, so the
# cache cannot grow without bound on a long-running responder (the replay
# cache in TokenValidator purges the same way on reservation).
#
# @raise [PolicyError] when an unexpired reservation for +key+ already exists
def reserve_idempotency_key!(key)
  @idempotency_mutex.synchronize do
    now = Time.now.to_i
    @idempotency_keys.each_pair do |cached_key, entry|
      @idempotency_keys.delete(cached_key) if entry[:expires_at] <= now
    end

    existing = @idempotency_keys[key]
    raise PolicyError, 'duplicate fleet idempotency key' if existing && existing[:expires_at] > now

    @idempotency_keys[key] = { state: :inflight, expires_at: now + idempotency_ttl_seconds }
  end
end
122
+
123
+ def idempotency_ttl_seconds
124
+ ttl = responder_setting(:idempotency_ttl_seconds, default: 600).to_i
125
+ ttl.positive? ? ttl : 600
126
+ end
127
+
128
+ def responder_setting(key, default:)
129
+ value = Settings.value(:fleet, :responder, key, default: nil)
130
+ return auth_required? if key == :require_auth && value.nil?
131
+ return default if value.nil?
132
+
133
+ value
134
+ end
135
+
136
+ def auth_required?
137
+ Settings.value(:fleet, :auth, :require_signed_token, default: true) != false
138
+ end
139
+
140
+ def envelope_value(envelope, key)
141
+ return nil unless envelope.respond_to?(:key?)
142
+
143
+ envelope[key] || envelope[key.to_s]
144
+ end
145
+
146
# Returns a shallow copy of +hash+ whose keys are symbols wherever the key
# supports +to_sym+. Anything that cannot be iterated yields an empty hash.
def normalize_hash(hash)
  return {} unless hash.respond_to?(:each)

  hash.inject({}) do |normalized, (key, value)|
    normalized[key.respond_to?(:to_sym) ? key.to_sym : key] = value
    normalized
  end
end
153
+
154
# Returns a symbol-keyed copy of +hash+ without the given +keys+. Keys are
# compared after symbolizing, so string and symbol spellings both match.
def except(hash, *keys)
  dropped = keys.map(&:to_sym)
  hash.each_with_object({}) do |(key, value), kept|
    name = key.respond_to?(:to_sym) ? key.to_sym : key
    next if dropped.include?(name)

    kept[name] = value
  end
end
161
+ end
162
+ end
163
+ end
164
+ end
165
+ end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Llm
6
- VERSION = '0.4.2'
6
+ VERSION = '0.4.3'
7
7
  end
8
8
  end
9
9
  end
@@ -129,7 +129,14 @@ module Legion
129
129
  audience: 'lex-llm-fleet-worker',
130
130
  algorithm: 'HS256',
131
131
  accepted_issuers: ['legion-llm'],
132
- max_clock_skew_seconds: 30
132
+ max_clock_skew_seconds: 30,
133
+ replay_ttl_seconds: 600
134
+ },
135
+ responder: {
136
+ require_auth: nil,
137
+ require_policy: false,
138
+ require_idempotency: true,
139
+ idempotency_ttl_seconds: 600
133
140
  }
134
141
  }
135
142
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: concurrent-ruby
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '1.2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '1.2'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: event_stream_parser
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -94,6 +108,20 @@ dependencies:
94
108
  - - ">="
95
109
  - !ruby/object:Gem::Version
96
110
  version: '1'
111
+ - !ruby/object:Gem::Dependency
112
+ name: legion-crypt
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: 1.5.1
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: 1.5.1
97
125
  - !ruby/object:Gem::Dependency
98
126
  name: legion-json
99
127
  requirement: !ruby/object:Gem::Requirement
@@ -229,7 +257,12 @@ files:
229
257
  - lib/legion/extensions/llm/fleet/default_exchange_reply.rb
230
258
  - lib/legion/extensions/llm/fleet/envelope_validation.rb
231
259
  - lib/legion/extensions/llm/fleet/protocol.rb
260
+ - lib/legion/extensions/llm/fleet/provider_responder.rb
232
261
  - lib/legion/extensions/llm/fleet/publish_safety.rb
262
+ - lib/legion/extensions/llm/fleet/settings.rb
263
+ - lib/legion/extensions/llm/fleet/token_error.rb
264
+ - lib/legion/extensions/llm/fleet/token_validator.rb
265
+ - lib/legion/extensions/llm/fleet/worker_execution.rb
233
266
  - lib/legion/extensions/llm/image.rb
234
267
  - lib/legion/extensions/llm/message.rb
235
268
  - lib/legion/extensions/llm/mime_type.rb