lex-llm 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -1
- data/lex-llm.gemspec +2 -0
- data/lib/legion/extensions/llm/fleet/provider_responder.rb +304 -0
- data/lib/legion/extensions/llm/fleet/settings.rb +66 -0
- data/lib/legion/extensions/llm/fleet/token_error.rb +11 -0
- data/lib/legion/extensions/llm/fleet/token_validator.rb +205 -0
- data/lib/legion/extensions/llm/fleet/worker_execution.rb +165 -0
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +8 -1
- metadata +34 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b266813f29f9a144b2a57408f39fe98bc27b7a53e59b13871ca22c0dc8cf6127
|
|
4
|
+
data.tar.gz: 80cb7a8866d4cd2b9c150dd4567f99aa09d12208f8acfa38df3dd578c7c93831
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d43d28ab982b938f012a66000f73ee7cb4b9cae34ae31cbb6c11794d87845280ae919e2b91b81b594f76f6e11b95e9c57ff796c46d1ce595a74962b6d4a91800
|
|
7
|
+
data.tar.gz: 1976f2adfd60d698e547e92b00f7d779ab28b5c75c975a7245bc58ecc94dbb0d81b767c76552b3d1cee24a53fdd8d2bb98d1e3cb204816e27963491326daee50
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.3 - 2026-05-06
|
|
4
|
+
|
|
5
|
+
- Move provider-owned fleet responder execution into `lex-llm` so provider gems no longer depend on `legion-llm`.
|
|
6
|
+
- Add shared responder-side fleet token validation, idempotency protection, provider dispatch, and response/error publishing helpers.
|
|
7
|
+
- Reserve fleet replay tokens before provider dispatch, split replay TTL into auth settings, and raise explicit responder transport configuration errors.
|
|
8
|
+
|
|
3
9
|
## 0.4.2 - 2026-05-06
|
|
4
10
|
|
|
5
11
|
- Remove the temporary settings logger wrapper and lazy-load fleet transport envelopes so `lex-llm` boot does not force `legion-transport` loading.
|
data/README.md
CHANGED
|
@@ -48,7 +48,7 @@ gem 'lex-llm'
|
|
|
48
48
|
Provider extensions should declare `lex-llm` as a gemspec dependency:
|
|
49
49
|
|
|
50
50
|
```ruby
|
|
51
|
-
spec.add_dependency 'lex-llm', '>= 0.4.2'
|
|
51
|
+
spec.add_dependency 'lex-llm', '>= 0.4.3'
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
For local development across LegionIO repos, prefer a local path override in the app or test `Gemfile`, not a permanent git dependency in the gemspec.
|
data/lex-llm.gemspec
CHANGED
|
@@ -29,11 +29,13 @@ Gem::Specification.new do |spec|
|
|
|
29
29
|
|
|
30
30
|
# Runtime dependencies
|
|
31
31
|
spec.add_dependency 'base64'
|
|
32
|
+
spec.add_dependency 'concurrent-ruby', '>= 1.2'
|
|
32
33
|
spec.add_dependency 'event_stream_parser', '~> 1'
|
|
33
34
|
spec.add_dependency 'faraday', ENV['FARADAY_VERSION'] || '>= 1.10.0'
|
|
34
35
|
spec.add_dependency 'faraday-multipart', '>= 1'
|
|
35
36
|
spec.add_dependency 'faraday-net_http', '>= 1'
|
|
36
37
|
spec.add_dependency 'faraday-retry', '>= 1'
|
|
38
|
+
spec.add_dependency 'legion-crypt', '>= 1.5.1'
|
|
37
39
|
spec.add_dependency 'legion-json', '>= 1.2.1'
|
|
38
40
|
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
39
41
|
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
require_relative 'protocol'
|
|
6
|
+
require_relative 'settings'
|
|
7
|
+
require_relative 'worker_execution'
|
|
8
|
+
|
|
9
|
+
module Legion
|
|
10
|
+
module Extensions
|
|
11
|
+
module Llm
|
|
12
|
+
module Transport
|
|
13
|
+
# Autoloads responder publish envelopes without booting legion-transport during lex-llm load.
|
|
14
|
+
module Messages
|
|
15
|
+
autoload :FleetError, File.expand_path('../transport/messages/fleet_error', __dir__) unless
|
|
16
|
+
autoload?(:FleetError) || const_defined?(:FleetError, false)
|
|
17
|
+
autoload :FleetResponse, File.expand_path('../transport/messages/fleet_response', __dir__) unless
|
|
18
|
+
autoload?(:FleetResponse) || const_defined?(:FleetResponse, false)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
module Fleet
|
|
23
|
+
# Shared implementation for provider-owned fleet responder runners.
|
|
24
|
+
module ProviderResponder
|
|
25
|
+
class ConfigurationError < StandardError; end
|
|
26
|
+
|
|
27
|
+
REQUIRED_FIELDS = %i[
|
|
28
|
+
request_id correlation_id idempotency_key operation provider provider_instance model params reply_to
|
|
29
|
+
message_context caller trace_context signed_token timeout_seconds expires_at protocol_version
|
|
30
|
+
].freeze
|
|
31
|
+
LEGACY_FIELDS = %i[schema_version request_type fleet_correlation_id].freeze
|
|
32
|
+
|
|
33
|
+
# Thin wrapper around a parsed fleet request payload.
#
# The payload hash lives in +data+. Every lookup is resolved against both the
# symbol and the string form of the key, so callers do not need to know how the
# payload was keyed.
FleetEnvelope = Struct.new(:data, keyword_init: true) do
  # Returns the value stored under +key+, trying the symbol form first and the
  # string form second. Presence is decided with Hash#key? rather than
  # truthiness, so a stored +false+ or +nil+ under the symbol key is returned
  # as-is instead of falling through to the string key.
  def [](key)
    symbol_key = key.to_sym
    return data[symbol_key] if data.key?(symbol_key)

    data[key.to_s]
  end

  # True when the payload contains +key+ in either symbol or string form.
  def key?(key)
    data.key?(key.to_sym) || data.key?(key.to_s)
  end

  # Returns the stored value when +key+ is present (even if it is +false+ or
  # +nil+), otherwise +default+. Unlike Hash#fetch this never raises.
  def fetch(key, default = nil)
    key?(key) ? self[key] : default
  end

  # The underlying payload hash itself (not a copy).
  def to_h = data

  # Convenience readers for the individual envelope fields.
  def protocol_version = self[:protocol_version]
  def request_id = self[:request_id]
  def correlation_id = self[:correlation_id]
  def idempotency_key = self[:idempotency_key]
  def operation = self[:operation]
  def provider = self[:provider]
  def provider_instance = self[:provider_instance]
  def model = self[:model]
  def reply_to = self[:reply_to]

  # Hash-valued fields fall back to an empty hash when absent.
  def params = self[:params] || {}
  def message_context = self[:message_context] || {}
  def trace_context = self[:trace_context] || {}
end
|
|
60
|
+
|
|
61
|
+
module_function
|
|
62
|
+
|
|
63
|
+
# Public runner entry point mirrors AMQP delivery callbacks, which carry both delivery and property metadata.
|
|
64
|
+
# rubocop:disable Metrics/ParameterLists
|
|
65
|
+
def call(payload:, provider_family:, provider_class:, provider_instances:, delivery: nil, properties: nil)
|
|
66
|
+
envelope = parse_payload(payload)
|
|
67
|
+
check_envelope!(envelope, provider_family:)
|
|
68
|
+
provider = build_provider(envelope:, provider_class:, provider_instances:)
|
|
69
|
+
response = WorkerExecution.call(envelope: envelope, provider: provider)
|
|
70
|
+
publish_response(envelope, response)
|
|
71
|
+
ack(delivery || properties)
|
|
72
|
+
response
|
|
73
|
+
rescue StandardError => e
|
|
74
|
+
safe_publish_error(envelope, e) if defined?(envelope) && envelope
|
|
75
|
+
reject(delivery || properties, requeue: requeue_error?(e))
|
|
76
|
+
raise
|
|
77
|
+
end
|
|
78
|
+
# rubocop:enable Metrics/ParameterLists
|
|
79
|
+
|
|
80
|
+
def enabled_for?(provider_instances)
|
|
81
|
+
instances = resolve_provider_instances(provider_instances)
|
|
82
|
+
instances.any? do |_instance_id, settings|
|
|
83
|
+
truthy?(dig(settings, :fleet, :respond_to_requests))
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# Normalizes an incoming delivery payload into a FleetEnvelope.
#
# Accepts an existing FleetEnvelope (its hash is re-wrapped), a JSON String
# (decoded with parse_json), or any object responding to #to_h; anything else
# is treated as an empty payload. Keys are deep-symbolized before wrapping.
#
# Raises ArgumentError when the decoded payload is not hash-like (for example
# a JSON array, scalar or null). Without this guard the value would be wrapped
# as-is and only fail later with a NoMethodError on the first key lookup.
def parse_payload(payload)
  hash = case payload
         when FleetEnvelope then payload.to_h
         when String then parse_json(payload)
         else payload.respond_to?(:to_h) ? payload.to_h : {}
         end
  raise ArgumentError, 'fleet payload must be a JSON object' unless hash.respond_to?(:key?)

  FleetEnvelope.new(data: deep_symbolize(hash))
end
|
|
98
|
+
|
|
99
|
+
def check_envelope!(envelope, provider_family:)
|
|
100
|
+
reject_legacy_fields!(envelope)
|
|
101
|
+
REQUIRED_FIELDS.each do |field|
|
|
102
|
+
raise ArgumentError, "#{field} is required" unless envelope.key?(field) && !envelope[field].nil?
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
validate_protocol_version!(envelope)
|
|
106
|
+
validate_provider_family!(envelope, provider_family)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def build_provider(envelope:, provider_class:, provider_instances:)
|
|
110
|
+
instances = resolve_provider_instances(provider_instances)
|
|
111
|
+
instance_id = envelope.provider_instance.to_s
|
|
112
|
+
instance_settings = instances[instance_id.to_sym] || instances[instance_id]
|
|
113
|
+
unless instance_settings
|
|
114
|
+
raise ConfigurationError,
|
|
115
|
+
"fleet provider instance is not configured: #{instance_id}"
|
|
116
|
+
end
|
|
117
|
+
unless truthy?(dig(instance_settings, :fleet, :respond_to_requests))
|
|
118
|
+
raise ConfigurationError, "fleet responses are disabled for provider instance: #{instance_id}"
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
provider_class.new(deep_symbolize(instance_settings))
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def publish_response(envelope, response)
|
|
125
|
+
transport_message_class(:FleetResponse).new(
|
|
126
|
+
protocol_version: envelope.protocol_version,
|
|
127
|
+
request_id: envelope.request_id,
|
|
128
|
+
correlation_id: envelope.correlation_id,
|
|
129
|
+
idempotency_key: envelope.idempotency_key,
|
|
130
|
+
operation: envelope.operation,
|
|
131
|
+
provider: envelope.provider,
|
|
132
|
+
provider_instance: envelope.provider_instance,
|
|
133
|
+
model: envelope.model,
|
|
134
|
+
reply_to: envelope.reply_to,
|
|
135
|
+
message_context: envelope.message_context,
|
|
136
|
+
trace_context: envelope.trace_context,
|
|
137
|
+
content: response_content(response),
|
|
138
|
+
tool_calls: response_field(response, :tool_calls) || [],
|
|
139
|
+
usage: response_usage(response),
|
|
140
|
+
finish_reason: response_field(response, :finish_reason),
|
|
141
|
+
metadata: response_metadata(response)
|
|
142
|
+
).publish
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def publish_error(envelope, error)
|
|
146
|
+
transport_message_class(:FleetError).new(
|
|
147
|
+
protocol_version: envelope.protocol_version,
|
|
148
|
+
request_id: envelope.request_id,
|
|
149
|
+
correlation_id: envelope.correlation_id,
|
|
150
|
+
idempotency_key: envelope.idempotency_key,
|
|
151
|
+
operation: envelope.operation,
|
|
152
|
+
provider: envelope.provider,
|
|
153
|
+
provider_instance: envelope.provider_instance,
|
|
154
|
+
model: envelope.model,
|
|
155
|
+
reply_to: envelope.reply_to,
|
|
156
|
+
message_context: envelope.message_context,
|
|
157
|
+
trace_context: envelope.trace_context,
|
|
158
|
+
code: error_code(error),
|
|
159
|
+
message: error.message,
|
|
160
|
+
error_class: error.class.name,
|
|
161
|
+
retryable: retryable_error?(error),
|
|
162
|
+
metadata: {}
|
|
163
|
+
).publish
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
def safe_publish_error(envelope, error)
|
|
167
|
+
publish_error(envelope, error)
|
|
168
|
+
rescue StandardError
|
|
169
|
+
nil
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
def transport_message_class(name)
|
|
173
|
+
::Legion::Extensions::Llm::Transport::Messages.const_get(name)
|
|
174
|
+
rescue LoadError, NameError => e
|
|
175
|
+
raise ConfigurationError, "fleet responder transport unavailable for #{name}: #{e.message}"
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
def ack(delivery)
|
|
179
|
+
return unless delivery
|
|
180
|
+
|
|
181
|
+
if delivery.respond_to?(:ack)
|
|
182
|
+
delivery.ack
|
|
183
|
+
elsif delivery.respond_to?(:channel) && delivery.respond_to?(:delivery_tag)
|
|
184
|
+
delivery.channel.ack(delivery.delivery_tag)
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def reject(delivery, requeue:)
|
|
189
|
+
return unless delivery
|
|
190
|
+
|
|
191
|
+
if delivery.respond_to?(:reject)
|
|
192
|
+
delivery.reject(requeue)
|
|
193
|
+
elsif delivery.respond_to?(:channel) && delivery.respond_to?(:delivery_tag)
|
|
194
|
+
delivery.channel.reject(delivery.delivery_tag, requeue)
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def parse_json(payload)
|
|
199
|
+
if defined?(::Legion::JSON)
|
|
200
|
+
::Legion::JSON.parse(payload)
|
|
201
|
+
else
|
|
202
|
+
::JSON.parse(payload)
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def reject_legacy_fields!(envelope)
|
|
207
|
+
LEGACY_FIELDS.each do |field|
|
|
208
|
+
raise ArgumentError, "#{field} is not supported by fleet protocol v2" if envelope.key?(field)
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def validate_protocol_version!(envelope)
|
|
213
|
+
return if envelope.protocol_version == Protocol::VERSION
|
|
214
|
+
|
|
215
|
+
raise ArgumentError, "protocol_version must be #{Protocol::VERSION}"
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def validate_provider_family!(envelope, provider_family)
|
|
219
|
+
return if envelope.provider.to_s == provider_family.to_s
|
|
220
|
+
|
|
221
|
+
raise ArgumentError, "fleet request provider #{envelope.provider} does not match #{provider_family}"
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
def resolve_provider_instances(provider_instances)
|
|
225
|
+
instances = provider_instances.respond_to?(:call) ? provider_instances.call : provider_instances
|
|
226
|
+
deep_symbolize(instances || {})
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
def requeue_error?(error)
|
|
230
|
+
retryable_error?(error) &&
|
|
231
|
+
Settings.value(:fleet, :consumer, :requeue_transient, default: true) != false
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
# Decides whether a failed fleet request could succeed if delivered again.
#
# Returns false for failures that redelivering the same message cannot fix:
# - ConfigurationError and WorkerExecution::PolicyError;
# - ArgumentError, which check_envelope! raises for missing, legacy or
#   mismatched envelope fields;
# - JSON::ParserError, for a payload that cannot be decoded.
# Treating those as transient would requeue a malformed message over and over.
# Every other error is considered transient and returns true.
def retryable_error?(error)
  permanent = [ConfigurationError, WorkerExecution::PolicyError, ArgumentError, ::JSON::ParserError]
  permanent.none? { |klass| error.is_a?(klass) }
end
|
|
240
|
+
|
|
241
|
+
def error_code(error)
|
|
242
|
+
return 'configuration_error' if error.is_a?(ConfigurationError)
|
|
243
|
+
return 'policy_error' if error.is_a?(WorkerExecution::PolicyError)
|
|
244
|
+
|
|
245
|
+
'provider_error'
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
def response_content(response)
|
|
249
|
+
response_field(response, :content) || response_field(response, :result) || response.to_s
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
# Extracts token usage from a provider response.
#
# Prefers a nested +usage+ (or +tokens+) value and returns it deep-symbolized.
# When the response carries no such value, builds the hash from the top-level
# +input_tokens+ / +output_tokens+ / +thinking_tokens+ fields, omitting any
# that are nil.
def response_usage(response)
  usage = response_field(response, :usage) || response_field(response, :tokens)
  # nil responds to #to_h, so it must be excluded explicitly; otherwise a
  # response without a usage value returns nil here and the per-field fallback
  # below is never reached.
  return deep_symbolize(usage) if !usage.nil? && usage.respond_to?(:to_h)

  {
    input_tokens: response_field(response, :input_tokens),
    output_tokens: response_field(response, :output_tokens),
    thinking_tokens: response_field(response, :thinking_tokens)
  }.compact
end
|
|
262
|
+
|
|
263
|
+
# Extracts the +metadata+ value of a provider response, deep-symbolized.
#
# Always returns a value suitable for the response envelope: an empty hash
# when the response has no metadata or the metadata is not convertible.
def response_metadata(response)
  metadata = response_field(response, :metadata)
  # nil responds to #to_h, so check for it explicitly; otherwise a response
  # without metadata yields nil instead of the intended {}.
  return {} if metadata.nil? || !metadata.respond_to?(:to_h)

  deep_symbolize(metadata)
end
|
|
267
|
+
|
|
268
|
+
def response_field(response, field)
|
|
269
|
+
return response[field] if response.respond_to?(:key?) && response.key?(field)
|
|
270
|
+
return response[field.to_s] if response.respond_to?(:key?) && response.key?(field.to_s)
|
|
271
|
+
return response.public_send(field) if response.respond_to?(field)
|
|
272
|
+
|
|
273
|
+
nil
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
def dig(hash, *keys)
|
|
277
|
+
keys.reduce(hash) do |current, key|
|
|
278
|
+
break nil unless current.respond_to?(:key?)
|
|
279
|
+
|
|
280
|
+
current[key.to_sym] || current[key.to_s]
|
|
281
|
+
end
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
def truthy?(value)
|
|
285
|
+
value == true || value.to_s == 'true'
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
def deep_symbolize(value)
|
|
289
|
+
case value
|
|
290
|
+
when Hash
|
|
291
|
+
value.each_with_object({}) do |(key, child), result|
|
|
292
|
+
result[key.respond_to?(:to_sym) ? key.to_sym : key] = deep_symbolize(child)
|
|
293
|
+
end
|
|
294
|
+
when Array
|
|
295
|
+
value.map { |child| deep_symbolize(child) }
|
|
296
|
+
else
|
|
297
|
+
value
|
|
298
|
+
end
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
end
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Llm
|
|
6
|
+
module Fleet
|
|
7
|
+
# Reads fleet settings from Legion::Settings when available, falling back to lex-llm defaults.
|
|
8
|
+
module Settings
|
|
9
|
+
module_function
|
|
10
|
+
|
|
11
|
+
def value(*path, default:)
|
|
12
|
+
configured_llm_settings.each do |configured|
|
|
13
|
+
found = dig(configured, *path)
|
|
14
|
+
return found unless found.nil?
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
fallback = dig(default_settings, *path)
|
|
18
|
+
fallback.nil? ? default : fallback
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def configured_llm_settings
|
|
22
|
+
return [] unless defined?(::Legion::Settings) && ::Legion::Settings.respond_to?(:[])
|
|
23
|
+
|
|
24
|
+
configured = []
|
|
25
|
+
extensions = safe_fetch(::Legion::Settings, :extensions)
|
|
26
|
+
extension_llm = dig(extensions, :llm)
|
|
27
|
+
configured << extension_llm if extension_llm.respond_to?(:key?)
|
|
28
|
+
|
|
29
|
+
llm = safe_fetch(::Legion::Settings, :llm)
|
|
30
|
+
configured << llm if llm.respond_to?(:key?)
|
|
31
|
+
configured
|
|
32
|
+
rescue StandardError
|
|
33
|
+
[]
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def dig(hash, *keys)
|
|
37
|
+
keys.reduce(hash) do |current, key|
|
|
38
|
+
break nil unless current.respond_to?(:key?)
|
|
39
|
+
|
|
40
|
+
symbol_key = key.respond_to?(:to_sym) ? key.to_sym : key
|
|
41
|
+
string_key = key.to_s
|
|
42
|
+
if current.key?(symbol_key)
|
|
43
|
+
current[symbol_key]
|
|
44
|
+
elsif current.key?(string_key)
|
|
45
|
+
current[string_key]
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def safe_fetch(source, key)
|
|
51
|
+
source[key] || source[key.to_s]
|
|
52
|
+
rescue StandardError
|
|
53
|
+
nil
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def default_settings
|
|
57
|
+
return ::Legion::Extensions::Llm.default_settings if
|
|
58
|
+
::Legion::Extensions::Llm.respond_to?(:default_settings)
|
|
59
|
+
|
|
60
|
+
{}
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'concurrent'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
require_relative 'settings'
|
|
7
|
+
require_relative 'token_error'
|
|
8
|
+
|
|
9
|
+
module Legion
|
|
10
|
+
module Extensions
|
|
11
|
+
module Llm
|
|
12
|
+
module Fleet
|
|
13
|
+
# Verifies responder-side fleet JWTs and prevents replay on provider nodes.
|
|
14
|
+
module TokenValidator
|
|
15
|
+
@seen_jtis = Concurrent::Map.new
|
|
16
|
+
@replay_mutex = Mutex.new
|
|
17
|
+
|
|
18
|
+
module_function
|
|
19
|
+
|
|
20
|
+
def validate!(token:, envelope:, record_replay: true)
|
|
21
|
+
raise TokenError, 'fleet token is required' if token.to_s.empty?
|
|
22
|
+
|
|
23
|
+
claims = symbolize_keys(jwt_module.verify(
|
|
24
|
+
token,
|
|
25
|
+
verification_key: signing_key,
|
|
26
|
+
issuer: issuer,
|
|
27
|
+
algorithm: algorithm,
|
|
28
|
+
verify_issuer: false
|
|
29
|
+
))
|
|
30
|
+
validate_registered_claims!(claims)
|
|
31
|
+
validate_request_expiry!(claims)
|
|
32
|
+
validate_envelope_claims!(claims, symbolize_keys(envelope || {}))
|
|
33
|
+
record_replay ? reserve_replay!(claims[:jti]) : ensure_not_replayed!(claims[:jti])
|
|
34
|
+
claims
|
|
35
|
+
rescue TokenError
|
|
36
|
+
raise
|
|
37
|
+
rescue StandardError => e
|
|
38
|
+
raise TokenError, "fleet token verification failed: #{e.message}"
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def reset_replay_cache!
|
|
42
|
+
@seen_jtis = Concurrent::Map.new
|
|
43
|
+
@replay_mutex = Mutex.new
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def validate_registered_claims!(claims)
|
|
47
|
+
now = Time.now.to_i
|
|
48
|
+
raise TokenError, 'fleet token issuer mismatch' unless accepted_issuer?(claims[:iss])
|
|
49
|
+
raise TokenError, 'fleet token audience mismatch' unless claims[:aud].to_s == audience
|
|
50
|
+
if claims[:exp].nil? || claims[:exp].to_i + clock_skew_seconds <= now
|
|
51
|
+
raise TokenError,
|
|
52
|
+
'fleet token expired'
|
|
53
|
+
end
|
|
54
|
+
if claims[:nbf].nil? || claims[:nbf].to_i - clock_skew_seconds > now
|
|
55
|
+
raise TokenError,
|
|
56
|
+
'fleet token not yet valid'
|
|
57
|
+
end
|
|
58
|
+
raise TokenError, 'fleet token missing jti' if claims[:jti].to_s.empty?
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def validate_request_expiry!(claims)
|
|
62
|
+
expires_at = claims[:expires_at]
|
|
63
|
+
raise TokenError, 'fleet request expires_at is required' if expires_at.to_s.empty?
|
|
64
|
+
|
|
65
|
+
expires = Time.iso8601(expires_at.to_s)
|
|
66
|
+
raise TokenError, 'fleet request expired' if expires + clock_skew_seconds <= Time.now.utc
|
|
67
|
+
rescue ArgumentError
|
|
68
|
+
raise TokenError, 'fleet request expires_at is invalid'
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def validate_envelope_claims!(claims, envelope)
|
|
72
|
+
%i[
|
|
73
|
+
request_id correlation_id idempotency_key operation provider provider_instance
|
|
74
|
+
model reply_to message_context params caller trace_context timeout_seconds expires_at
|
|
75
|
+
].each do |key|
|
|
76
|
+
expected = canonical_value(envelope[key])
|
|
77
|
+
actual = canonical_value(claims[key])
|
|
78
|
+
raise TokenError, "fleet token #{key} claim mismatch" unless actual == expected
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def reserve_replay!(jti)
|
|
83
|
+
@replay_mutex.synchronize do
|
|
84
|
+
now = Time.now.to_i
|
|
85
|
+
purge_replay_cache_locked!(now)
|
|
86
|
+
existing = @seen_jtis[jti.to_s]
|
|
87
|
+
raise TokenError, 'fleet token replay detected' if active_replay?(existing, now)
|
|
88
|
+
|
|
89
|
+
@seen_jtis[jti.to_s] = replay_entry(:inflight, now)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def mark_replay!(jti)
|
|
94
|
+
@replay_mutex.synchronize do
|
|
95
|
+
@seen_jtis[jti.to_s] = replay_entry(:complete)
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def release_replay!(jti)
|
|
100
|
+
@replay_mutex.synchronize do
|
|
101
|
+
entry = @seen_jtis[jti.to_s]
|
|
102
|
+
@seen_jtis.delete(jti.to_s) if entry.nil? || entry[:state] == :inflight
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def ensure_not_replayed!(jti)
|
|
107
|
+
@replay_mutex.synchronize do
|
|
108
|
+
now = Time.now.to_i
|
|
109
|
+
purge_replay_cache_locked!(now)
|
|
110
|
+
raise TokenError, 'fleet token replay detected' if active_replay?(@seen_jtis[jti.to_s], now)
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def purge_replay_cache!
|
|
115
|
+
@replay_mutex.synchronize { purge_replay_cache_locked!(Time.now.to_i) }
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def purge_replay_cache_locked!(now)
|
|
119
|
+
@seen_jtis.each_pair { |jti, entry| @seen_jtis.delete(jti) unless active_replay?(entry, now) }
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def active_replay?(entry, now)
|
|
123
|
+
entry && entry[:expires_at] > now
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
def replay_entry(state, now = Time.now.to_i)
|
|
127
|
+
{ state: state, expires_at: now + replay_ttl_seconds }
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def replay_ttl_seconds
|
|
131
|
+
ttl = Settings.value(:fleet, :auth, :replay_ttl_seconds, default: 600).to_i
|
|
132
|
+
ttl.positive? ? ttl : 600
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def accepted_issuer?(value)
|
|
136
|
+
accepted_issuers.map(&:to_s).include?(value.to_s)
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def accepted_issuers
|
|
140
|
+
issuers = Settings.value(:fleet, :auth, :accepted_issuers, default: [issuer])
|
|
141
|
+
issuers = [issuer] if Array(issuers).empty?
|
|
142
|
+
Array(issuers)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def clock_skew_seconds
|
|
146
|
+
Settings.value(:fleet, :auth, :max_clock_skew_seconds, default: 30).to_i
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def issuer
|
|
150
|
+
Settings.value(:fleet, :auth, :issuer, default: 'legion-llm')
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
def audience
|
|
154
|
+
Settings.value(:fleet, :auth, :audience, default: 'lex-llm-fleet-worker')
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def algorithm
|
|
158
|
+
Settings.value(:fleet, :auth, :algorithm, default: 'HS256')
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
def signing_key
|
|
162
|
+
if defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
|
|
163
|
+
return ::Legion::Crypt.cluster_secret
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
raise TokenError, 'no signing key available - Legion::Crypt not initialized'
|
|
167
|
+
rescue TokenError
|
|
168
|
+
raise
|
|
169
|
+
rescue StandardError => e
|
|
170
|
+
raise TokenError, "no signing key available: #{e.message}"
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def jwt_module
|
|
174
|
+
return ::Legion::Crypt::JWT if defined?(::Legion::Crypt::JWT) && ::Legion::Crypt::JWT.respond_to?(:verify)
|
|
175
|
+
|
|
176
|
+
raise TokenError, 'Legion::Crypt::JWT.verify unavailable'
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
# Returns a new Hash whose top-level keys are converted to symbols. Keys that
# cannot be symbolized are kept unchanged; nested values are not touched.
#
# Objects that are neither a Hash nor an Array are first unwrapped with #to_h
# when they support it. This matters for Struct-based envelopes: a Struct
# responds to #each but yields its member values rather than key/value pairs,
# so iterating it directly would produce one junk entry and every envelope
# claim lookup in validate_envelope_claims! would see nil.
#
# Returns {} for nil and for anything that cannot be enumerated.
def symbolize_keys(hash)
  pairs = hash
  pairs = hash.to_h if !hash.is_a?(Hash) && !hash.is_a?(Array) && hash.respond_to?(:to_h)
  return {} unless pairs.respond_to?(:each)

  pairs.each_with_object({}) do |(key, value), result|
    result[key.respond_to?(:to_sym) ? key.to_sym : key] = value
  end
end
|
|
186
|
+
|
|
187
|
+
def canonical_value(value)
|
|
188
|
+
case value
|
|
189
|
+
when Hash
|
|
190
|
+
value.each_with_object({}) do |(key, child), result|
|
|
191
|
+
result[key.to_s] = canonical_value(child)
|
|
192
|
+
end.sort.to_h
|
|
193
|
+
when Array
|
|
194
|
+
value.map { |child| canonical_value(child) }
|
|
195
|
+
when Symbol
|
|
196
|
+
value.to_s
|
|
197
|
+
else
|
|
198
|
+
value
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'concurrent'
|
|
4
|
+
|
|
5
|
+
require_relative 'settings'
|
|
6
|
+
require_relative 'token_validator'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module Extensions
|
|
10
|
+
module Llm
|
|
11
|
+
module Fleet
|
|
12
|
+
# Applies responder-side policy and dispatches a fleet request to a local lex-llm provider.
|
|
13
|
+
module WorkerExecution
|
|
14
|
+
class PolicyError < StandardError; end
|
|
15
|
+
|
|
16
|
+
@idempotency_keys = Concurrent::Map.new
|
|
17
|
+
@idempotency_mutex = Mutex.new
|
|
18
|
+
|
|
19
|
+
module_function
|
|
20
|
+
|
|
21
|
+
def call(envelope:, provider:)
|
|
22
|
+
claims = nil
|
|
23
|
+
idempotency_key = nil
|
|
24
|
+
claims = validate_identity!(envelope)
|
|
25
|
+
validate_policy!(envelope)
|
|
26
|
+
idempotency_key = validate_idempotency!(envelope)
|
|
27
|
+
response = dispatch_local_provider!(envelope: envelope, provider: provider)
|
|
28
|
+
mark_idempotency_success!(idempotency_key) if idempotency_key
|
|
29
|
+
TokenValidator.mark_replay!(claims[:jti]) if claims.is_a?(Hash)
|
|
30
|
+
response
|
|
31
|
+
rescue TokenError => e
|
|
32
|
+
release_idempotency!(idempotency_key) if idempotency_key
|
|
33
|
+
release_replay!(claims)
|
|
34
|
+
raise PolicyError, e.message
|
|
35
|
+
rescue StandardError
|
|
36
|
+
release_idempotency!(idempotency_key) if idempotency_key
|
|
37
|
+
release_replay!(claims)
|
|
38
|
+
raise
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def validate_identity!(envelope)
|
|
42
|
+
return true unless responder_setting(:require_auth, default: true)
|
|
43
|
+
|
|
44
|
+
TokenValidator.validate!(token: envelope_value(envelope, :signed_token), envelope: envelope)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def validate_policy!(_envelope)
|
|
48
|
+
return true unless responder_setting(:require_policy, default: false)
|
|
49
|
+
|
|
50
|
+
raise PolicyError, 'fleet responder policy enforcement unavailable'
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def validate_idempotency!(envelope)
|
|
54
|
+
return nil unless responder_setting(:require_idempotency, default: true)
|
|
55
|
+
|
|
56
|
+
key = envelope_value(envelope, :idempotency_key)
|
|
57
|
+
raise PolicyError, 'fleet idempotency_key is required' if key.to_s.empty?
|
|
58
|
+
|
|
59
|
+
reserve_idempotency_key!(key.to_s)
|
|
60
|
+
key.to_s
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def dispatch_local_provider!(envelope:, provider:)
|
|
64
|
+
provider = provider.call(envelope) if provider.respond_to?(:call) && !provider.respond_to?(:chat)
|
|
65
|
+
operation = envelope_value(envelope, :operation).to_sym
|
|
66
|
+
params = normalize_hash(envelope_value(envelope, :params) || {})
|
|
67
|
+
model = envelope_value(envelope, :model)
|
|
68
|
+
|
|
69
|
+
case operation
|
|
70
|
+
when :chat
|
|
71
|
+
provider.chat(messages: params.fetch(:messages, []), model: model, **except(params, :messages))
|
|
72
|
+
when :stream
|
|
73
|
+
provider.stream_chat(messages: params.fetch(:messages, []), model: model, **except(params, :messages))
|
|
74
|
+
when :embed
|
|
75
|
+
provider.embed(text: params[:text], model: model, **except(params, :text))
|
|
76
|
+
when :count_tokens
|
|
77
|
+
provider.count_tokens(messages: params.fetch(:messages, []), model: model, **except(params, :messages))
|
|
78
|
+
else
|
|
79
|
+
raise PolicyError, "unsupported fleet operation: #{operation}"
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def reset_idempotency_cache!
|
|
84
|
+
@idempotency_keys = Concurrent::Map.new
|
|
85
|
+
@idempotency_mutex = Mutex.new
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def mark_idempotency_success!(key)
|
|
89
|
+
@idempotency_mutex.synchronize do
|
|
90
|
+
@idempotency_keys[key.to_s] = { state: :complete, expires_at: Time.now.to_i + idempotency_ttl_seconds }
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def release_idempotency!(key)
|
|
95
|
+
@idempotency_mutex.synchronize { @idempotency_keys.delete(key.to_s) }
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def release_replay!(claims)
|
|
99
|
+
return unless claims.is_a?(Hash) && claims[:jti]
|
|
100
|
+
|
|
101
|
+
TokenValidator.release_replay!(claims[:jti])
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def purge_idempotency_cache!
|
|
105
|
+
@idempotency_mutex.synchronize do
|
|
106
|
+
now = Time.now.to_i
|
|
107
|
+
@idempotency_keys.each_pair do |key, entry|
|
|
108
|
+
@idempotency_keys.delete(key) if entry[:expires_at] <= now
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Claims +key+ for the request currently being processed.
#
# Runs under @idempotency_mutex. Raises PolicyError when +key+ already has an
# unexpired entry (in flight or completed); otherwise records it as :inflight
# with a TTL of idempotency_ttl_seconds.
#
# Expired entries are dropped on every reservation, mirroring what
# TokenValidator.reserve_replay! does for its replay cache. Without this the
# map only shrinks when purge_idempotency_cache! is called, and no call to it
# is visible in this file, so each distinct key would be retained indefinitely.
def reserve_idempotency_key!(key)
  @idempotency_mutex.synchronize do
    now = Time.now.to_i
    # Inline purge: purge_idempotency_cache! takes the same non-reentrant mutex.
    @idempotency_keys.each_pair do |cached_key, entry|
      @idempotency_keys.delete(cached_key) if entry[:expires_at] <= now
    end

    existing = @idempotency_keys[key]
    raise PolicyError, 'duplicate fleet idempotency key' if existing && existing[:expires_at] > now

    @idempotency_keys[key] = { state: :inflight, expires_at: now + idempotency_ttl_seconds }
  end
end
|
|
122
|
+
|
|
123
|
+
def idempotency_ttl_seconds
|
|
124
|
+
ttl = responder_setting(:idempotency_ttl_seconds, default: 600).to_i
|
|
125
|
+
ttl.positive? ? ttl : 600
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def responder_setting(key, default:)
|
|
129
|
+
value = Settings.value(:fleet, :responder, key, default: nil)
|
|
130
|
+
return auth_required? if key == :require_auth && value.nil?
|
|
131
|
+
return default if value.nil?
|
|
132
|
+
|
|
133
|
+
value
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def auth_required?
|
|
137
|
+
Settings.value(:fleet, :auth, :require_signed_token, default: true) != false
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def envelope_value(envelope, key)
|
|
141
|
+
return nil unless envelope.respond_to?(:key?)
|
|
142
|
+
|
|
143
|
+
envelope[key] || envelope[key.to_s]
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def normalize_hash(hash)
|
|
147
|
+
return {} unless hash.respond_to?(:each)
|
|
148
|
+
|
|
149
|
+
hash.each_with_object({}) do |(key, value), result|
|
|
150
|
+
result[key.respond_to?(:to_sym) ? key.to_sym : key] = value
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def except(hash, *keys)
|
|
155
|
+
exclusions = keys.map(&:to_sym)
|
|
156
|
+
hash.each_with_object({}) do |(key, value), result|
|
|
157
|
+
normalized_key = key.respond_to?(:to_sym) ? key.to_sym : key
|
|
158
|
+
result[normalized_key] = value unless exclusions.include?(normalized_key)
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|
|
@@ -129,7 +129,14 @@ module Legion
|
|
|
129
129
|
audience: 'lex-llm-fleet-worker',
|
|
130
130
|
algorithm: 'HS256',
|
|
131
131
|
accepted_issuers: ['legion-llm'],
|
|
132
|
-
max_clock_skew_seconds: 30
|
|
132
|
+
max_clock_skew_seconds: 30,
|
|
133
|
+
replay_ttl_seconds: 600
|
|
134
|
+
},
|
|
135
|
+
responder: {
|
|
136
|
+
require_auth: nil,
|
|
137
|
+
require_policy: false,
|
|
138
|
+
require_idempotency: true,
|
|
139
|
+
idempotency_ttl_seconds: 600
|
|
133
140
|
}
|
|
134
141
|
}
|
|
135
142
|
}
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.2
|
|
4
|
+
version: 0.4.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -24,6 +24,20 @@ dependencies:
|
|
|
24
24
|
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: concurrent-ruby
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.2'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.2'
|
|
27
41
|
- !ruby/object:Gem::Dependency
|
|
28
42
|
name: event_stream_parser
|
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -94,6 +108,20 @@ dependencies:
|
|
|
94
108
|
- - ">="
|
|
95
109
|
- !ruby/object:Gem::Version
|
|
96
110
|
version: '1'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: legion-crypt
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - ">="
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: 1.5.1
|
|
118
|
+
type: :runtime
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - ">="
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: 1.5.1
|
|
97
125
|
- !ruby/object:Gem::Dependency
|
|
98
126
|
name: legion-json
|
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -229,7 +257,12 @@ files:
|
|
|
229
257
|
- lib/legion/extensions/llm/fleet/default_exchange_reply.rb
|
|
230
258
|
- lib/legion/extensions/llm/fleet/envelope_validation.rb
|
|
231
259
|
- lib/legion/extensions/llm/fleet/protocol.rb
|
|
260
|
+
- lib/legion/extensions/llm/fleet/provider_responder.rb
|
|
232
261
|
- lib/legion/extensions/llm/fleet/publish_safety.rb
|
|
262
|
+
- lib/legion/extensions/llm/fleet/settings.rb
|
|
263
|
+
- lib/legion/extensions/llm/fleet/token_error.rb
|
|
264
|
+
- lib/legion/extensions/llm/fleet/token_validator.rb
|
|
265
|
+
- lib/legion/extensions/llm/fleet/worker_execution.rb
|
|
233
266
|
- lib/legion/extensions/llm/image.rb
|
|
234
267
|
- lib/legion/extensions/llm/message.rb
|
|
235
268
|
- lib/legion/extensions/llm/mime_type.rb
|