lex-llm 0.3.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +49 -0
- data/README.md +18 -2
- data/lex-llm.gemspec +3 -0
- data/lib/legion/extensions/llm/auto_registration.rb +7 -36
- data/lib/legion/extensions/llm/embedding.rb +1 -1
- data/lib/legion/extensions/llm/error.rb +14 -0
- data/lib/legion/extensions/llm/errors/unsupported_capability.rb +21 -0
- data/lib/legion/extensions/llm/fleet/default_exchange_reply.rb +81 -0
- data/lib/legion/extensions/llm/fleet/envelope_validation.rb +39 -0
- data/lib/legion/extensions/llm/fleet/protocol.rb +16 -0
- data/lib/legion/extensions/llm/fleet/provider_responder.rb +304 -0
- data/lib/legion/extensions/llm/fleet/publish_safety.rb +123 -0
- data/lib/legion/extensions/llm/fleet/settings.rb +66 -0
- data/lib/legion/extensions/llm/fleet/token_error.rb +11 -0
- data/lib/legion/extensions/llm/fleet/token_validator.rb +205 -0
- data/lib/legion/extensions/llm/fleet/worker_execution.rb +165 -0
- data/lib/legion/extensions/llm/message.rb +9 -3
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +37 -36
- data/lib/legion/extensions/llm/provider.rb +198 -4
- data/lib/legion/extensions/llm/provider_contract.rb +21 -0
- data/lib/legion/extensions/llm/provider_settings.rb +18 -1
- data/lib/legion/extensions/llm/responses/chat_response.rb +43 -0
- data/lib/legion/extensions/llm/responses/embedding_response.rb +38 -0
- data/lib/legion/extensions/llm/responses/stream_chunk.rb +43 -0
- data/lib/legion/extensions/llm/responses/thinking_extractor.rb +155 -0
- data/lib/legion/extensions/llm/stream_accumulator.rb +12 -1
- data/lib/legion/extensions/llm/transport/exchanges/fleet.rb +24 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_error.rb +64 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_request.rb +155 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_response.rb +63 -0
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +38 -11
- metadata +62 -1
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      module Fleet
        # Publish-result helpers kept local to fleet messages so they work with older legion-transport releases.
        #
        # The helpers call +correlation_id+, +message_id+, +routing_key+, +channel+ and
        # +handle_exception+ on the including object, so the host must provide them.
        module PublishSafety
          private

          # Whether the caller asked for a structured publish result.
          #
          # @param options [Hash] publish options
          # @return [Boolean] true when :return_result, :mandatory or :publisher_confirm is exactly +true+,
          #   or when :spool is exactly +false+
          def return_publish_result?(options)
            options[:return_result] == true || options[:mandatory] == true || options[:publisher_confirm] == true ||
              options[:spool] == false
          end

          # Registers an +on_return+ block on the publish channel that records a broker return
          # for this message into +return_state+. Does nothing unless options[:mandatory] is
          # exactly +true+ and the channel responds to +on_return+.
          #
          # @param exchange_dest [Object] exchange (or exchange name) being published to
          # @param options [Hash] publish options
          # @param return_state [Hash] mutable hash filled by {#record_return!}
          # @return [Object, nil] whatever +on_return+ returns, or nil when no listener was installed
          def install_return_listener(exchange_dest, options, return_state)
            return unless options[:mandatory] == true

            return_channel = publish_channel(exchange_dest)
            return unless return_channel.respond_to?(:on_return)

            # Capture the ids now: the block runs later, when a return arrives.
            expected_correlation_id = correlation_id
            expected_message_id = message_id
            return_channel.on_return do |return_info, properties, _content|
              next unless returned_message_matches?(
                properties,
                correlation_id: expected_correlation_id,
                message_id: expected_message_id
              )

              record_return!(return_state, return_info)
            end
          end

          # @param properties [Object] properties of the returned message
          # @param correlation_id [Object, nil] id expected on the returned message
          # @param message_id [Object, nil] id expected on the returned message
          # @return [Boolean] false as soon as either id is present on both sides and differs
          def returned_message_matches?(properties, correlation_id:, message_id:)
            return false if property_mismatch?(properties, :correlation_id, correlation_id)
            return false if property_mismatch?(properties, :message_id, message_id)

            true
          end

          # Reports a mismatch only when both the expected id and the returned property are
          # present and differ. Values are compared by their string form, so a Symbol or
          # Integer id held locally still matches the string carried by the returned message
          # instead of being treated as a different message.
          #
          # @param properties [Object] properties of the returned message
          # @param key [Symbol] reader to call on +properties+
          # @param expected [Object, nil] locally expected value
          # @return [Boolean]
          def property_mismatch?(properties, key, expected)
            return false unless expected
            return false unless properties.respond_to?(key)

            value = properties.public_send(key)
            return false unless value

            value.to_s != expected.to_s
          end

          # Marks +return_state+ as returned and copies the reply code/text when the
          # return info object exposes them.
          #
          # @param return_state [Hash] mutated in place
          # @param return_info [Object] return metadata handed to the +on_return+ block
          # @return [void]
          def record_return!(return_state, return_info)
            return_state[:returned] = true
            return_state[:reply_code] = return_info.reply_code if return_info.respond_to?(:reply_code)
            return_state[:reply_text] = return_info.reply_text if return_info.respond_to?(:reply_text)
          end

          # Calls +confirm_select+ on the publish channel when options[:publisher_confirm]
          # is exactly +true+ and the channel supports it.
          #
          # @param exchange_dest [Object] exchange (or exchange name) being published to
          # @param options [Hash] publish options
          # @return [Object, nil]
          def prepare_publisher_confirms(exchange_dest, options)
            return unless options[:publisher_confirm] == true

            confirm_channel = publish_channel(exchange_dest)
            confirm_channel.confirm_select if confirm_channel.respond_to?(:confirm_select)
          end

          # Builds the result hash for a publish that did not raise.
          #
          # The status comes from {#confirm_publish} and is replaced by +:unroutable+ when
          # +return_state[:returned]+ is set. Entries whose value is nil are dropped.
          #
          # @param exchange_dest [Object] exchange (or exchange name) published to
          # @param options [Hash] publish options
          # @param return_state [Hash] state filled by the return listener
          # @return [Hash]
          def publish_result(exchange_dest, options, return_state)
            status = confirm_publish(exchange_dest, options)
            status = :unroutable if return_state[:returned]
            {
              status: status,
              accepted: status == :accepted,
              exchange: exchange_name(exchange_dest),
              routing_key: options[:routing_key] || routing_key || '',
              message_id: message_id,
              return_reply_code: return_state[:reply_code],
              return_reply_text: return_state[:reply_text],
              correlation_id: correlation_id
            }.compact
          end

          # Builds the result hash for a publish that failed with +error+.
          #
          # @param status [Symbol] failure status chosen by the caller
          # @param error [Exception] the error that aborted the publish
          # @param options [Hash] publish options
          # @return [Hash] always has +accepted: false+; nil entries are dropped
          def publish_failure_result(status, error, options)
            {
              status: status,
              accepted: false,
              error_class: error.class.name,
              error: error.message,
              routing_key: options[:routing_key] || routing_key || '',
              message_id: message_id,
              correlation_id: correlation_id
            }.compact
          end

          # Waits for publisher confirms when they were requested.
          #
          # Returns +:accepted+ without waiting when options[:publisher_confirm] is not exactly
          # +true+ or the channel does not respond to +wait_for_confirms+. When
          # options[:publish_confirm_timeout_ms] is set it is converted to seconds and passed
          # to +wait_for_confirms+.
          #
          # NOTE(review): the rescue below needs the Timeout constant to be loaded by the host
          # process; this file does not require it itself - confirm.
          #
          # @param exchange_dest [Object] exchange (or exchange name) published to
          # @param options [Hash] publish options
          # @return [Symbol] +:accepted+, +:nacked+ (only when the wait returns exactly +false+)
          #   or +:confirm_timeout+
          def confirm_publish(exchange_dest, options)
            return :accepted unless options[:publisher_confirm] == true

            confirm_channel = publish_channel(exchange_dest)
            return :accepted unless confirm_channel.respond_to?(:wait_for_confirms)

            timeout = options[:publish_confirm_timeout_ms]
            confirmed = if timeout
                          confirm_channel.wait_for_confirms(timeout.to_f / 1000.0)
                        else
                          confirm_channel.wait_for_confirms
                        end
            confirmed == false ? :nacked : :accepted
          rescue Timeout::Error => e
            handle_exception(e, level: :warn, handled: true, operation: 'llm.fleet.publish.confirm')
            :confirm_timeout
          end

          # @param exchange_dest [Object] exchange (or exchange name)
          # @return [Object] the exchange's own channel when it exposes one, otherwise the host's +channel+
          def publish_channel(exchange_dest)
            return exchange_dest.channel if exchange_dest.respond_to?(:channel)

            channel
          end

          # @param exchange_dest [Object] exchange (or exchange name)
          # @return [String, Object] +exchange_dest.name+ when available, otherwise its string form
          def exchange_name(exchange_dest)
            return exchange_dest.name if exchange_dest.respond_to?(:name)

            exchange_dest.to_s
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      module Fleet
        # Reads fleet settings from Legion::Settings when available, falling back to lex-llm defaults.
        module Settings
          module_function

          # Looks up a setting by path.
          #
          # Each hash from {configured_llm_settings} is tried in order; the first non-nil
          # value wins (so an explicit +false+ is honoured). Otherwise the same path is
          # looked up in {default_settings}, and finally +default+ is returned.
          #
          # @param path [Array<Symbol, String>] nested keys
          # @param default [Object] value returned when nothing is configured
          # @return [Object]
          def value(*path, default:)
            configured_llm_settings.each do |configured|
              found = dig(configured, *path)
              return found unless found.nil?
            end

            fallback = dig(default_settings, *path)
            fallback.nil? ? default : fallback
          end

          # Collects the hash-like LLM settings exposed by ::Legion::Settings:
          # first +extensions -> llm+, then top-level +llm+.
          #
          # @return [Array] zero, one or two hash-like objects; empty when ::Legion::Settings
          #   is not defined, does not respond to +[]+, or the lookup raises
          def configured_llm_settings
            return [] unless defined?(::Legion::Settings) && ::Legion::Settings.respond_to?(:[])

            configured = []
            extensions = safe_fetch(::Legion::Settings, :extensions)
            extension_llm = dig(extensions, :llm)
            configured << extension_llm if extension_llm.respond_to?(:key?)

            llm = safe_fetch(::Legion::Settings, :llm)
            configured << llm if llm.respond_to?(:key?)
            configured
          rescue StandardError
            []
          end

          # Nested lookup that accepts symbol or string keys at every level.
          #
          # @param hash [Object] starting container; anything not responding to +key?+ yields nil
          # @param keys [Array<Symbol, String>] path to follow
          # @return [Object, nil] the value at the path, or nil when any level is missing
          def dig(hash, *keys)
            keys.reduce(hash) do |current, key|
              break nil unless current.respond_to?(:key?)

              symbol_key = key.respond_to?(:to_sym) ? key.to_sym : key
              string_key = key.to_s
              if current.key?(symbol_key)
                current[symbol_key]
              elsif current.key?(string_key)
                current[string_key]
              end
            end
          end

          # Reads +key+ from +source+, trying the string form of the key when the first
          # lookup yields nil. An explicit +false+ stored under +key+ is returned as-is
          # rather than being replaced by the string-key lookup.
          #
          # @param source [#[]] settings container
          # @param key [Symbol, String]
          # @return [Object, nil] nil when nothing is found or the lookup raises
          def safe_fetch(source, key)
            found = source[key]
            found.nil? ? source[key.to_s] : found
          rescue StandardError
            nil
          end

          # @return [Object] +::Legion::Extensions::Llm.default_settings+ when that method exists,
          #   otherwise an empty hash
          def default_settings
            return ::Legion::Extensions::Llm.default_settings if
              ::Legion::Extensions::Llm.respond_to?(:default_settings)

            {}
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'concurrent'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
require_relative 'settings'
|
|
7
|
+
require_relative 'token_error'
|
|
8
|
+
|
|
9
|
+
module Legion
  module Extensions
    module Llm
      module Fleet
        # Verifies responder-side fleet JWTs and prevents replay on provider nodes.
        #
        # Seen token ids (jti) are kept in a module-level map whose entries carry a
        # +:state+ (+:inflight+ or +:complete+) and an +:expires_at+ epoch second.
        module TokenValidator
          @seen_jtis = Concurrent::Map.new
          @replay_mutex = Mutex.new

          module_function

          # Verifies +token+, checks its claims against +envelope+ and applies replay protection.
          #
          # @param token [String] signed fleet token
          # @param envelope [Hash, nil] request envelope whose fields must equal the token claims
          # @param record_replay [Boolean] when true the jti is reserved as in-flight; when false
          #   the jti is only checked against the replay cache
          # @return [Hash] the verified claims with symbolized top-level keys
          # @raise [TokenError] on any failure; other StandardErrors are wrapped in a TokenError
          def validate!(token:, envelope:, record_replay: true)
            raise TokenError, 'fleet token is required' if token.to_s.empty?

            # Issuer verification is switched off here and done in validate_registered_claims!,
            # which accepts any of the configured issuers.
            claims = symbolize_keys(jwt_module.verify(
                                      token,
                                      verification_key: signing_key,
                                      issuer: issuer,
                                      algorithm: algorithm,
                                      verify_issuer: false
                                    ))
            validate_registered_claims!(claims)
            validate_request_expiry!(claims)
            validate_envelope_claims!(claims, symbolize_keys(envelope || {}))
            record_replay ? reserve_replay!(claims[:jti]) : ensure_not_replayed!(claims[:jti])
            claims
          rescue TokenError
            raise
          rescue StandardError => e
            raise TokenError, "fleet token verification failed: #{e.message}"
          end

          # Replaces the replay cache and its mutex with fresh instances.
          #
          # @return [Mutex]
          def reset_replay_cache!
            @seen_jtis = Concurrent::Map.new
            @replay_mutex = Mutex.new
          end

          # Checks iss, aud, exp, nbf and jti. Both exp and nbf are mandatory; each is
          # compared against the current time widened by {clock_skew_seconds}.
          #
          # @param claims [Hash] symbol-keyed claims
          # @raise [TokenError] naming the first claim that fails
          def validate_registered_claims!(claims)
            now = Time.now.to_i
            raise TokenError, 'fleet token issuer mismatch' unless accepted_issuer?(claims[:iss])
            raise TokenError, 'fleet token audience mismatch' unless claims[:aud].to_s == audience
            if claims[:exp].nil? || claims[:exp].to_i + clock_skew_seconds <= now
              raise TokenError,
                    'fleet token expired'
            end
            if claims[:nbf].nil? || claims[:nbf].to_i - clock_skew_seconds > now
              raise TokenError,
                    'fleet token not yet valid'
            end
            raise TokenError, 'fleet token missing jti' if claims[:jti].to_s.empty?
          end

          # Requires an ISO 8601 +expires_at+ claim that, widened by {clock_skew_seconds},
          # is still in the future.
          #
          # @param claims [Hash] symbol-keyed claims
          # @raise [TokenError] when the claim is missing, unparseable or already past
          def validate_request_expiry!(claims)
            expires_at = claims[:expires_at]
            raise TokenError, 'fleet request expires_at is required' if expires_at.to_s.empty?

            expires = Time.iso8601(expires_at.to_s)
            raise TokenError, 'fleet request expired' if expires + clock_skew_seconds <= Time.now.utc
          rescue ArgumentError
            raise TokenError, 'fleet request expires_at is invalid'
          end

          # Requires every listed request field to be equal in the token claims and the
          # envelope after {canonical_value} normalization.
          #
          # @param claims [Hash] symbol-keyed claims
          # @param envelope [Hash] symbol-keyed envelope
          # @raise [TokenError] naming the first field that differs
          def validate_envelope_claims!(claims, envelope)
            %i[
              request_id correlation_id idempotency_key operation provider provider_instance
              model reply_to message_context params caller trace_context timeout_seconds expires_at
            ].each do |key|
              expected = canonical_value(envelope[key])
              actual = canonical_value(claims[key])
              raise TokenError, "fleet token #{key} claim mismatch" unless actual == expected
            end
          end

          # Purges expired entries, rejects a jti that is still active and otherwise
          # stores it as +:inflight+.
          #
          # @param jti [Object] token id
          # @raise [TokenError] when the jti is already active in the cache
          def reserve_replay!(jti)
            @replay_mutex.synchronize do
              now = Time.now.to_i
              purge_replay_cache_locked!(now)
              existing = @seen_jtis[jti.to_s]
              raise TokenError, 'fleet token replay detected' if active_replay?(existing, now)

              @seen_jtis[jti.to_s] = replay_entry(:inflight, now)
            end
          end

          # Stores the jti as +:complete+ with a fresh TTL.
          #
          # @param jti [Object] token id
          def mark_replay!(jti)
            @replay_mutex.synchronize do
              @seen_jtis[jti.to_s] = replay_entry(:complete)
            end
          end

          # Removes the jti unless it has already been marked +:complete+.
          #
          # @param jti [Object] token id
          def release_replay!(jti)
            @replay_mutex.synchronize do
              entry = @seen_jtis[jti.to_s]
              @seen_jtis.delete(jti.to_s) if entry.nil? || entry[:state] == :inflight
            end
          end

          # Like {reserve_replay!} but read-only: nothing is stored for the jti.
          #
          # @param jti [Object] token id
          # @raise [TokenError] when the jti is active in the cache
          def ensure_not_replayed!(jti)
            @replay_mutex.synchronize do
              now = Time.now.to_i
              purge_replay_cache_locked!(now)
              raise TokenError, 'fleet token replay detected' if active_replay?(@seen_jtis[jti.to_s], now)
            end
          end

          # Drops expired replay entries while holding the replay mutex.
          def purge_replay_cache!
            @replay_mutex.synchronize { purge_replay_cache_locked!(Time.now.to_i) }
          end

          # Drops expired replay entries; the caller must already hold the replay mutex.
          #
          # @param now [Integer] current epoch seconds
          def purge_replay_cache_locked!(now)
            @seen_jtis.each_pair { |jti, entry| @seen_jtis.delete(jti) unless active_replay?(entry, now) }
          end

          # @param entry [Hash, nil] replay cache entry
          # @param now [Integer] current epoch seconds
          # @return [Boolean, nil] truthy when the entry exists and has not expired
          def active_replay?(entry, now)
            entry && entry[:expires_at] > now
          end

          # @param state [Symbol] +:inflight+ or +:complete+
          # @param now [Integer] epoch seconds the TTL is counted from
          # @return [Hash] a new cache entry expiring {replay_ttl_seconds} after +now+
          def replay_entry(state, now = Time.now.to_i)
            { state: state, expires_at: now + replay_ttl_seconds }
          end

          # @return [Integer] configured replay TTL; 600 when unset or not positive
          def replay_ttl_seconds
            ttl = Settings.value(:fleet, :auth, :replay_ttl_seconds, default: 600).to_i
            ttl.positive? ? ttl : 600
          end

          # @param value [Object] the token's iss claim
          # @return [Boolean] whether its string form is one of {accepted_issuers}
          def accepted_issuer?(value)
            accepted_issuers.map(&:to_s).include?(value.to_s)
          end

          # @return [Array] configured accepted issuers, or just {issuer} when none are configured
          def accepted_issuers
            issuers = Settings.value(:fleet, :auth, :accepted_issuers, default: [issuer])
            issuers = [issuer] if Array(issuers).empty?
            Array(issuers)
          end

          # @return [Integer] allowed clock skew in seconds (default 30)
          def clock_skew_seconds
            Settings.value(:fleet, :auth, :max_clock_skew_seconds, default: 30).to_i
          end

          # @return [Object] configured issuer (default 'legion-llm')
          def issuer
            Settings.value(:fleet, :auth, :issuer, default: 'legion-llm')
          end

          # @return [Object] configured audience (default 'lex-llm-fleet-worker')
          def audience
            Settings.value(:fleet, :auth, :audience, default: 'lex-llm-fleet-worker')
          end

          # @return [Object] configured signing algorithm (default 'HS256')
          def algorithm
            Settings.value(:fleet, :auth, :algorithm, default: 'HS256')
          end

          # Fetches the verification key from +::Legion::Crypt.cluster_secret+.
          #
          # A nil or empty secret is rejected here so that verification is never attempted
          # with a blank key.
          #
          # @return [Object] the cluster secret
          # @raise [TokenError] when Legion::Crypt is unavailable, yields a blank secret, or raises
          def signing_key
            unless defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
              raise TokenError, 'no signing key available - Legion::Crypt not initialized'
            end

            secret = ::Legion::Crypt.cluster_secret
            blank = secret.nil? || (secret.respond_to?(:empty?) && secret.empty?)
            raise TokenError, 'no signing key available - Legion::Crypt not initialized' if blank

            secret
          rescue TokenError
            raise
          rescue StandardError => e
            raise TokenError, "no signing key available: #{e.message}"
          end

          # @return [Module] +::Legion::Crypt::JWT+
          # @raise [TokenError] when it is not defined or lacks +verify+
          def jwt_module
            return ::Legion::Crypt::JWT if defined?(::Legion::Crypt::JWT) && ::Legion::Crypt::JWT.respond_to?(:verify)

            raise TokenError, 'Legion::Crypt::JWT.verify unavailable'
          end

          # Shallow copy of +hash+ with keys converted to symbols where possible.
          #
          # @param hash [Object]
          # @return [Hash] empty when +hash+ does not respond to +each+
          def symbolize_keys(hash)
            return {} unless hash.respond_to?(:each)

            hash.each_with_object({}) do |(key, value), result|
              result[key.respond_to?(:to_sym) ? key.to_sym : key] = value
            end
          end

          # Normalizes a value for comparison: hash keys become strings (recursively, sorted),
          # arrays are normalized element-wise, symbols become strings, anything else is
          # returned unchanged.
          #
          # @param value [Object]
          # @return [Object]
          def canonical_value(value)
            case value
            when Hash
              value.each_with_object({}) do |(key, child), result|
                result[key.to_s] = canonical_value(child)
              end.sort.to_h
            when Array
              value.map { |child| canonical_value(child) }
            when Symbol
              value.to_s
            else
              value
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'concurrent'
|
|
4
|
+
|
|
5
|
+
require_relative 'settings'
|
|
6
|
+
require_relative 'token_validator'
|
|
7
|
+
|
|
8
|
+
module Legion
  module Extensions
    module Llm
      module Fleet
        # Applies responder-side policy and dispatches a fleet request to a local lex-llm provider.
        module WorkerExecution
          # Raised when a request is rejected by responder-side checks.
          class PolicyError < StandardError; end

          # Idempotency entries are hashes with a +:state+ and an +:expires_at+ epoch second.
          @idempotency_keys = Concurrent::Map.new
          @idempotency_mutex = Mutex.new

          module_function

          # Validates a fleet request and runs it on the local provider.
          #
          # Order: identity, policy, idempotency reservation, dispatch. On success the
          # idempotency key and the token id are marked complete. On any StandardError the
          # idempotency key and replay reservation are released before the error is re-raised.
          #
          # @param envelope [Hash] fleet request envelope (symbol or string keys)
          # @param provider [Object] a provider, or a callable that returns one for the envelope
          # @return [Object] whatever the provider operation returns
          # @raise [PolicyError] for rejected requests; a TokenError is re-raised as PolicyError
          def call(envelope:, provider:)
            claims = nil
            idempotency_key = nil
            claims = validate_identity!(envelope)
            validate_policy!(envelope)
            idempotency_key = validate_idempotency!(envelope)
            response = dispatch_local_provider!(envelope: envelope, provider: provider)
            mark_idempotency_success!(idempotency_key) if idempotency_key
            # claims is +true+ rather than a Hash when authentication is disabled.
            TokenValidator.mark_replay!(claims[:jti]) if claims.is_a?(Hash)
            response
          rescue TokenError => e
            release_idempotency!(idempotency_key) if idempotency_key
            release_replay!(claims)
            raise PolicyError, e.message
          rescue StandardError
            release_idempotency!(idempotency_key) if idempotency_key
            release_replay!(claims)
            raise
          end

          # @param envelope [Hash] fleet request envelope
          # @return [Hash, true] verified claims, or +true+ when authentication is not required
          # @raise [TokenError] when the signed token fails validation
          def validate_identity!(envelope)
            return true unless responder_setting(:require_auth, default: true)

            TokenValidator.validate!(token: envelope_value(envelope, :signed_token), envelope: envelope)
          end

          # @param _envelope [Hash] unused
          # @return [true] when policy enforcement is not required
          # @raise [PolicyError] whenever +require_policy+ is enabled
          def validate_policy!(_envelope)
            return true unless responder_setting(:require_policy, default: false)

            raise PolicyError, 'fleet responder policy enforcement unavailable'
          end

          # Reserves the envelope's idempotency key.
          #
          # @param envelope [Hash] fleet request envelope
          # @return [String, nil] the reserved key, or nil when idempotency is not required
          # @raise [PolicyError] when the key is missing or already reserved
          def validate_idempotency!(envelope)
            return nil unless responder_setting(:require_idempotency, default: true)

            key = envelope_value(envelope, :idempotency_key)
            raise PolicyError, 'fleet idempotency_key is required' if key.to_s.empty?

            reserve_idempotency_key!(key.to_s)
            key.to_s
          end

          # Calls the provider method matching the envelope's operation, forwarding the
          # remaining params as keyword arguments.
          #
          # The operation is matched by its string form, so a missing or non-string
          # operation is rejected with PolicyError rather than failing on conversion.
          #
          # NOTE(review): a :model entry inside params is forwarded after the explicit
          # +model:+ keyword and would therefore take precedence - confirm this is intended.
          #
          # @param envelope [Hash] fleet request envelope
          # @param provider [Object] provider, or a callable (without +chat+) returning one
          # @return [Object] the provider's return value
          # @raise [PolicyError] for an unsupported operation
          def dispatch_local_provider!(envelope:, provider:)
            provider = provider.call(envelope) if provider.respond_to?(:call) && !provider.respond_to?(:chat)
            operation = envelope_value(envelope, :operation).to_s
            params = normalize_hash(envelope_value(envelope, :params) || {})
            model = envelope_value(envelope, :model)

            case operation
            when 'chat'
              provider.chat(messages: params.fetch(:messages, []), model: model, **except(params, :messages))
            when 'stream'
              provider.stream_chat(messages: params.fetch(:messages, []), model: model, **except(params, :messages))
            when 'embed'
              provider.embed(text: params[:text], model: model, **except(params, :text))
            when 'count_tokens'
              provider.count_tokens(messages: params.fetch(:messages, []), model: model, **except(params, :messages))
            else
              raise PolicyError, "unsupported fleet operation: #{operation}"
            end
          end

          # Replaces the idempotency cache and its mutex with fresh instances.
          #
          # @return [Mutex]
          def reset_idempotency_cache!
            @idempotency_keys = Concurrent::Map.new
            @idempotency_mutex = Mutex.new
          end

          # Stores the key as +:complete+ with a fresh TTL.
          #
          # @param key [Object] idempotency key
          def mark_idempotency_success!(key)
            @idempotency_mutex.synchronize do
              @idempotency_keys[key.to_s] = { state: :complete, expires_at: Time.now.to_i + idempotency_ttl_seconds }
            end
          end

          # Removes the key from the cache regardless of its state.
          #
          # @param key [Object] idempotency key
          def release_idempotency!(key)
            @idempotency_mutex.synchronize { @idempotency_keys.delete(key.to_s) }
          end

          # Releases the token id reservation when +claims+ is a Hash carrying a jti.
          #
          # @param claims [Hash, true, nil] result of {validate_identity!}, if any
          def release_replay!(claims)
            return unless claims.is_a?(Hash) && claims[:jti]

            TokenValidator.release_replay!(claims[:jti])
          end

          # Drops expired idempotency entries while holding the idempotency mutex.
          def purge_idempotency_cache!
            @idempotency_mutex.synchronize { purge_idempotency_cache_locked!(Time.now.to_i) }
          end

          # Drops expired idempotency entries; the caller must already hold the idempotency mutex.
          #
          # @param now [Integer] current epoch seconds
          def purge_idempotency_cache_locked!(now)
            @idempotency_keys.each_pair do |key, entry|
              @idempotency_keys.delete(key) if entry[:expires_at] <= now
            end
          end

          # Stores +key+ as +:inflight+ unless an unexpired entry already exists.
          # Expired entries are evicted first so the cache does not grow with every
          # request ever seen.
          #
          # @param key [String] idempotency key
          # @raise [PolicyError] when the key is already reserved and unexpired
          def reserve_idempotency_key!(key)
            @idempotency_mutex.synchronize do
              now = Time.now.to_i
              purge_idempotency_cache_locked!(now)
              existing = @idempotency_keys[key]
              raise PolicyError, 'duplicate fleet idempotency key' if existing && existing[:expires_at] > now

              @idempotency_keys[key] = { state: :inflight, expires_at: now + idempotency_ttl_seconds }
            end
          end

          # @return [Integer] configured idempotency TTL; 600 when unset or not positive
          def idempotency_ttl_seconds
            ttl = responder_setting(:idempotency_ttl_seconds, default: 600).to_i
            ttl.positive? ? ttl : 600
          end

          # Reads +fleet.responder.<key>+. When +require_auth+ is not configured there,
          # the answer comes from {auth_required?} instead of +default+.
          #
          # @param key [Symbol] responder setting name
          # @param default [Object] value used when the setting is absent
          # @return [Object]
          def responder_setting(key, default:)
            value = Settings.value(:fleet, :responder, key, default: nil)
            return auth_required? if key == :require_auth && value.nil?
            return default if value.nil?

            value
          end

          # @return [Boolean] false only when +fleet.auth.require_signed_token+ is exactly +false+
          def auth_required?
            Settings.value(:fleet, :auth, :require_signed_token, default: true) != false
          end

          # @param envelope [Object] envelope; anything not responding to +key?+ yields nil
          # @param key [Symbol] field name
          # @return [Object, nil] the value under the symbol key, else under the string key
          def envelope_value(envelope, key)
            return nil unless envelope.respond_to?(:key?)

            envelope[key] || envelope[key.to_s]
          end

          # Shallow copy of +hash+ with keys converted to symbols where possible.
          #
          # @param hash [Object]
          # @return [Hash] empty when +hash+ does not respond to +each+
          def normalize_hash(hash)
            return {} unless hash.respond_to?(:each)

            hash.each_with_object({}) do |(key, value), result|
              result[key.respond_to?(:to_sym) ? key.to_sym : key] = value
            end
          end

          # Copy of +hash+ with symbolized keys, minus the listed keys.
          #
          # @param hash [Hash]
          # @param keys [Array<Symbol, String>] keys to leave out
          # @return [Hash]
          def except(hash, *keys)
            exclusions = keys.map(&:to_sym)
            hash.each_with_object({}) do |(key, value), result|
              normalized_key = key.respond_to?(:to_sym) ? key.to_sym : key
              result[normalized_key] = value unless exclusions.include?(normalized_key)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -80,12 +80,18 @@ module Legion
|
|
|
80
80
|
content: content,
|
|
81
81
|
model_id: model_id,
|
|
82
82
|
tool_calls: tool_calls,
|
|
83
|
-
tool_call_id: tool_call_id
|
|
84
|
-
thinking: thinking&.text,
|
|
85
|
-
thinking_signature: thinking&.signature
|
|
83
|
+
tool_call_id: tool_call_id
|
|
86
84
|
}.merge(tokens ? tokens.to_h : {}).compact
|
|
87
85
|
end
|
|
88
86
|
|
|
87
|
+
# Returns #to_h extended with the thinking text, the thinking signature and
# the raw value; entries whose value is nil are dropped.
#
# @return [Hash]
def to_internal_h
  public_view = to_h
  internal_only = {
    thinking: thinking&.text,
    thinking_signature: thinking&.signature,
    raw: raw
  }
  public_view.merge(internal_only).compact
end
|
|
94
|
+
|
|
89
95
|
# Lists the instance variables as usual but leaves out :@raw.
# NOTE(review): presumably so reflection-based inspection or serialization
# skips the raw payload - confirm against callers.
#
# @return [Array<Symbol>]
def instance_variables
  super.reject { |ivar| ivar == :@raw }
end
|