lex-llm 0.3.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +49 -0
- data/README.md +18 -2
- data/lex-llm.gemspec +3 -0
- data/lib/legion/extensions/llm/auto_registration.rb +7 -36
- data/lib/legion/extensions/llm/embedding.rb +1 -1
- data/lib/legion/extensions/llm/error.rb +14 -0
- data/lib/legion/extensions/llm/errors/unsupported_capability.rb +21 -0
- data/lib/legion/extensions/llm/fleet/default_exchange_reply.rb +81 -0
- data/lib/legion/extensions/llm/fleet/envelope_validation.rb +39 -0
- data/lib/legion/extensions/llm/fleet/protocol.rb +16 -0
- data/lib/legion/extensions/llm/fleet/provider_responder.rb +304 -0
- data/lib/legion/extensions/llm/fleet/publish_safety.rb +123 -0
- data/lib/legion/extensions/llm/fleet/settings.rb +66 -0
- data/lib/legion/extensions/llm/fleet/token_error.rb +11 -0
- data/lib/legion/extensions/llm/fleet/token_validator.rb +205 -0
- data/lib/legion/extensions/llm/fleet/worker_execution.rb +165 -0
- data/lib/legion/extensions/llm/message.rb +9 -3
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +37 -36
- data/lib/legion/extensions/llm/provider.rb +198 -4
- data/lib/legion/extensions/llm/provider_contract.rb +21 -0
- data/lib/legion/extensions/llm/provider_settings.rb +18 -1
- data/lib/legion/extensions/llm/responses/chat_response.rb +43 -0
- data/lib/legion/extensions/llm/responses/embedding_response.rb +38 -0
- data/lib/legion/extensions/llm/responses/stream_chunk.rb +43 -0
- data/lib/legion/extensions/llm/responses/thinking_extractor.rb +155 -0
- data/lib/legion/extensions/llm/stream_accumulator.rb +12 -1
- data/lib/legion/extensions/llm/transport/exchanges/fleet.rb +24 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_error.rb +64 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_request.rb +155 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_response.rb +63 -0
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +38 -11
- metadata +62 -1
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      module Responses
        # Separates provider thinking markup from caller-visible text.
        #
        # Thinking can arrive inline, wrapped in <think>...</think> tags inside the content
        # string, or out of band under one of THINKING_METADATA_KEYS. `extract` gathers both,
        # returns the content with the inline markup removed, and returns metadata with the
        # thinking keys and RAW_METADATA_KEYS dropped.
        module ThinkingExtractor
          # Result of an extraction: cleaned content, combined thinking text (or nil),
          # the first non-blank signature (or nil) and the scrubbed metadata hash.
          Extraction = Struct.new(:content, :thinking, :signature, :metadata, keyword_init: true)

          THINK_OPEN = '<think>'
          THINK_CLOSE = '</think>'
          THINK_PATTERN = %r{<think>(.*?)</think>}m
          THINKING_METADATA_KEYS = %i[
            reasoning_content reasoning thinking thinking_text thinking_signature reasoning_signature thought_signature
          ].freeze
          RAW_METADATA_KEYS = %i[
            raw raw_response response_body provider_body provider_response
          ].freeze
          # Placed between independent thinking segments. Each segment is stripped before it is
          # joined, so without a separator adjacent segments would run together.
          THINKING_SEPARATOR = "\n"

          module_function

          # Splits thinking from content and metadata.
          #
          # @param content [Object] response text; anything that is not a String is returned unchanged
          # @param metadata [#to_h, nil] provider metadata; keys are normalized to snake_case symbols
          # @return [Extraction]
          def extract(content, metadata: {})
            metadata = normalized_metadata(metadata)
            content, extracted_thinking = extract_from_content(content)
            metadata_thinking = extract_metadata_thinking(metadata)
            metadata_signature = extract_metadata_signature(metadata)

            Extraction.new(
              content: content,
              # Metadata thinking comes first, followed by thinking found inline in the content.
              thinking: compact_thinking([metadata_thinking, extracted_thinking]),
              signature: metadata_signature,
              metadata: scrub_metadata(metadata)
            )
          end

          # Walks the string segment by segment, collecting visible text and thinking text.
          #
          # @return [Array] `[clean_content, thinking_or_nil]`; a non-String input yields `[content, nil]`
          def extract_from_content(content)
            return [content, nil] unless content.is_a?(String)

            clean = +''
            thinking_parts = []
            remaining = content.dup

            # Every call consumes at least one tag or the whole remainder, so this terminates.
            remaining = consume_next_segment(remaining, clean, thinking_parts) until remaining.empty?

            [clean.strip, compact_thinking(thinking_parts)]
          end
          private_class_method :extract_from_content

          # Consumes text up to and including the next think tag (or the whole remainder when
          # no tag is left) and returns what is still unprocessed.
          def consume_next_segment(remaining, clean, thinking_parts)
            close_index = remaining.index(THINK_CLOSE)
            open_index = remaining.index(THINK_OPEN)

            if close_index && (open_index.nil? || close_index < open_index)
              # A close tag with no preceding open tag: the text before it is treated as thinking.
              thinking_parts << remaining.slice(0, close_index)
              remaining.slice((close_index + THINK_CLOSE.length)..).to_s.sub(/\A[[:space:]]+/, '')
            elsif open_index
              consume_open_think_segment(remaining, open_index, clean, thinking_parts)
            else
              clean << remaining
              +''
            end
          end
          private_class_method :consume_next_segment

          # Handles a segment that starts with visible text followed by an open think tag.
          # Text before the tag is visible; text after it is thinking up to the close tag,
          # or to the end of the string when the tag is never closed.
          def consume_open_think_segment(remaining, open_index, clean, thinking_parts)
            clean << remaining.slice(0, open_index)
            after_open = remaining.slice((open_index + THINK_OPEN.length)..).to_s
            close_index = after_open.index(THINK_CLOSE)
            unless close_index
              thinking_parts << after_open
              return +''
            end

            thinking_parts << after_open.slice(0, close_index)
            after_open.slice((close_index + THINK_CLOSE.length)..).to_s
          end
          private_class_method :consume_open_think_segment

          # Combines the thinking text found under the textual thinking keys.
          # Expects metadata whose keys were already normalized.
          def extract_metadata_thinking(metadata)
            compact_thinking(
              [
                metadata[:reasoning_content],
                metadata[:reasoning],
                metadata[:thinking],
                metadata[:thinking_text]
              ]
            )
          end
          private_class_method :extract_metadata_thinking

          # Returns the first non-blank signature among the signature keys, stripped, or nil.
          def extract_metadata_signature(metadata)
            [
              metadata[:thinking_signature],
              metadata[:reasoning_signature],
              metadata[:thought_signature]
            ].compact.map { |signature| signature.to_s.strip }.find { |signature| !signature.empty? }
          end
          private_class_method :extract_metadata_signature

          # Returns a copy of metadata without thinking or raw-payload keys; remaining values
          # are scrubbed recursively.
          def scrub_metadata(metadata)
            metadata.each_with_object({}) do |(key, value), scrubbed|
              normalized_key = normalize_metadata_key(key)
              next if THINKING_METADATA_KEYS.include?(normalized_key) || RAW_METADATA_KEYS.include?(normalized_key)

              scrubbed[normalized_key] = scrub_metadata_value(value)
            end
          end
          private_class_method :scrub_metadata

          # Converts camelCase and kebab-case keys to a snake_case symbol.
          def normalize_metadata_key(key)
            key.to_s
               .gsub(/([a-z\d])([A-Z])/, '\1_\2')
               .tr('-', '_')
               .downcase
               .to_sym
          end
          private_class_method :normalize_metadata_key

          # Scrubs one metadata value: hashes and arrays recursively, strings by removing
          # inline think markup (which also strips surrounding whitespace); anything else as is.
          def scrub_metadata_value(value)
            case value
            when Hash
              scrub_metadata(normalized_metadata(value))
            when Array
              value.map { |item| scrub_metadata_value(item) }
            when String
              extract_from_content(value).first
            else
              value
            end
          end
          private_class_method :scrub_metadata_value

          # Returns metadata as a hash with normalized keys; nil becomes an empty hash.
          def normalized_metadata(metadata)
            return {} if metadata.nil?

            metadata.to_h.transform_keys { |key| normalize_metadata_key(key) }
          end
          private_class_method :normalized_metadata

          # Joins thinking segments into one string, or returns nil when nothing is left.
          #
          # Segments are stripped, blank ones are dropped, and exact duplicates are removed so
          # the same text supplied under two keys is not repeated. Survivors are joined with
          # THINKING_SEPARATOR in their original order.
          def compact_thinking(parts)
            segments = parts.compact.map { |part| part.to_s.strip }.reject(&:empty?).uniq
            blank_to_nil(segments.join(THINKING_SEPARATOR))
          end
          private_class_method :compact_thinking

          # Returns nil for nil or an empty value, otherwise the value itself.
          def blank_to_nil(value)
            value.nil? || value.empty? ? nil : value
          end
          private_class_method :blank_to_nil
        end
      end
    end
  end
end
|
|
@@ -210,8 +210,11 @@ module Legion
|
|
|
210
210
|
end
|
|
211
211
|
|
|
212
212
|
def consume_non_think_content(remaining, start_tag, output)
|
|
213
|
+
unmatched_close = remaining.index('</think>')
|
|
213
214
|
start_index = remaining.index(start_tag)
|
|
214
|
-
if start_index
|
|
215
|
+
if unmatched_close && (start_index.nil? || unmatched_close < start_index)
|
|
216
|
+
consume_unmatched_think_close(remaining, unmatched_close)
|
|
217
|
+
elsif start_index
|
|
215
218
|
output << remaining.slice(0, start_index)
|
|
216
219
|
@inside_think_tag = true
|
|
217
220
|
remaining.slice((start_index + start_tag.length)..) || +''
|
|
@@ -223,6 +226,14 @@ module Legion
|
|
|
223
226
|
end
|
|
224
227
|
end
|
|
225
228
|
|
|
229
|
+
# Handles a closing think tag that appears without a preceding opening tag.
# The text before the tag is appended to both thinking buffers; the text after
# the tag is returned with its leading whitespace removed.
def consume_unmatched_think_close(remaining, close_index)
  leaked_thinking = remaining[0, close_index]
  [@thinking_text, @last_thinking_delta].each { |buffer| buffer << leaked_thinking }

  after_tag = remaining[(close_index + '</think>'.length)..]
  after_tag.to_s.sub(/\A[[:space:]]+/, '')
end
|
|
236
|
+
|
|
226
237
|
def longest_suffix_prefix(text, tag)
|
|
227
238
|
max = [text.length, tag.length - 1].min
|
|
228
239
|
max.downto(1) do |len|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/transport'
|
|
4
|
+
|
|
5
|
+
module Legion
  module Extensions
    module Llm
      module Transport
        module Exchanges
          # Topic exchange shared by live LLM fleet requests and their replies.
          class Fleet < ::Legion::Transport::Exchange
            # Name under which the exchange is declared.
            def exchange_name = 'llm.fleet'

            # Exchange type used when none is given.
            def default_type = 'topic'
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require_relative '../../fleet/default_exchange_reply'
|
|
5
|
+
require_relative '../../fleet/envelope_validation'
|
|
6
|
+
require_relative '../../fleet/protocol'
|
|
7
|
+
require_relative '../exchanges/fleet'
|
|
8
|
+
|
|
9
|
+
module Legion
  module Extensions
    module Llm
      module Transport
        module Messages
          # Protocol-v2 error envelope, correlated to a request and routed to its reply queue.
          class FleetError < ::Legion::Transport::Message
            include Fleet::DefaultExchangeReply
            include Fleet::EnvelopeValidation

            # Options that must be present for the envelope to validate, in check order.
            REQUIRED_ERROR_OPTIONS = %i[request_id correlation_id reply_to code].freeze

            def type
              Fleet::Protocol::ERROR_TYPE
            end

            def app_id
              @options[:app_id] || 'lex-llm'
            end

            def reply_to
              @options[:reply_to]
            end

            def correlation_id
              @options[:correlation_id]
            end

            # Generated once and stored back into the options so later calls return the same id.
            def message_id
              @options[:message_id] ||= "llm_fleet_err_#{SecureRandom.uuid}"
            end

            # Errors are routed to the reply queue named by the request.
            #
            # @raise [ArgumentError] when no reply_to was supplied
            def routing_key
              target = @options[:reply_to]
              raise ArgumentError, 'reply_to is required' unless target

              target
            end

            # Runs the envelope checks and marks the message valid when none of them raises.
            def validate
              reject_legacy_options!
              REQUIRED_ERROR_OPTIONS.each { |key| require_option!(key) }
              require_protocol_version!
              @valid = true
            end

            # Payload: the parent message merged with the error envelope fields, nil values removed.
            def message
              base = super
              base.merge(
                protocol_version: @options[:protocol_version] || Fleet::Protocol::VERSION,
                request_id: @options[:request_id],
                correlation_id: correlation_id,
                idempotency_key: @options[:idempotency_key],
                operation: @options[:operation],
                provider: @options[:provider],
                provider_instance: @options[:provider_instance] || @options[:instance],
                model: @options[:model],
                reply_to: reply_to,
                message_context: @options[:message_context],
                trace_context: @options[:trace_context],
                code: @options[:code],
                message: @options[:message],
                error_class: @options[:error_class],
                retryable: @options[:retryable],
                metadata: @options[:metadata] || {}
              ).compact
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require_relative '../../fleet/envelope_validation'
|
|
5
|
+
require_relative '../../fleet/publish_safety'
|
|
6
|
+
require_relative '../../fleet/protocol'
|
|
7
|
+
require_relative '../exchanges/fleet'
|
|
8
|
+
|
|
9
|
+
module Legion
  module Extensions
    module Llm
      module Transport
        module Messages
          # Strict protocol-v2 request envelope for outbound fleet work.
          #
          # Publishing goes to the fleet exchange with mandatory routing, publisher confirms
          # and a returned result by default (see DEFAULT_PUBLISH_OPTIONS).
          class FleetRequest < ::Legion::Transport::Message
            include Fleet::EnvelopeValidation
            include Fleet::PublishSafety

            # Named priority levels and the numeric publish priority each one maps to.
            PRIORITY_MAP = { critical: 9, high: 7, normal: 5, low: 2 }.freeze
            # Numeric priority used for a priority name that is not in PRIORITY_MAP.
            DEFAULT_PRIORITY = 5
            DEFAULT_PUBLISH_OPTIONS = {
              mandatory: true,
              publisher_confirm: true,
              spool: false,
              return_result: true
            }.freeze
            # Options every request must carry; checked in this order by #validate.
            REQUIRED_OPTIONS = %i[
              request_id correlation_id operation provider provider_instance model params reply_to
              message_context caller trace_context signed_token timeout_seconds expires_at protocol_version
              idempotency_key
            ].freeze

            def exchange = Exchanges::Fleet
            def type = Fleet::Protocol::REQUEST_TYPE
            def app_id = @options[:app_id] || 'lex-llm'
            def reply_to = @options[:reply_to]
            def correlation_id = @options[:correlation_id]
            # Generated once and stored back into the options so later calls return the same id.
            def message_id = @options[:message_id] ||= "llm_fleet_req_#{SecureRandom.uuid}"

            # Numeric publish priority for this request, or nil when none was requested.
            # Integers are used as given; names are looked up in PRIORITY_MAP.
            def priority
              resolve_priority(@options[:priority])
            end

            # @raise [ArgumentError] when no routing key was supplied
            def routing_key
              @options[:routing_key] || raise(ArgumentError, 'routing_key is required')
            end

            # Per-message expiration in milliseconds, as a string, taken from :ttl or
            # :timeout_seconds (both in seconds). Falls back to the parent value when neither
            # is set or the value is not numeric.
            def expiration
              ttl = @options[:ttl] || @options[:timeout_seconds]
              return super unless ttl

              (Float(ttl) * 1000).ceil.to_s
            rescue ArgumentError, TypeError
              super
            end

            # Publishes the request to the fleet exchange.
            #
            # Helpers not defined in this class (return_publish_result?, validate_payload_size,
            # install_return_listener, prepare_publisher_confirms, publish_result,
            # publish_failure_result, handle_exception) are presumably supplied by
            # Fleet::PublishSafety or the parent message class - confirm there.
            #
            # @param options [Hash, nil] per-call overrides merged over the defaults and message options
            # @return [Object, nil] the publish result, or nil when a result was not requested
            # @raise [RuntimeError] when the message has not passed #validate
            def publish(options = nil)
              # An explicit message keeps this from re-raising an unrelated in-flight exception
              # when publish is called from inside a rescue block.
              raise 'FleetRequest must be validated before it is published' unless @valid

              requested_options = DEFAULT_PUBLISH_OPTIONS.merge(@options).merge(options || {})
              return_result = return_publish_result?(requested_options)
              publish_options = request_publish_options(requested_options)
              validate_payload_size
              exchange_dest = fleet_exchange
              return_state = {}
              install_return_listener(exchange_dest, requested_options, return_state)
              prepare_publisher_confirms(exchange_dest, requested_options)
              exchange_dest.publish(encode_message, **publish_options)
              return nil unless return_result

              publish_result(exchange_dest, requested_options.merge(publish_options), return_state)
            rescue Bunny::ConnectionClosedError, Bunny::ChannelAlreadyClosed, Bunny::ChannelError,
                   Bunny::NetworkErrorWrapper, IOError, Timeout::Error => e
              handle_exception(e, level: :warn, handled: true, operation: 'llm.fleet.request.publish')
              # Locals assigned before the failure are preferred; unassigned ones are nil here.
              publish_failure_result(:failed, e, publish_options || requested_options || @options)
            end

            # Runs the envelope checks and marks the message valid when none of them raises.
            def validate
              reject_legacy_options!
              require_option!(:routing_key)
              REQUIRED_OPTIONS.each { |key| require_option!(key) }
              require_protocol_version!
              @valid = true
            end

            # Payload: the parent message merged with the request envelope fields, nil values removed.
            def message
              super.merge(
                protocol_version: @options[:protocol_version],
                request_id: @options[:request_id],
                correlation_id: correlation_id,
                idempotency_key: @options[:idempotency_key],
                operation: @options[:operation],
                provider: @options[:provider],
                provider_instance: @options[:provider_instance],
                model: @options[:model],
                params: @options[:params] || {},
                reply_to: reply_to,
                message_context: @options[:message_context],
                caller: @options[:caller],
                trace_context: @options[:trace_context],
                signed_token: @options[:signed_token],
                timeout_seconds: @options[:timeout_seconds],
                expires_at: @options[:expires_at]
              ).compact
            end

            private

            # Exchange object to publish on: a cached instance when the exchange class offers
            # one, otherwise a new instance, otherwise the value returned by #exchange itself.
            def fleet_exchange
              exchange_class = exchange
              if exchange_class.respond_to?(:cached_instance)
                exchange_class.cached_instance || exchange_class.new
              elsif exchange_class.respond_to?(:new)
                exchange_class.new
              else
                exchange_class
              end
            end

            # Keyword arguments for the exchange publish call; nil entries are removed and
            # :mandatory is only included when it was requested as exactly true.
            def request_publish_options(options)
              request_publish_envelope(options).tap do |envelope|
                envelope[:mandatory] = true if options[:mandatory] == true
              end.compact
            end

            def request_publish_envelope(options)
              {
                routing_key: routing_key || '',
                content_type: options[:content_type] || content_type,
                content_encoding: options[:content_encoding] || content_encoding,
                type: options[:type] || type,
                priority: request_priority(options),
                expiration: options[:expiration] || expiration,
                headers: request_headers(options),
                persistent: request_persistent(options),
                message_id: message_id,
                correlation_id: correlation_id,
                reply_to: reply_to,
                app_id: options[:app_id] || app_id,
                timestamp: timestamp
              }
            end

            # Numeric priority for one publish call. The merged options already contain the
            # message-level :priority, so the value is resolved here instead of being passed
            # through raw; a priority name must never reach the exchange unmapped.
            def request_priority(options)
              resolve_priority(options[:priority]) || priority
            end

            # Converts a requested priority into a number.
            #
            # @param value [Integer, Symbol, String, nil, false]
            # @return [Integer, nil] nil when no priority was requested
            def resolve_priority(value)
              return unless value
              return value if value.is_a?(Integer)
              return DEFAULT_PRIORITY unless value.respond_to?(:to_sym)

              PRIORITY_MAP.fetch(value.to_sym, DEFAULT_PRIORITY)
            end

            def request_headers(options)
              options[:headers] ? headers.merge(options[:headers]) : headers
            end

            # An explicit :persistent option wins, even when it is false or nil.
            def request_persistent(options)
              options.key?(:persistent) ? options[:persistent] : persistent
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require_relative '../../fleet/default_exchange_reply'
|
|
5
|
+
require_relative '../../fleet/envelope_validation'
|
|
6
|
+
require_relative '../../fleet/protocol'
|
|
7
|
+
require_relative '../exchanges/fleet'
|
|
8
|
+
|
|
9
|
+
module Legion
  module Extensions
    module Llm
      module Transport
        module Messages
          # Protocol-v2 response envelope, correlated to a request and routed to its reply queue.
          class FleetResponse < ::Legion::Transport::Message
            include Fleet::DefaultExchangeReply
            include Fleet::EnvelopeValidation

            # Options that must be present for the envelope to validate, in check order.
            REQUIRED_RESPONSE_OPTIONS = %i[request_id correlation_id reply_to].freeze

            def type
              Fleet::Protocol::RESPONSE_TYPE
            end

            def app_id
              @options[:app_id] || 'lex-llm'
            end

            def reply_to
              @options[:reply_to]
            end

            def correlation_id
              @options[:correlation_id]
            end

            # Generated once and stored back into the options so later calls return the same id.
            def message_id
              @options[:message_id] ||= "llm_fleet_res_#{SecureRandom.uuid}"
            end

            # Responses are routed to the reply queue named by the request.
            #
            # @raise [ArgumentError] when no reply_to was supplied
            def routing_key
              target = @options[:reply_to]
              raise ArgumentError, 'reply_to is required' unless target

              target
            end

            # Runs the envelope checks and marks the message valid when none of them raises.
            def validate
              reject_legacy_options!
              REQUIRED_RESPONSE_OPTIONS.each { |key| require_option!(key) }
              require_protocol_version!
              @valid = true
            end

            # Payload: the parent message without its :thinking entry, merged with the
            # response envelope fields, nil values removed.
            def message
              base = super.except(:thinking)
              base.merge(
                protocol_version: @options[:protocol_version] || Fleet::Protocol::VERSION,
                request_id: @options[:request_id],
                correlation_id: correlation_id,
                idempotency_key: @options[:idempotency_key],
                operation: @options[:operation],
                provider: @options[:provider],
                provider_instance: @options[:provider_instance] || @options[:instance],
                model: @options[:model],
                reply_to: reply_to,
                message_context: @options[:message_context],
                trace_context: @options[:trace_context],
                content: @options[:content],
                tool_calls: @options[:tool_calls],
                usage: @options[:usage] || {},
                finish_reason: @options[:finish_reason],
                metadata: @options[:metadata] || {}
              ).compact
            end
          end
        end
      end
    end
  end
end
|
|
@@ -110,20 +110,33 @@ module Legion
|
|
|
110
110
|
def self.default_settings
|
|
111
111
|
{
|
|
112
112
|
fleet: {
|
|
113
|
-
|
|
114
|
-
scheduler: :basic_get,
|
|
115
|
-
consumer_priority: 0,
|
|
116
|
-
queue_expires_ms: 60_000,
|
|
117
|
-
message_ttl_ms: 120_000,
|
|
118
|
-
queue_max_length: 100,
|
|
119
|
-
delivery_limit: 3,
|
|
120
|
-
consumer_ack_timeout_ms: 300_000,
|
|
121
|
-
endpoint: {
|
|
113
|
+
consumer: {
|
|
122
114
|
enabled: false,
|
|
115
|
+
scheduler: :basic_get,
|
|
116
|
+
consumer_priority: 0,
|
|
117
|
+
queue_expires_ms: 60_000,
|
|
118
|
+
message_ttl_ms: 120_000,
|
|
119
|
+
queue_max_length: 100,
|
|
120
|
+
delivery_limit: 3,
|
|
121
|
+
consumer_ack_timeout_ms: 90_000,
|
|
123
122
|
empty_lane_backoff_ms: 250,
|
|
124
123
|
idle_backoff_ms: 1_000,
|
|
125
|
-
max_consecutive_pulls_per_lane: 0
|
|
126
|
-
|
|
124
|
+
max_consecutive_pulls_per_lane: 0
|
|
125
|
+
},
|
|
126
|
+
auth: {
|
|
127
|
+
require_signed_token: true,
|
|
128
|
+
issuer: 'legion-llm',
|
|
129
|
+
audience: 'lex-llm-fleet-worker',
|
|
130
|
+
algorithm: 'HS256',
|
|
131
|
+
accepted_issuers: ['legion-llm'],
|
|
132
|
+
max_clock_skew_seconds: 30,
|
|
133
|
+
replay_ttl_seconds: 600
|
|
134
|
+
},
|
|
135
|
+
responder: {
|
|
136
|
+
require_auth: nil,
|
|
137
|
+
require_policy: false,
|
|
138
|
+
require_idempotency: true,
|
|
139
|
+
idempotency_ttl_seconds: 600
|
|
127
140
|
}
|
|
128
141
|
}
|
|
129
142
|
}
|
|
@@ -136,6 +149,20 @@ module Legion
|
|
|
136
149
|
require_relative 'llm/auto_registration'
|
|
137
150
|
require_relative 'llm/credential_sources'
|
|
138
151
|
loader.eager_load
|
|
152
|
+
|
|
153
|
+
module Transport
  # Fleet exchange classes depend on legion-transport, so they are registered
  # as autoloads here and only loaded on first reference.
  module Exchanges
    { Fleet: 'llm/transport/exchanges/fleet' }.each do |const_name, relative_path|
      autoload const_name, File.expand_path(relative_path, __dir__)
    end
  end

  # Fleet message classes depend on legion-transport, so they are registered
  # as autoloads here and only loaded on first reference.
  module Messages
    {
      FleetRequest: 'llm/transport/messages/fleet_request',
      FleetResponse: 'llm/transport/messages/fleet_response',
      FleetError: 'llm/transport/messages/fleet_error'
    }.each do |const_name, relative_path|
      autoload const_name, File.expand_path(relative_path, __dir__)
    end
  end
end
|
|
139
166
|
end
|
|
140
167
|
end
|
|
141
168
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3
|
|
4
|
+
version: 0.4.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -24,6 +24,20 @@ dependencies:
|
|
|
24
24
|
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: concurrent-ruby
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.2'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.2'
|
|
27
41
|
- !ruby/object:Gem::Dependency
|
|
28
42
|
name: event_stream_parser
|
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -94,6 +108,20 @@ dependencies:
|
|
|
94
108
|
- - ">="
|
|
95
109
|
- !ruby/object:Gem::Version
|
|
96
110
|
version: '1'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: legion-crypt
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - ">="
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: 1.5.1
|
|
118
|
+
type: :runtime
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - ">="
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: 1.5.1
|
|
97
125
|
- !ruby/object:Gem::Dependency
|
|
98
126
|
name: legion-json
|
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -136,6 +164,20 @@ dependencies:
|
|
|
136
164
|
- - ">="
|
|
137
165
|
- !ruby/object:Gem::Version
|
|
138
166
|
version: 1.3.14
|
|
167
|
+
- !ruby/object:Gem::Dependency
|
|
168
|
+
name: legion-transport
|
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
|
170
|
+
requirements:
|
|
171
|
+
- - ">="
|
|
172
|
+
- !ruby/object:Gem::Version
|
|
173
|
+
version: 1.4.14
|
|
174
|
+
type: :runtime
|
|
175
|
+
prerelease: false
|
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
177
|
+
requirements:
|
|
178
|
+
- - ">="
|
|
179
|
+
- !ruby/object:Gem::Version
|
|
180
|
+
version: 1.4.14
|
|
139
181
|
- !ruby/object:Gem::Dependency
|
|
140
182
|
name: marcel
|
|
141
183
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -211,6 +253,16 @@ files:
|
|
|
211
253
|
- lib/legion/extensions/llm/credential_sources.rb
|
|
212
254
|
- lib/legion/extensions/llm/embedding.rb
|
|
213
255
|
- lib/legion/extensions/llm/error.rb
|
|
256
|
+
- lib/legion/extensions/llm/errors/unsupported_capability.rb
|
|
257
|
+
- lib/legion/extensions/llm/fleet/default_exchange_reply.rb
|
|
258
|
+
- lib/legion/extensions/llm/fleet/envelope_validation.rb
|
|
259
|
+
- lib/legion/extensions/llm/fleet/protocol.rb
|
|
260
|
+
- lib/legion/extensions/llm/fleet/provider_responder.rb
|
|
261
|
+
- lib/legion/extensions/llm/fleet/publish_safety.rb
|
|
262
|
+
- lib/legion/extensions/llm/fleet/settings.rb
|
|
263
|
+
- lib/legion/extensions/llm/fleet/token_error.rb
|
|
264
|
+
- lib/legion/extensions/llm/fleet/token_validator.rb
|
|
265
|
+
- lib/legion/extensions/llm/fleet/worker_execution.rb
|
|
214
266
|
- lib/legion/extensions/llm/image.rb
|
|
215
267
|
- lib/legion/extensions/llm/message.rb
|
|
216
268
|
- lib/legion/extensions/llm/mime_type.rb
|
|
@@ -226,9 +278,14 @@ files:
|
|
|
226
278
|
- lib/legion/extensions/llm/moderation.rb
|
|
227
279
|
- lib/legion/extensions/llm/provider.rb
|
|
228
280
|
- lib/legion/extensions/llm/provider/open_ai_compatible.rb
|
|
281
|
+
- lib/legion/extensions/llm/provider_contract.rb
|
|
229
282
|
- lib/legion/extensions/llm/provider_settings.rb
|
|
230
283
|
- lib/legion/extensions/llm/registry_event_builder.rb
|
|
231
284
|
- lib/legion/extensions/llm/registry_publisher.rb
|
|
285
|
+
- lib/legion/extensions/llm/responses/chat_response.rb
|
|
286
|
+
- lib/legion/extensions/llm/responses/embedding_response.rb
|
|
287
|
+
- lib/legion/extensions/llm/responses/stream_chunk.rb
|
|
288
|
+
- lib/legion/extensions/llm/responses/thinking_extractor.rb
|
|
232
289
|
- lib/legion/extensions/llm/routing.rb
|
|
233
290
|
- lib/legion/extensions/llm/routing/lane_key.rb
|
|
234
291
|
- lib/legion/extensions/llm/routing/model_offering.rb
|
|
@@ -241,8 +298,12 @@ files:
|
|
|
241
298
|
- lib/legion/extensions/llm/tool.rb
|
|
242
299
|
- lib/legion/extensions/llm/tool_call.rb
|
|
243
300
|
- lib/legion/extensions/llm/transcription.rb
|
|
301
|
+
- lib/legion/extensions/llm/transport/exchanges/fleet.rb
|
|
244
302
|
- lib/legion/extensions/llm/transport/exchanges/llm_registry.rb
|
|
245
303
|
- lib/legion/extensions/llm/transport/fleet_lane.rb
|
|
304
|
+
- lib/legion/extensions/llm/transport/messages/fleet_error.rb
|
|
305
|
+
- lib/legion/extensions/llm/transport/messages/fleet_request.rb
|
|
306
|
+
- lib/legion/extensions/llm/transport/messages/fleet_response.rb
|
|
246
307
|
- lib/legion/extensions/llm/transport/messages/registry_event.rb
|
|
247
308
|
- lib/legion/extensions/llm/utils.rb
|
|
248
309
|
- lib/legion/extensions/llm/version.rb
|