legion-llm 0.9.36 → 0.9.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/legion/llm/api/openai/responses.rb +12 -4
- data/lib/legion/llm/call/dispatch.rb +1 -0
- data/lib/legion/llm/call/lex_llm_adapter.rb +220 -0
- data/lib/legion/llm/inference/executor.rb +32 -0
- data/lib/legion/llm/inference/route_attempts.rb +35 -0
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 05ce805ec96361b4a033e7d14a5e9e49e80de7415c75b22fd2af44b41ae447e0
|
|
4
|
+
data.tar.gz: b28f87cc01e43a8165c41d72b315373948e0465094bbb103402ea4e5f66d37bc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 75a99d484b509a4f361b7fae7d3df534e17d6148a613e0e6c9c67b9a6315a73d3ec6830031cd840588c5feb5fd064433638b69191d89b3c0ebddcb9d333d0b62
|
|
7
|
+
data.tar.gz: 9bc998ea9c5e12ec2f3b545bc0b24dc44258aa42fc92882217aae59b5ca7c3c45594888584444c41479d93bc2015b435bfa371440b2b429277b93c05dff7a3dc
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.37] - 2026-05-22
|
|
4
|
+
|
|
5
|
+
### Changed
|
|
6
|
+
- API: OpenAI Responses requests now dispatch to upstream `/v1/responses` through a native `:responses` provider capability instead of adapting Responses input through Chat Completions `stream_chat`, preserving upstream Responses streaming usage from `response.completed.response.usage`
|
|
7
|
+
|
|
3
8
|
## [0.9.36] - 2026-05-22
|
|
4
9
|
|
|
5
10
|
### Fixed
|
|
@@ -76,13 +76,13 @@ module Legion
|
|
|
76
76
|
'X-Accel-Buffering' => 'no'
|
|
77
77
|
|
|
78
78
|
stream do |out|
|
|
79
|
-
Responses.stream_response(out, executor, request_id: request_id, model: model)
|
|
79
|
+
Responses.stream_response(out, executor, request_id: request_id, model: model, upstream_body: body)
|
|
80
80
|
rescue StandardError => e
|
|
81
81
|
handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses.stream', request_id: request_id)
|
|
82
82
|
out << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: e.message })}\n\n"
|
|
83
83
|
end
|
|
84
84
|
else
|
|
85
|
-
pipeline_response = executor.
|
|
85
|
+
pipeline_response = executor.call_responses(body: body, stream: false)
|
|
86
86
|
response_body = Responses.format_response(pipeline_response, request_id: request_id, model: model)
|
|
87
87
|
|
|
88
88
|
log.info("[llm][api][openai][responses] action=complete request_id=#{request_id} model=#{response_body[:model]}")
|
|
@@ -179,7 +179,7 @@ module Legion
|
|
|
179
179
|
}
|
|
180
180
|
end
|
|
181
181
|
|
|
182
|
-
def self.stream_response(out, executor, request_id:, model:) # rubocop:disable Metrics/MethodLength
|
|
182
|
+
def self.stream_response(out, executor, request_id:, model:, upstream_body: nil) # rubocop:disable Metrics/MethodLength
|
|
183
183
|
created_at = Time.now.to_i
|
|
184
184
|
seq = 0
|
|
185
185
|
in_progress_response = { id: request_id, object: 'response', created_at: created_at,
|
|
@@ -218,7 +218,7 @@ module Legion
|
|
|
218
218
|
|
|
219
219
|
full_text = +''
|
|
220
220
|
|
|
221
|
-
pipeline_response = executor
|
|
221
|
+
pipeline_response = call_streaming_executor(executor, upstream_body: upstream_body) do |chunk|
|
|
222
222
|
text = chunk.respond_to?(:content) ? chunk.content.to_s : chunk.to_s
|
|
223
223
|
next if text.empty?
|
|
224
224
|
|
|
@@ -282,6 +282,14 @@ module Legion
|
|
|
282
282
|
log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
|
|
283
283
|
end
|
|
284
284
|
|
|
285
|
+
# Runs the streaming call for a Responses request on the given executor.
#
# When a raw upstream request body is supplied and the executor responds to
# +call_responses+, the request is streamed through that entry point with
# +stream: true+; otherwise the executor's +call_stream+ is used. The
# caller's block is forwarded to whichever method is chosen.
#
# @param executor [#call_stream, #call_responses] pipeline executor
# @param upstream_body [Object, nil] request body passed to +call_responses+
# @return [Object] whatever the selected executor method returns
def self.call_streaming_executor(executor, upstream_body: nil, &on_chunk)
  use_responses = upstream_body && executor.respond_to?(:call_responses)
  return executor.call_stream(&on_chunk) unless use_responses

  executor.call_responses(body: upstream_body, stream: true, &on_chunk)
end
|
|
292
|
+
|
|
285
293
|
def self.sse_event(name, payload)
|
|
286
294
|
"event: #{name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
|
|
287
295
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'event_stream_parser'
|
|
3
4
|
require 'legion/logging/helper'
|
|
4
5
|
|
|
5
6
|
module Legion
|
|
@@ -58,6 +59,24 @@ module Legion
|
|
|
58
59
|
end
|
|
59
60
|
end
|
|
60
61
|
|
|
62
|
+
# Sends a Responses request to the upstream provider.
#
# Builds the upstream payload from the request body, model, messages and the
# optional +:system+ / +:tools+ entries in +opts+. With +stream: true+ the
# payload is streamed and the caller's block is passed along; otherwise the
# payload is POSTed to +responses_url+ and the response body is converted
# with +responses_hash_response+.
#
# @param model [Object] model identifier written into the payload
# @param body [Object] original request body
# @param messages [Object] messages converted into the payload's input
# @param stream [Boolean] whether to stream the upstream response
# @param opts [Hash] reads +:system+, +:tools+ and +:offering_metadata+
def responses(model:, body:, messages:, stream: false, **opts, &on_chunk)
  request_payload = build_responses_payload(
    body: body, model: model, messages: messages, stream: stream,
    system: opts[:system], tools: opts[:tools]
  )
  return stream_responses_payload(request_payload, offering_metadata: opts[:offering_metadata], &on_chunk) if stream

  upstream = provider.connection.post(responses_url, request_payload)
  responses_hash_response(upstream.body, offering_metadata: opts[:offering_metadata])
end
|
|
79
|
+
|
|
61
80
|
def embed(model:, text:, dimensions: nil, **opts)
|
|
62
81
|
model_info = model_info(model, offering_metadata: opts[:offering_metadata])
|
|
63
82
|
response = provider.embed(
|
|
@@ -136,6 +155,207 @@ module Legion
|
|
|
136
155
|
end
|
|
137
156
|
end
|
|
138
157
|
|
|
158
|
+
# Path of the upstream Responses endpoint.
#
# @return [String]
def responses_url
  '/v1/responses'
end
|
|
159
|
+
|
|
160
|
+
# Assembles the payload sent to the upstream Responses endpoint.
#
# Starts from a copy of the normalized request body, then overwrites
# +:model+, +:stream+ and +:input+. +:instructions+ is set only when the
# normalized system content passes +present_system?+, and +:tools+ only when
# at least one tool was formatted. The result is passed through
# +deep_compact+ before being returned.
def build_responses_payload(body:, model:, messages:, stream:, system: nil, tools: nil)
  request = normalize_hash(body).dup
  request[:model] = model
  request[:stream] = stream
  request[:input] = responses_input(messages)

  instructions = normalize_response_system(system)
  request[:instructions] = instructions if present_system?(instructions)

  tool_definitions = responses_tools(tools)
  request[:tools] = tool_definitions unless tool_definitions.empty?

  deep_compact(request)
end
|
|
174
|
+
|
|
175
|
+
# Converts messages into Responses +input+ items.
#
# A message whose role is +tool+ becomes a +function_call_output+ item keyed
# by its +tool_call_id+. Every other message becomes a role/content item
# (role defaults to +user+); +tool_call_id+ is kept only when it is not nil.
# Content is always stringified.
#
# @param messages [Object] coerced with +Array()+
# @return [Array<Hash>]
def responses_input(messages)
  Array(messages).map do |message|
    entry = normalize_hash(message)

    if entry[:role].to_s == 'tool'
      {
        type: 'function_call_output',
        call_id: entry[:tool_call_id].to_s,
        output: normalize_message_content(entry[:content]).to_s
      }
    else
      item = { role: entry[:role]&.to_s || 'user' }
      item[:content] = normalize_message_content(entry[:content]).to_s
      item[:tool_call_id] = entry[:tool_call_id]
      item.compact
    end
  end
end
|
|
193
|
+
|
|
194
|
+
# Reduces a system prompt to the value used for the payload's instructions.
#
# nil stays nil, a Hash yields its +:content+ (or +'content'+) entry, and
# anything else is stringified.
def normalize_response_system(system)
  case system
  when nil then nil
  when Hash then system[:content] || system['content']
  else system.to_s
  end
end
|
|
200
|
+
|
|
201
|
+
# Formats tools as Responses +function+ tool definitions.
#
# Each value of the normalized tool collection contributes its name and
# description (both stringified) and its parameter schema; a tool without a
# schema gets an empty object schema.
#
# @return [Array<Hash>]
def responses_tools(tools)
  normalize_tools(tools).values.map do |definition|
    formatted = { type: 'function' }
    formatted[:name] = definition.name.to_s
    formatted[:description] = definition.description.to_s
    formatted[:parameters] = definition.params_schema || { type: 'object', properties: {} }
    formatted
  end
end
|
|
211
|
+
|
|
212
|
+
# Recursively removes nil entries from nested hashes and arrays.
#
# Hash entries whose compacted value is nil are dropped, arrays lose their
# nil elements, and every other value (including +false+ and empty
# containers) is returned untouched.
def deep_compact(value)
  return value.map { |element| deep_compact(element) }.compact if value.is_a?(Array)
  return value unless value.is_a?(Hash)

  value.each_with_object({}) do |(name, nested), kept|
    cleaned = deep_compact(nested)
    next if cleaned.nil?

    kept[name] = cleaned
  end
end
|
|
225
|
+
|
|
226
|
+
# POSTs the payload to the Responses endpoint as a server-sent-event stream.
#
# A fresh accumulator and SSE parser are created per call; the request asks
# for +text/event-stream+ and gets a data handler that feeds the parser. Once
# the request returns, the accumulated state is turned into the final result
# hash.
def stream_responses_payload(payload, offering_metadata: nil, &block)
  stream_state = build_responses_stream_accumulator
  sse_parser = EventStreamParser::Parser.new

  upstream = provider.connection.post(responses_url, payload) do |request|
    request.headers['Accept'] = 'text/event-stream'
    attach_responses_stream_handler(request, sse_parser, stream_state, block)
  end

  responses_stream_response(stream_state, upstream.body, offering_metadata: offering_metadata)
end
|
|
237
|
+
|
|
238
|
+
# Builds the mutable state collected while a Responses stream is consumed:
# concatenated text, model, usage, the terminal response object and the last
# raw event. The text buffer is an unfrozen string so deltas can be appended.
def build_responses_stream_accumulator
  { content: ''.dup, model: nil, usage: {}, completed: nil, raw: nil }
end
|
|
247
|
+
|
|
248
|
+
# Installs an +on_data+ callback on the request that feeds every received
# chunk to the SSE parser and hands each parsed event's data to
# +handle_responses_stream_data+.
#
# The callback is assigned through the +on_data=+ writer when the request
# options expose one, and through +[]=+ otherwise.
def attach_responses_stream_handler(req, parser, accumulator, block)
  on_chunk = proc do |bytes, *_rest|
    parser.feed(bytes) { |_type, payload| handle_responses_stream_data(payload, accumulator, block) }
  end

  if req.options.respond_to?(:on_data=)
    req.options.on_data = on_chunk
  else
    req.options[:on_data] = on_chunk
  end
end
|
|
261
|
+
|
|
262
|
+
# Processes the data payload of one server-sent event from a Responses
# stream.
#
# The +[DONE]+ sentinel and payloads that do not parse to a Hash are
# ignored. Every parsed event is remembered as the accumulator's +:raw+
# entry. Text deltas are appended through +accumulate_responses_text_delta+.
# Terminal events record the final response object, its model and its usage.
#
# +response.incomplete+ is treated like +response.completed+: a truncated
# response still carries a +response+ object, and ignoring it would drop the
# model and usage from the final result.
#
# @param data [String] raw event data
# @param accumulator [Hash] state built by +build_responses_stream_accumulator+
# @param block [Proc, nil] caller's chunk callback
def handle_responses_stream_data(data, accumulator, block)
  return if data == '[DONE]'

  parsed = Legion::JSON.parse(data, symbolize_names: false)
  return unless parsed.is_a?(Hash)

  accumulator[:raw] = parsed
  case parsed['type']
  when 'response.output_text.delta'
    accumulate_responses_text_delta(parsed, accumulator, block)
  when 'response.completed', 'response.incomplete'
    response = parsed['response'] || {}
    accumulator[:completed] = response
    accumulator[:model] = response['model'] if response['model']
    accumulator[:usage] = responses_usage(response['usage'])
  end
end
|
|
279
|
+
|
|
280
|
+
# Appends a text delta to the accumulated content and, when a callback was
# supplied, yields it as an assistant chunk.
#
# Events whose +delta+ is missing or empty are ignored. The chunk carries the
# delta text, the event's +model+ value (if any) and the raw event.
def accumulate_responses_text_delta(parsed, accumulator, block)
  fragment = parsed['delta'].to_s
  return if fragment.empty?

  accumulator[:content] << fragment
  return if block.nil?

  chunk = lex_llm_namespace::Chunk.new(
    role: :assistant, content: fragment, model_id: parsed['model'], raw: parsed, tokens: nil
  )
  block.call(chunk)
end
|
|
295
|
+
|
|
296
|
+
# Builds the final result hash for a streamed Responses call.
#
# The streamed text is used as the result; when no text was streamed it is
# extracted from the terminal response object instead. Metadata is derived
# from the terminal response when one was seen, otherwise from the HTTP
# response body. Entries that are nil are dropped.
def responses_stream_response(accumulator, response_body, offering_metadata: nil)
  final = accumulator[:completed] || {}
  text = accumulator[:content]
  text = extract_responses_text(final) if text.empty?

  summary = { result: text }
  summary[:model] = accumulator[:model] || final['model']
  summary[:usage] = accumulator[:usage]
  metadata_source = final.empty? ? response_body : final
  summary[:metadata] = response_metadata(metadata_source, offering_metadata: offering_metadata)
  summary.compact
end
|
|
308
|
+
|
|
309
|
+
# Converts a non-streaming Responses body into the result hash: extracted
# text, model, normalized usage and metadata, with nil entries dropped.
def responses_hash_response(body, offering_metadata: nil)
  payload = normalize_string_hash(body)

  summary = { result: extract_responses_text(payload) }
  summary[:model] = payload['model']
  summary[:usage] = responses_usage(payload['usage'])
  summary[:metadata] = response_metadata(payload, offering_metadata: offering_metadata)
  summary.compact
end
|
|
318
|
+
|
|
319
|
+
# Returns a copy of a hash-like value with stringified keys and recursively
# normalized values. An Array is normalized element by element with this same
# method; anything that does not respond to +each_pair+ becomes an empty
# hash.
def normalize_string_hash(value)
  if value.is_a?(Array)
    value.map { |element| normalize_string_hash(element) }
  elsif value.respond_to?(:each_pair)
    value.each_with_object({}) do |(name, nested), out|
      out[name.to_s] = normalize_string_hash_value(nested)
    end
  else
    {}
  end
end
|
|
327
|
+
|
|
328
|
+
# Normalizes one value found inside a hash: hash-like values get stringified
# keys, arrays are normalized element by element, and everything else is
# returned unchanged.
def normalize_string_hash_value(value)
  if value.respond_to?(:each_pair)
    normalize_string_hash(value)
  elsif value.is_a?(Array)
    value.map { |element| normalize_string_hash_value(element) }
  else
    value
  end
end
|
|
334
|
+
|
|
335
|
+
# Pulls the text out of a Responses body.
#
# A top-level +output_text+ wins when present. Otherwise the +text+ of every
# content part typed +output_text+ or +text+ is collected across all output
# items, in order, and joined without a separator.
def extract_responses_text(body)
  direct = body['output_text']
  return direct.to_s if direct

  fragments = []
  Array(body['output']).each do |item|
    Array(item['content']).each do |part|
      next unless %w[output_text text].include?(part['type'].to_s)

      fragments << part['text'] if part['text']
    end
  end
  fragments.join
end
|
|
346
|
+
|
|
347
|
+
# Normalizes an upstream usage object into token counts.
#
# Responses-style keys (+input_tokens+, +output_tokens+,
# +input_tokens_details+) are preferred. Chat-completions-style keys
# (+prompt_tokens+, +completion_tokens+, +prompt_tokens_details+) are used as
# fallbacks, so cached-token counts are not lost for upstreams that report
# usage in that shape. A details entry that is not a hash is ignored rather
# than raising. Missing counts become 0.
#
# @param usage [Object] upstream usage payload (may be nil)
# @return [Hash{Symbol=>Integer}] input, output, cache-read and cache-write
#   token counts
def responses_usage(usage)
  usage = normalize_string_hash(usage)
  details = [usage['input_tokens_details'], usage['prompt_tokens_details']].find { |entry| entry.is_a?(Hash) } || {}

  {
    input_tokens: (usage['input_tokens'] || usage['prompt_tokens']).to_i,
    output_tokens: (usage['output_tokens'] || usage['completion_tokens']).to_i,
    cache_read_tokens: details['cached_tokens'].to_i,
    cache_write_tokens: details['cache_creation_tokens'].to_i
  }
end
|
|
358
|
+
|
|
139
359
|
def model_info(model, offering_metadata: nil)
|
|
140
360
|
offering = normalize_offering_metadata(offering_metadata)
|
|
141
361
|
lex_llm_namespace::Model::Info.new(
|
|
@@ -124,6 +124,14 @@ module Legion
|
|
|
124
124
|
build_response
|
|
125
125
|
end
|
|
126
126
|
|
|
127
|
+
# Runs the pipeline for a native Responses request.
#
# Logs the call, runs the pre-provider steps, performs the provider request
# (forwarding the caller's block for streaming), runs the post-provider
# steps and returns the built response.
#
# @param body [Object] upstream request body
# @param stream [Boolean] whether the provider request is streamed
def call_responses(body:, stream: false, &on_chunk)
  log.debug("[llm][executor] action=call_responses request_id=#{@request.id} profile=#{@profile} stream=#{stream}")

  execute_pre_provider_steps
  execute_provider_request_responses(body: body, stream: stream, &on_chunk)
  execute_post_provider_steps

  build_response
end
|
|
134
|
+
|
|
127
135
|
private
|
|
128
136
|
|
|
129
137
|
def llm_setting(key, default = nil)
|
|
@@ -1339,6 +1347,30 @@ module Legion
|
|
|
1339
1347
|
@raw_response = Call::NativeResponseAdapter.new(result)
|
|
1340
1348
|
end
|
|
1341
1349
|
|
|
1350
|
+
# Performs the provider step for a native Responses request.
#
# Stamps the provider start time and records the outbound timeline event,
# then raises unless the resolved provider supports native dispatch. The
# dispatch result's metadata is merged into the offering metadata and the
# result is wrapped as the raw response. Finally the provider end time is
# stamped and the provider response is recorded.
#
# @raise [Legion::LLM::ProviderError] when the provider is not natively
#   registered
def execute_provider_request_responses(body:, stream:, &block)
  @timestamps[:provider_start] = Time.now
  @timeline.record(
    category: :provider,
    key: 'provider:request_sent',
    exchange_id: @exchange_id,
    direction: :outbound,
    detail: "responses from #{@resolved_provider}",
    from: 'pipeline',
    to: "provider:#{@resolved_provider}"
  )

  unless use_native_dispatch?(@resolved_provider)
    raise Legion::LLM::ProviderError, "Native provider not registered: #{@resolved_provider}"
  end

  native_result = dispatch_responses_request(
    body: body, messages: native_dispatch_messages, stream: stream, stream_block: block
  )
  merge_response_offering_metadata(native_result[:metadata])
  @raw_response = Call::NativeResponseAdapter.new(native_result)

  @timestamps[:provider_end] = Time.now
  record_provider_response
end
|
|
1373
|
+
|
|
1342
1374
|
def normalize_message_content(content)
|
|
1343
1375
|
return content if content.nil? || content.is_a?(String)
|
|
1344
1376
|
return content unless content.is_a?(Array)
|
|
@@ -24,6 +24,41 @@ module Legion
|
|
|
24
24
|
end
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
# Dispatches a Responses request directly to the resolved provider and
# records the route attempt.
#
# The idempotency key is allocated before the fleet guard. The guard's error
# is caught by the method-level rescue like any other failure, so allocating
# the key first ensures the recorded failure attempt carries a key instead of
# nil.
#
# @param body [Object] upstream request body
# @param messages [Object] messages forwarded to the provider
# @param stream [Boolean] whether the provider call is streamed
# @param stream_block [Proc, nil] chunk callback passed to the dispatch call
# @return [Object] the dispatch result
# @raise [Legion::LLM::ProviderError] for fleet providers; any dispatch error
#   is re-raised after being recorded
def dispatch_responses_request(body:, messages:, stream:, stream_block: nil)
  idempotency_key = next_route_idempotency_key
  raise Legion::LLM::ProviderError, 'Responses API upstream dispatch is not supported for fleet providers' if fleet_dispatch?

  result = Call::Dispatch.call(
    provider: @resolved_provider,
    instance: @resolved_instance,
    capability: :responses,
    model: @resolved_model,
    body: body,
    messages: messages,
    stream: stream,
    **native_dispatch_options,
    &stream_block
  )
  record_route_attempt(
    dispatch_path: :direct,
    operation: :responses,
    status: :success,
    idempotency_key: idempotency_key,
    selected_lane: nil
  )
  result
rescue StandardError => e
  record_route_attempt(
    dispatch_path: :direct,
    operation: :responses,
    status: :failure,
    idempotency_key: idempotency_key,
    selected_lane: nil,
    failure_reason: e.message
  )
  raise
end
|
|
61
|
+
|
|
27
62
|
def dispatch_direct_request(capability:, operation:, messages:, stream_block: nil)
|
|
28
63
|
idempotency_key = next_route_idempotency_key
|
|
29
64
|
result = Call::Dispatch.call(
|
data/lib/legion/llm/version.rb
CHANGED