legion-llm 0.9.36 → 0.9.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 93611da95712602a9f99e00c4b34523c23838a99d34c3c441ea6bef642231e3f
4
- data.tar.gz: 6ced6ad0b6091c5a3d53702b867eea5f04d35199892338023aebb6bb452ed867
3
+ metadata.gz: 05ce805ec96361b4a033e7d14a5e9e49e80de7415c75b22fd2af44b41ae447e0
4
+ data.tar.gz: b28f87cc01e43a8165c41d72b315373948e0465094bbb103402ea4e5f66d37bc
5
5
  SHA512:
6
- metadata.gz: aa99ed858c6bef1fc214a45d4d59e51f1e9f0262f75dcdbd0f60645d59296edf6fa57e47dfa706dd0b06ec7c7f6dbf572f3832235d0d7125cd9992ec65aa6eee
7
- data.tar.gz: dfe7e2db5cf883de39a5ac47438408a858372a52dd82230baa4a624e33e17b0558eb50359237345afa5b8a1df432b164149c3fce540304ac56ffbad888110c33
6
+ metadata.gz: 75a99d484b509a4f361b7fae7d3df534e17d6148a613e0e6c9c67b9a6315a73d3ec6830031cd840588c5feb5fd064433638b69191d89b3c0ebddcb9d333d0b62
7
+ data.tar.gz: 9bc998ea9c5e12ec2f3b545bc0b24dc44258aa42fc92882217aae59b5ca7c3c45594888584444c41479d93bc2015b435bfa371440b2b429277b93c05dff7a3dc
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.37] - 2026-05-22
4
+
5
+ ### Changed
6
+ - API: OpenAI Responses requests now dispatch to upstream `/v1/responses` through a native `:responses` provider capability instead of adapting Responses input through Chat Completions `stream_chat`, preserving upstream Responses streaming usage from `response.completed.response.usage`
7
+
3
8
  ## [0.9.36] - 2026-05-22
4
9
 
5
10
  ### Fixed
@@ -76,13 +76,13 @@ module Legion
76
76
  'X-Accel-Buffering' => 'no'
77
77
 
78
78
  stream do |out|
79
- Responses.stream_response(out, executor, request_id: request_id, model: model)
79
+ Responses.stream_response(out, executor, request_id: request_id, model: model, upstream_body: body)
80
80
  rescue StandardError => e
81
81
  handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses.stream', request_id: request_id)
82
82
  out << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: e.message })}\n\n"
83
83
  end
84
84
  else
85
- pipeline_response = executor.call
85
+ pipeline_response = executor.call_responses(body: body, stream: false)
86
86
  response_body = Responses.format_response(pipeline_response, request_id: request_id, model: model)
87
87
 
88
88
  log.info("[llm][api][openai][responses] action=complete request_id=#{request_id} model=#{response_body[:model]}")
@@ -179,7 +179,7 @@ module Legion
179
179
  }
180
180
  end
181
181
 
182
- def self.stream_response(out, executor, request_id:, model:) # rubocop:disable Metrics/MethodLength
182
+ def self.stream_response(out, executor, request_id:, model:, upstream_body: nil) # rubocop:disable Metrics/MethodLength
183
183
  created_at = Time.now.to_i
184
184
  seq = 0
185
185
  in_progress_response = { id: request_id, object: 'response', created_at: created_at,
@@ -218,7 +218,7 @@ module Legion
218
218
 
219
219
  full_text = +''
220
220
 
221
- pipeline_response = executor.call_stream do |chunk|
221
+ pipeline_response = call_streaming_executor(executor, upstream_body: upstream_body) do |chunk|
222
222
  text = chunk.respond_to?(:content) ? chunk.content.to_s : chunk.to_s
223
223
  next if text.empty?
224
224
 
@@ -282,6 +282,14 @@ module Legion
282
282
  log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
283
283
  end
284
284
 
285
+ def self.call_streaming_executor(executor, upstream_body: nil, &)
286
+ if upstream_body && executor.respond_to?(:call_responses)
287
+ executor.call_responses(body: upstream_body, stream: true, &)
288
+ else
289
+ executor.call_stream(&)
290
+ end
291
+ end
292
+
285
293
  def self.sse_event(name, payload)
286
294
  "event: #{name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
287
295
  end
@@ -168,6 +168,7 @@ module Legion
168
168
  CAPABILITY_METHODS = {
169
169
  chat: :chat,
170
170
  stream: :stream,
171
+ responses: :responses,
171
172
  embed: :embed,
172
173
  image: :image,
173
174
  count_tokens: :count_tokens
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'event_stream_parser'
3
4
  require 'legion/logging/helper'
4
5
 
5
6
  module Legion
@@ -58,6 +59,24 @@ module Legion
58
59
  end
59
60
  end
60
61
 
62
+ def responses(model:, body:, messages:, stream: false, **opts, &)
63
+ payload = build_responses_payload(
64
+ body: body,
65
+ model: model,
66
+ messages: messages,
67
+ stream: stream,
68
+ system: opts[:system],
69
+ tools: opts[:tools]
70
+ )
71
+
72
+ if stream
73
+ stream_responses_payload(payload, offering_metadata: opts[:offering_metadata], &)
74
+ else
75
+ response = provider.connection.post(responses_url, payload)
76
+ responses_hash_response(response.body, offering_metadata: opts[:offering_metadata])
77
+ end
78
+ end
79
+
61
80
  def embed(model:, text:, dimensions: nil, **opts)
62
81
  model_info = model_info(model, offering_metadata: opts[:offering_metadata])
63
82
  response = provider.embed(
@@ -136,6 +155,207 @@ module Legion
136
155
  end
137
156
  end
138
157
 
158
+ def responses_url = '/v1/responses'
159
+
160
+ def build_responses_payload(body:, model:, messages:, stream:, system: nil, tools: nil)
161
+ payload = normalize_hash(body).dup
162
+ payload[:model] = model
163
+ payload[:stream] = stream
164
+ payload[:input] = responses_input(messages)
165
+
166
+ system_content = normalize_response_system(system)
167
+ payload[:instructions] = system_content if present_system?(system_content)
168
+
169
+ formatted_tools = responses_tools(tools)
170
+ payload[:tools] = formatted_tools if formatted_tools.any?
171
+
172
+ deep_compact(payload)
173
+ end
174
+
175
+ def responses_input(messages)
176
+ Array(messages).map do |message|
177
+ normalized = normalize_hash(message)
178
+ if normalized[:role].to_s == 'tool'
179
+ next({
180
+ type: 'function_call_output',
181
+ call_id: normalized[:tool_call_id].to_s,
182
+ output: normalize_message_content(normalized[:content]).to_s
183
+ })
184
+ end
185
+
186
+ {
187
+ role: normalized[:role]&.to_s || 'user',
188
+ content: normalize_message_content(normalized[:content]).to_s,
189
+ tool_call_id: normalized[:tool_call_id]
190
+ }.compact
191
+ end
192
+ end
193
+
194
+ def normalize_response_system(system)
195
+ return nil if system.nil?
196
+ return system[:content] || system['content'] if system.is_a?(Hash)
197
+
198
+ system.to_s
199
+ end
200
+
201
+ def responses_tools(tools)
202
+ normalize_tools(tools).values.map do |tool|
203
+ {
204
+ type: 'function',
205
+ name: tool.name.to_s,
206
+ description: tool.description.to_s,
207
+ parameters: tool.params_schema || { type: 'object', properties: {} }
208
+ }
209
+ end
210
+ end
211
+
212
+ def deep_compact(value)
213
+ case value
214
+ when Hash
215
+ value.each_with_object({}) do |(key, hash_value), compacted|
216
+ compact_value = deep_compact(hash_value)
217
+ compacted[key] = compact_value unless compact_value.nil?
218
+ end
219
+ when Array
220
+ value.map { |entry| deep_compact(entry) }.compact
221
+ else
222
+ value
223
+ end
224
+ end
225
+
226
+ def stream_responses_payload(payload, offering_metadata: nil, &block)
227
+ accumulator = build_responses_stream_accumulator
228
+ parser = EventStreamParser::Parser.new
229
+
230
+ response = provider.connection.post(responses_url, payload) do |req|
231
+ req.headers['Accept'] = 'text/event-stream'
232
+ attach_responses_stream_handler(req, parser, accumulator, block)
233
+ end
234
+
235
+ responses_stream_response(accumulator, response.body, offering_metadata: offering_metadata)
236
+ end
237
+
238
+ def build_responses_stream_accumulator
239
+ {
240
+ content: +'',
241
+ model: nil,
242
+ usage: {},
243
+ completed: nil,
244
+ raw: nil
245
+ }
246
+ end
247
+
248
+ def attach_responses_stream_handler(req, parser, accumulator, block)
249
+ handler = proc do |chunk, *_args|
250
+ parser.feed(chunk) do |_event, data|
251
+ handle_responses_stream_data(data, accumulator, block)
252
+ end
253
+ end
254
+
255
+ if req.options.respond_to?(:on_data=)
256
+ req.options.on_data = handler
257
+ else
258
+ req.options[:on_data] = handler
259
+ end
260
+ end
261
+
262
+ def handle_responses_stream_data(data, accumulator, block)
263
+ return if data == '[DONE]'
264
+
265
+ parsed = Legion::JSON.parse(data, symbolize_names: false)
266
+ return unless parsed.is_a?(Hash)
267
+
268
+ accumulator[:raw] = parsed
269
+ case parsed['type']
270
+ when 'response.output_text.delta'
271
+ accumulate_responses_text_delta(parsed, accumulator, block)
272
+ when 'response.completed'
273
+ response = parsed['response'] || {}
274
+ accumulator[:completed] = response
275
+ accumulator[:model] = response['model'] if response['model']
276
+ accumulator[:usage] = responses_usage(response['usage'])
277
+ end
278
+ end
279
+
280
+ def accumulate_responses_text_delta(parsed, accumulator, block)
281
+ delta = parsed['delta'].to_s
282
+ return if delta.empty?
283
+
284
+ accumulator[:content] << delta
285
+ block&.call(
286
+ lex_llm_namespace::Chunk.new(
287
+ role: :assistant,
288
+ content: delta,
289
+ model_id: parsed['model'],
290
+ raw: parsed,
291
+ tokens: nil
292
+ )
293
+ )
294
+ end
295
+
296
+ def responses_stream_response(accumulator, response_body, offering_metadata: nil)
297
+ completed = accumulator[:completed] || {}
298
+ content = accumulator[:content]
299
+ content = extract_responses_text(completed) if content.empty?
300
+
301
+ {
302
+ result: content,
303
+ model: accumulator[:model] || completed['model'],
304
+ usage: accumulator[:usage],
305
+ metadata: response_metadata(completed.empty? ? response_body : completed, offering_metadata: offering_metadata)
306
+ }.compact
307
+ end
308
+
309
+ def responses_hash_response(body, offering_metadata: nil)
310
+ normalized = normalize_string_hash(body)
311
+ {
312
+ result: extract_responses_text(normalized),
313
+ model: normalized['model'],
314
+ usage: responses_usage(normalized['usage']),
315
+ metadata: response_metadata(normalized, offering_metadata: offering_metadata)
316
+ }.compact
317
+ end
318
+
319
+ def normalize_string_hash(value)
320
+ return value.map { |entry| normalize_string_hash(entry) } if value.is_a?(Array)
321
+ return {} unless value.respond_to?(:each_pair)
322
+
323
+ value.each_with_object({}) do |(key, hash_value), normalized|
324
+ normalized[key.to_s] = normalize_string_hash_value(hash_value)
325
+ end
326
+ end
327
+
328
+ def normalize_string_hash_value(value)
329
+ return normalize_string_hash(value) if value.respond_to?(:each_pair)
330
+ return value.map { |entry| normalize_string_hash_value(entry) } if value.is_a?(Array)
331
+
332
+ value
333
+ end
334
+
335
+ def extract_responses_text(body)
336
+ return body['output_text'].to_s if body['output_text']
337
+
338
+ Array(body['output']).flat_map do |item|
339
+ Array(item['content']).filter_map do |content|
340
+ next unless %w[output_text text].include?(content['type'].to_s)
341
+
342
+ content['text']
343
+ end
344
+ end.join
345
+ end
346
+
347
+ def responses_usage(usage)
348
+ usage = normalize_string_hash(usage)
349
+ input = usage['input_tokens'] || usage['prompt_tokens']
350
+ output = usage['output_tokens'] || usage['completion_tokens']
351
+ {
352
+ input_tokens: input.to_i,
353
+ output_tokens: output.to_i,
354
+ cache_read_tokens: usage.dig('input_tokens_details', 'cached_tokens').to_i,
355
+ cache_write_tokens: usage.dig('input_tokens_details', 'cache_creation_tokens').to_i
356
+ }
357
+ end
358
+
139
359
  def model_info(model, offering_metadata: nil)
140
360
  offering = normalize_offering_metadata(offering_metadata)
141
361
  lex_llm_namespace::Model::Info.new(
@@ -124,6 +124,14 @@ module Legion
124
124
  build_response
125
125
  end
126
126
 
127
+ def call_responses(body:, stream: false, &)
128
+ log.debug "[llm][executor] action=call_responses request_id=#{@request.id} profile=#{@profile} stream=#{stream}"
129
+ execute_pre_provider_steps
130
+ execute_provider_request_responses(body: body, stream: stream, &)
131
+ execute_post_provider_steps
132
+ build_response
133
+ end
134
+
127
135
  private
128
136
 
129
137
  def llm_setting(key, default = nil)
@@ -1339,6 +1347,30 @@ module Legion
1339
1347
  @raw_response = Call::NativeResponseAdapter.new(result)
1340
1348
  end
1341
1349
 
1350
+ def execute_provider_request_responses(body:, stream:, &block)
1351
+ @timestamps[:provider_start] = Time.now
1352
+ @timeline.record(
1353
+ category: :provider, key: 'provider:request_sent',
1354
+ exchange_id: @exchange_id, direction: :outbound,
1355
+ detail: "responses from #{@resolved_provider}",
1356
+ from: 'pipeline', to: "provider:#{@resolved_provider}"
1357
+ )
1358
+
1359
+ raise Legion::LLM::ProviderError, "Native provider not registered: #{@resolved_provider}" unless use_native_dispatch?(@resolved_provider)
1360
+
1361
+ result = dispatch_responses_request(
1362
+ body: body,
1363
+ messages: native_dispatch_messages,
1364
+ stream: stream,
1365
+ stream_block: block
1366
+ )
1367
+ merge_response_offering_metadata(result[:metadata])
1368
+ @raw_response = Call::NativeResponseAdapter.new(result)
1369
+
1370
+ @timestamps[:provider_end] = Time.now
1371
+ record_provider_response
1372
+ end
1373
+
1342
1374
  def normalize_message_content(content)
1343
1375
  return content if content.nil? || content.is_a?(String)
1344
1376
  return content unless content.is_a?(Array)
@@ -24,6 +24,41 @@ module Legion
24
24
  end
25
25
  end
26
26
 
27
+ def dispatch_responses_request(body:, messages:, stream:, stream_block: nil)
28
+ raise Legion::LLM::ProviderError, 'Responses API upstream dispatch is not supported for fleet providers' if fleet_dispatch?
29
+
30
+ idempotency_key = next_route_idempotency_key
31
+ result = Call::Dispatch.call(
32
+ provider: @resolved_provider,
33
+ instance: @resolved_instance,
34
+ capability: :responses,
35
+ model: @resolved_model,
36
+ body: body,
37
+ messages: messages,
38
+ stream: stream,
39
+ **native_dispatch_options,
40
+ &stream_block
41
+ )
42
+ record_route_attempt(
43
+ dispatch_path: :direct,
44
+ operation: :responses,
45
+ status: :success,
46
+ idempotency_key: idempotency_key,
47
+ selected_lane: nil
48
+ )
49
+ result
50
+ rescue StandardError => e
51
+ record_route_attempt(
52
+ dispatch_path: :direct,
53
+ operation: :responses,
54
+ status: :failure,
55
+ idempotency_key: idempotency_key,
56
+ selected_lane: nil,
57
+ failure_reason: e.message
58
+ )
59
+ raise
60
+ end
61
+
27
62
  def dispatch_direct_request(capability:, operation:, messages:, stream_block: nil)
28
63
  idempotency_key = next_route_idempotency_key
29
64
  result = Call::Dispatch.call(
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.36'
5
+ VERSION = '0.9.37'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.36
4
+ version: 0.9.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity