legion-llm 0.9.34 → 0.9.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca7cfa8bdb01bbb162989ba4ce84f8735eb46e34f36cfea29d4504357d6eaa38
4
- data.tar.gz: 2b8866db7f799f242a070fa590c18f64cbee278c7fc2e827a9fe604cab6ce7d1
3
+ metadata.gz: 93611da95712602a9f99e00c4b34523c23838a99d34c3c441ea6bef642231e3f
4
+ data.tar.gz: 6ced6ad0b6091c5a3d53702b867eea5f04d35199892338023aebb6bb452ed867
5
5
  SHA512:
6
- metadata.gz: 2b3ba4cb577aa0a44b166ef7ee1299db98b5dbfa3759f76fb158ffa5e2c4eca7999198f47d87e70d8ebf79f21e1395137e75f26b033f609b939f70b2faa0b1f6
7
- data.tar.gz: c77913f743c3075977419811d8c20fe7d7c1b48b77cc234b1bdcd4e1c1642734c9f203c60f3d8ec0c021f9cffe9fc79fd18a0abc14f452bdd152175d8a7fe820
6
+ metadata.gz: aa99ed858c6bef1fc214a45d4d59e51f1e9f0262f75dcdbd0f60645d59296edf6fa57e47dfa706dd0b06ec7c7f6dbf572f3832235d0d7125cd9992ec65aa6eee
7
+ data.tar.gz: dfe7e2db5cf883de39a5ac47438408a858372a52dd82230baa4a624e33e17b0558eb50359237345afa5b8a1df432b164149c3fce540304ac56ffbad888110c33
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.36] - 2026-05-22
4
+
5
+ ### Fixed
6
+ - Providers: `LexLLMAdapter` now preserves streamed token usage from the upstream `llm-gateway.uhg.com` Responses API payload added in LegionIO/legion-llm#130, including gateway-shaped `usage`, `raw[:data]`, and `raw[:response][:usage]` token fields, so LegionIO `response.completed.usage.input_tokens` no longer collapses to `0`
7
+
8
+ ## [0.9.35] - 2026-05-22
9
+
10
+ ### Fixed
11
+ - API: OpenAI Responses streaming now emits the full gateway-compatible SSE envelope format (`{ type:, response:, sequence_number: }` for lifecycle events, `{ type:, item_id:, ... }` for content events), matching `llm-gateway.uhg.com` wire format exactly so Codex CLI accepts the stream
12
+ - API: `response.completed` now correctly includes `usage.input_tokens` from `pipeline_response.tokens` via `build_usage` helper; token aliases (`:input`/`:input_tokens`, `:output`/`:output_tokens`) resolved across hash and object-backed payloads
13
+ - API: Added missing `response.in_progress` and `response.content_part.*` SSE events to streaming sequence
14
+
3
15
  ## [0.9.34] - 2026-05-22
4
16
 
5
17
  ### Fixed
@@ -168,30 +168,53 @@ module Legion
168
168
  status: 'completed'
169
169
  }
170
170
 
171
- input_tokens = extract_token(tokens, :input)
172
- output_tokens = extract_token(tokens, :output)
173
-
174
171
  {
175
172
  id: request_id,
176
173
  object: 'response',
177
174
  created_at: Time.now.to_i,
178
175
  model: resolved_model,
179
176
  output: output,
180
- usage: {
181
- input_tokens: input_tokens,
182
- output_tokens: output_tokens,
183
- total_tokens: input_tokens.to_i + output_tokens.to_i
184
- },
177
+ usage: build_usage(tokens),
185
178
  status: 'completed'
186
179
  }
187
180
  end
188
181
 
189
- def self.stream_response(out, executor, request_id:, model:)
190
- out << "event: response.created\ndata: #{Legion::JSON.dump({ id: request_id, object: 'response', status: 'in_progress' })}\n\n"
182
+ def self.stream_response(out, executor, request_id:, model:) # rubocop:disable Metrics/MethodLength
183
+ created_at = Time.now.to_i
184
+ seq = 0
185
+ in_progress_response = { id: request_id, object: 'response', created_at: created_at,
186
+ status: 'in_progress', model: model, output: [], usage: nil }
187
+
188
+ # response.created — envelope matches gateway format: { type:, response:, sequence_number: }
189
+ out << sse_event('response.created', {
190
+ type: 'response.created',
191
+ sequence_number: seq += 1,
192
+ response: in_progress_response
193
+ })
194
+
195
+ out << sse_event('response.in_progress', {
196
+ type: 'response.in_progress',
197
+ sequence_number: seq += 1,
198
+ response: in_progress_response
199
+ })
191
200
 
192
201
  msg_id = "msg_#{SecureRandom.hex(12)}"
193
- item_event = { type: 'message', id: msg_id, role: 'assistant', content: [], status: 'in_progress' }
194
- out << "event: response.output_item.added\ndata: #{Legion::JSON.dump({ output_index: 0, item: item_event })}\n\n"
202
+ out << sse_event('response.output_item.added', {
203
+ type: 'response.output_item.added',
204
+ sequence_number: seq += 1,
205
+ output_index: 0,
206
+ item: { id: msg_id, type: 'message', role: 'assistant',
207
+ content: [], status: 'in_progress' }
208
+ })
209
+
210
+ out << sse_event('response.content_part.added', {
211
+ type: 'response.content_part.added',
212
+ sequence_number: seq += 1,
213
+ output_index: 0,
214
+ content_index: 0,
215
+ item_id: msg_id,
216
+ part: { type: 'output_text', text: '', annotations: [] }
217
+ })
195
218
 
196
219
  full_text = +''
197
220
 
@@ -200,40 +223,69 @@ module Legion
200
223
  next if text.empty?
201
224
 
202
225
  full_text << text
203
- delta_event = { content_index: 0, delta: text }
204
- out << "event: response.output_text.delta\ndata: #{Legion::JSON.dump(delta_event)}\n\n"
226
+ out << sse_event('response.output_text.delta', {
227
+ type: 'response.output_text.delta',
228
+ sequence_number: seq += 1,
229
+ output_index: 0,
230
+ content_index: 0,
231
+ item_id: msg_id,
232
+ delta: text
233
+ })
205
234
  end
206
235
 
207
236
  routing = pipeline_response.routing || {}
208
- tokens = pipeline_response.tokens || {}
237
+ tokens = pipeline_response.tokens || {}
209
238
  resolved_model = (routing[:model] || routing['model'] || model).to_s
210
- input_tokens = extract_token(tokens, :input)
211
- output_tokens = extract_token(tokens, :output)
212
-
213
- out << "event: response.output_text.done\ndata: #{Legion::JSON.dump({ content_index: 0, text: full_text })}\n\n"
214
- done_item = {
215
- output_index: 0,
216
- item: { type: 'message', id: msg_id, role: 'assistant',
217
- content: [{ type: 'output_text', text: full_text }], status: 'completed' }
218
- }
219
- out << "event: response.output_item.done\ndata: #{Legion::JSON.dump(done_item)}\n\n"
220
-
221
- done_data = {
222
- id: request_id,
223
- object: 'response',
224
- model: resolved_model,
225
- status: 'completed',
226
- usage: {
227
- input_tokens: input_tokens,
228
- output_tokens: output_tokens,
229
- total_tokens: input_tokens.to_i + output_tokens.to_i
230
- }
231
- }
232
- out << "event: response.completed\ndata: #{Legion::JSON.dump(done_data)}\n\n"
239
+ usage = build_usage(tokens)
240
+
241
+ out << sse_event('response.output_text.done', {
242
+ type: 'response.output_text.done',
243
+ sequence_number: seq += 1,
244
+ output_index: 0,
245
+ content_index: 0,
246
+ item_id: msg_id,
247
+ text: full_text
248
+ })
249
+
250
+ out << sse_event('response.content_part.done', {
251
+ type: 'response.content_part.done',
252
+ sequence_number: seq += 1,
253
+ output_index: 0,
254
+ content_index: 0,
255
+ item_id: msg_id,
256
+ part: { type: 'output_text', text: full_text, annotations: [] }
257
+ })
258
+
259
+ completed_item = { id: msg_id, type: 'message', role: 'assistant', status: 'completed',
260
+ content: [{ type: 'output_text', text: full_text, annotations: [] }] }
261
+ out << sse_event('response.output_item.done', {
262
+ type: 'response.output_item.done',
263
+ sequence_number: seq += 1,
264
+ output_index: 0,
265
+ item: completed_item
266
+ })
267
+
268
+ out << sse_event('response.completed', {
269
+ type: 'response.completed',
270
+ sequence_number: seq + 1,
271
+ response: {
272
+ id: request_id,
273
+ object: 'response',
274
+ created_at: created_at,
275
+ status: 'completed',
276
+ model: resolved_model,
277
+ output: [completed_item],
278
+ usage: usage
279
+ }
280
+ })
233
281
 
234
282
  log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
235
283
  end
236
284
 
285
+ def self.sse_event(name, payload)
286
+ "event: #{name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
287
+ end
288
+
237
289
  def self.build_output_tool_calls(pipeline_response)
238
290
  tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
239
291
  return [] unless tools_data.is_a?(Array) && !tools_data.empty?
@@ -258,17 +310,56 @@ module Legion
258
310
  def self.extract_token(tokens, key)
259
311
  return 0 if tokens.nil?
260
312
 
261
- method_name = { input: :input_tokens, output: :output_tokens }[key]
313
+ aliases = token_aliases(key)
262
314
 
263
315
  if tokens.is_a?(Hash)
264
- return (tokens[method_name] || tokens[method_name.to_s] ||
265
- tokens[key] || tokens[key.to_s] || 0).to_i
316
+ aliases.each do |candidate|
317
+ value = tokens[candidate]
318
+ value = tokens[candidate.to_s] if value.nil?
319
+ return value.to_i unless value.nil?
320
+ end
321
+
322
+ return 0
266
323
  end
267
324
 
268
- return tokens.public_send(method_name).to_i if method_name && tokens.respond_to?(method_name)
325
+ aliases.each do |candidate|
326
+ method_name = token_method(candidate)
327
+ return tokens.public_send(method_name).to_i if method_name && tokens.respond_to?(method_name)
328
+ end
269
329
 
270
330
  0
271
331
  end
332
+
333
+ def self.build_usage(tokens)
334
+ input_tokens = extract_token(tokens, :input_tokens)
335
+ output_tokens = extract_token(tokens, :output_tokens)
336
+
337
+ {
338
+ input_tokens: input_tokens,
339
+ output_tokens: output_tokens,
340
+ total_tokens: input_tokens + output_tokens
341
+ }
342
+ end
343
+
344
+ def self.token_aliases(key)
345
+ case key.to_sym
346
+ when :input, :input_tokens
347
+ %i[input_tokens input]
348
+ when :output, :output_tokens
349
+ %i[output_tokens output]
350
+ else
351
+ [key.to_sym]
352
+ end
353
+ end
354
+
355
+ def self.token_method(key)
356
+ {
357
+ input: :input_tokens,
358
+ input_tokens: :input_tokens,
359
+ output: :output_tokens,
360
+ output_tokens: :output_tokens
361
+ }[key.to_sym]
362
+ end
272
363
  end
273
364
  end
274
365
  end
@@ -336,10 +336,11 @@ module Legion
336
336
  end
337
337
 
338
338
  def accumulate_stream_usage(accumulator, chunk)
339
- return unless chunk.respond_to?(:input_tokens)
339
+ usage = usage_hash(chunk)
340
+ return unless token_usage_signal?(chunk, usage)
340
341
 
341
342
  accumulator[:model] = chunk.model_id if chunk.respond_to?(:model_id)
342
- accumulator[:usage] = usage_hash(chunk)
343
+ accumulator[:usage] = merge_usage_hash(accumulator[:usage], usage)
343
344
  accumulator[:raw] = chunk.raw if chunk.respond_to?(:raw)
344
345
  end
345
346
 
@@ -392,13 +393,89 @@ module Legion
392
393
 
393
394
  def usage_hash(response)
394
395
  {
395
- input_tokens: response.input_tokens.to_i,
396
- output_tokens: response.output_tokens.to_i,
397
- cache_read_tokens: response.cached_tokens.to_i,
398
- cache_write_tokens: response.cache_creation_tokens.to_i
396
+ input_tokens: extract_token_metric(response, :input_tokens, :prompt_tokens),
397
+ output_tokens: extract_token_metric(response, :output_tokens, :completion_tokens),
398
+ cache_read_tokens: extract_token_metric(response, :cache_read_tokens, :cached_tokens),
399
+ cache_write_tokens: extract_token_metric(response, :cache_write_tokens, :cache_creation_tokens)
399
400
  }
400
401
  end
401
402
 
403
+ def token_usage_signal?(response, usage)
404
+ usage.values.any?(&:positive?) ||
405
+ response.respond_to?(:usage) ||
406
+ response.respond_to?(:raw) ||
407
+ response.respond_to?(:input_tokens) ||
408
+ response.respond_to?(:output_tokens)
409
+ end
410
+
411
+ def merge_usage_hash(existing, incoming)
412
+ current = existing.is_a?(Hash) ? existing : {}
413
+ latest = incoming.is_a?(Hash) ? incoming : {}
414
+
415
+ {
416
+ input_tokens: [current[:input_tokens].to_i, latest[:input_tokens].to_i].max,
417
+ output_tokens: [current[:output_tokens].to_i, latest[:output_tokens].to_i].max,
418
+ cache_read_tokens: [current[:cache_read_tokens].to_i, latest[:cache_read_tokens].to_i].max,
419
+ cache_write_tokens: [current[:cache_write_tokens].to_i, latest[:cache_write_tokens].to_i].max
420
+ }
421
+ end
422
+
423
+ def extract_token_metric(response, canonical_key, legacy_key = nil)
424
+ values = token_metric_candidates(response, canonical_key, legacy_key)
425
+ positive = values.find(&:positive?)
426
+ positive || values.first || 0
427
+ end
428
+
429
+ def token_metric_candidates(response, canonical_key, legacy_key = nil)
430
+ keys = [canonical_key, legacy_key].compact
431
+ token_metric_sources(response).flat_map do |source|
432
+ keys.filter_map { |key| extract_metric_value(source, key) }
433
+ end
434
+ end
435
+
436
+ def token_metric_sources(response)
437
+ sources = [response]
438
+ sources << response.usage if response.respond_to?(:usage)
439
+ sources << response.raw if response.respond_to?(:raw)
440
+
441
+ sources.compact.flat_map { |source| expand_token_metric_source(source) }.compact.uniq
442
+ end
443
+
444
+ def expand_token_metric_source(source, depth = 0)
445
+ return [] if source.nil?
446
+ return [source] unless source.respond_to?(:key?) && depth < 3
447
+
448
+ nested = [source]
449
+ nested << hash_value(source, :usage)
450
+ nested << hash_value(source, :data)
451
+ nested << hash_value(source, :response)
452
+ nested.compact.flat_map { |entry| [entry, *expand_token_metric_source(entry, depth + 1)] }
453
+ end
454
+
455
+ def extract_metric_value(source, key)
456
+ if source.respond_to?(key)
457
+ value = source.public_send(key)
458
+ return value.to_i unless value.nil?
459
+ end
460
+
461
+ return nil unless source.respond_to?(:key?)
462
+
463
+ value = hash_value(source, key)
464
+ value&.to_i
465
+ rescue StandardError => e
466
+ log.debug "[llm][adapter] action=extract_metric_value key=#{key} class=#{source.class} error=#{e.class}: #{e.message}"
467
+ nil
468
+ end
469
+
470
+ def hash_value(hash, key)
471
+ return hash[key] if hash.key?(key)
472
+
473
+ string_key = key.to_s
474
+ return hash[string_key] if hash.key?(string_key)
475
+
476
+ nil
477
+ end
478
+
402
479
  def stream_thinking_hash(accumulator)
403
480
  thinking_text = accumulator[:thinking_text]
404
481
  return nil if thinking_text.empty?
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.34'
5
+ VERSION = '0.9.36'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.34
4
+ version: 0.9.36
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity