legion-llm 0.9.34 → 0.9.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/llm/api/openai/responses.rb +133 -42
- data/lib/legion/llm/call/lex_llm_adapter.rb +83 -6
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 93611da95712602a9f99e00c4b34523c23838a99d34c3c441ea6bef642231e3f
|
|
4
|
+
data.tar.gz: 6ced6ad0b6091c5a3d53702b867eea5f04d35199892338023aebb6bb452ed867
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aa99ed858c6bef1fc214a45d4d59e51f1e9f0262f75dcdbd0f60645d59296edf6fa57e47dfa706dd0b06ec7c7f6dbf572f3832235d0d7125cd9992ec65aa6eee
|
|
7
|
+
data.tar.gz: dfe7e2db5cf883de39a5ac47438408a858372a52dd82230baa4a624e33e17b0558eb50359237345afa5b8a1df432b164149c3fce540304ac56ffbad888110c33
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.36] - 2026-05-22
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Providers: `LexLLMAdapter` now preserves streamed token usage from the upstream `llm-gateway.uhg.com` Responses API payload added in LegionIO/legion-llm#130, including gateway-shaped `usage`, `raw[:data]`, and `raw[:response][:usage]` token fields, so LegionIO `response.completed.usage.input_tokens` no longer collapses to `0`
|
|
7
|
+
|
|
8
|
+
## [0.9.35] - 2026-05-22
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- API: OpenAI Responses streaming now emits the full gateway-compatible SSE envelope format (`{ type:, response:, sequence_number: }` for lifecycle events, `{ type:, item_id:, ... }` for content events), matching `llm-gateway.uhg.com` wire format exactly so Codex CLI accepts the stream
|
|
12
|
+
- API: `response.completed` now correctly includes `usage.input_tokens` from `pipeline_response.tokens` via `build_usage` helper; token aliases (`:input`/`:input_tokens`, `:output`/`:output_tokens`) resolved across hash and object-backed payloads
|
|
13
|
+
- API: Added missing `response.in_progress` and `response.content_part.*` SSE events to streaming sequence
|
|
14
|
+
|
|
3
15
|
## [0.9.34] - 2026-05-22
|
|
4
16
|
|
|
5
17
|
### Fixed
|
|
@@ -168,30 +168,53 @@ module Legion
|
|
|
168
168
|
status: 'completed'
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
-
input_tokens = extract_token(tokens, :input)
|
|
172
|
-
output_tokens = extract_token(tokens, :output)
|
|
173
|
-
|
|
174
171
|
{
|
|
175
172
|
id: request_id,
|
|
176
173
|
object: 'response',
|
|
177
174
|
created_at: Time.now.to_i,
|
|
178
175
|
model: resolved_model,
|
|
179
176
|
output: output,
|
|
180
|
-
usage:
|
|
181
|
-
input_tokens: input_tokens,
|
|
182
|
-
output_tokens: output_tokens,
|
|
183
|
-
total_tokens: input_tokens.to_i + output_tokens.to_i
|
|
184
|
-
},
|
|
177
|
+
usage: build_usage(tokens),
|
|
185
178
|
status: 'completed'
|
|
186
179
|
}
|
|
187
180
|
end
|
|
188
181
|
|
|
189
|
-
def self.stream_response(out, executor, request_id:, model:)
|
|
190
|
-
|
|
182
|
+
def self.stream_response(out, executor, request_id:, model:) # rubocop:disable Metrics/MethodLength
|
|
183
|
+
created_at = Time.now.to_i
|
|
184
|
+
seq = 0
|
|
185
|
+
in_progress_response = { id: request_id, object: 'response', created_at: created_at,
|
|
186
|
+
status: 'in_progress', model: model, output: [], usage: nil }
|
|
187
|
+
|
|
188
|
+
# response.created — envelope matches gateway format: { type:, response:, sequence_number: }
|
|
189
|
+
out << sse_event('response.created', {
|
|
190
|
+
type: 'response.created',
|
|
191
|
+
sequence_number: seq += 1,
|
|
192
|
+
response: in_progress_response
|
|
193
|
+
})
|
|
194
|
+
|
|
195
|
+
out << sse_event('response.in_progress', {
|
|
196
|
+
type: 'response.in_progress',
|
|
197
|
+
sequence_number: seq += 1,
|
|
198
|
+
response: in_progress_response
|
|
199
|
+
})
|
|
191
200
|
|
|
192
201
|
msg_id = "msg_#{SecureRandom.hex(12)}"
|
|
193
|
-
|
|
194
|
-
|
|
202
|
+
out << sse_event('response.output_item.added', {
|
|
203
|
+
type: 'response.output_item.added',
|
|
204
|
+
sequence_number: seq += 1,
|
|
205
|
+
output_index: 0,
|
|
206
|
+
item: { id: msg_id, type: 'message', role: 'assistant',
|
|
207
|
+
content: [], status: 'in_progress' }
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
out << sse_event('response.content_part.added', {
|
|
211
|
+
type: 'response.content_part.added',
|
|
212
|
+
sequence_number: seq += 1,
|
|
213
|
+
output_index: 0,
|
|
214
|
+
content_index: 0,
|
|
215
|
+
item_id: msg_id,
|
|
216
|
+
part: { type: 'output_text', text: '', annotations: [] }
|
|
217
|
+
})
|
|
195
218
|
|
|
196
219
|
full_text = +''
|
|
197
220
|
|
|
@@ -200,40 +223,69 @@ module Legion
|
|
|
200
223
|
next if text.empty?
|
|
201
224
|
|
|
202
225
|
full_text << text
|
|
203
|
-
|
|
204
|
-
|
|
226
|
+
out << sse_event('response.output_text.delta', {
|
|
227
|
+
type: 'response.output_text.delta',
|
|
228
|
+
sequence_number: seq += 1,
|
|
229
|
+
output_index: 0,
|
|
230
|
+
content_index: 0,
|
|
231
|
+
item_id: msg_id,
|
|
232
|
+
delta: text
|
|
233
|
+
})
|
|
205
234
|
end
|
|
206
235
|
|
|
207
236
|
routing = pipeline_response.routing || {}
|
|
208
|
-
tokens
|
|
237
|
+
tokens = pipeline_response.tokens || {}
|
|
209
238
|
resolved_model = (routing[:model] || routing['model'] || model).to_s
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
out <<
|
|
239
|
+
usage = build_usage(tokens)
|
|
240
|
+
|
|
241
|
+
out << sse_event('response.output_text.done', {
|
|
242
|
+
type: 'response.output_text.done',
|
|
243
|
+
sequence_number: seq += 1,
|
|
244
|
+
output_index: 0,
|
|
245
|
+
content_index: 0,
|
|
246
|
+
item_id: msg_id,
|
|
247
|
+
text: full_text
|
|
248
|
+
})
|
|
249
|
+
|
|
250
|
+
out << sse_event('response.content_part.done', {
|
|
251
|
+
type: 'response.content_part.done',
|
|
252
|
+
sequence_number: seq += 1,
|
|
253
|
+
output_index: 0,
|
|
254
|
+
content_index: 0,
|
|
255
|
+
item_id: msg_id,
|
|
256
|
+
part: { type: 'output_text', text: full_text, annotations: [] }
|
|
257
|
+
})
|
|
258
|
+
|
|
259
|
+
completed_item = { id: msg_id, type: 'message', role: 'assistant', status: 'completed',
|
|
260
|
+
content: [{ type: 'output_text', text: full_text, annotations: [] }] }
|
|
261
|
+
out << sse_event('response.output_item.done', {
|
|
262
|
+
type: 'response.output_item.done',
|
|
263
|
+
sequence_number: seq += 1,
|
|
264
|
+
output_index: 0,
|
|
265
|
+
item: completed_item
|
|
266
|
+
})
|
|
267
|
+
|
|
268
|
+
out << sse_event('response.completed', {
|
|
269
|
+
type: 'response.completed',
|
|
270
|
+
sequence_number: seq + 1,
|
|
271
|
+
response: {
|
|
272
|
+
id: request_id,
|
|
273
|
+
object: 'response',
|
|
274
|
+
created_at: created_at,
|
|
275
|
+
status: 'completed',
|
|
276
|
+
model: resolved_model,
|
|
277
|
+
output: [completed_item],
|
|
278
|
+
usage: usage
|
|
279
|
+
}
|
|
280
|
+
})
|
|
233
281
|
|
|
234
282
|
log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
|
|
235
283
|
end
|
|
236
284
|
|
|
285
|
+
# Renders one Server-Sent Events frame as a String.
#
# The frame is an `event:` line carrying +name+, a `data:` line carrying
# +payload+ serialized through Legion::JSON.dump, and a trailing blank line.
#
# @param name [#to_s] event name placed on the `event:` line
# @param payload [Object] value handed to Legion::JSON.dump
# @return [String] the formatted frame
def self.sse_event(name, payload)
  data = Legion::JSON.dump(payload)
  "event: #{name}\ndata: #{data}\n\n"
end
|
|
288
|
+
|
|
237
289
|
def self.build_output_tool_calls(pipeline_response)
|
|
238
290
|
tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
|
|
239
291
|
return [] unless tools_data.is_a?(Array) && !tools_data.empty?
|
|
@@ -258,17 +310,56 @@ module Legion
|
|
|
258
310
|
# Reads a single token count out of +tokens+ and coerces it with #to_i.
#
# For Hash payloads every alias returned by token_aliases is tried, first as
# given and then as a String key; the first non-nil value wins. For any other
# object the alias is mapped to a reader name via token_method and the first
# reader the object responds to is called.
#
# @param tokens [Hash, Object, nil] token container
# @param key [Symbol, String] requested metric (e.g. :input, :output_tokens)
# @return [Integer] the token count, or 0 when nothing could be read
def self.extract_token(tokens, key)
  return 0 if tokens.nil?

  names = token_aliases(key)

  unless tokens.is_a?(Hash)
    names.each do |name|
      reader = token_method(name)
      next unless reader && tokens.respond_to?(reader)

      return tokens.public_send(reader).to_i
    end

    return 0
  end

  names.each do |name|
    found = tokens[name]
    found = tokens[name.to_s] if found.nil?
    return found.to_i unless found.nil?
  end

  0
end
|
|
332
|
+
|
|
333
|
+
# Builds the `usage` hash reported on a response.
#
# Both counts are read through extract_token; the total is their sum.
#
# @param tokens [Hash, Object, nil] token container passed to extract_token
# @return [Hash{Symbol=>Integer}] :input_tokens, :output_tokens, :total_tokens
def self.build_usage(tokens)
  consumed, produced = %i[input_tokens output_tokens].map { |metric| extract_token(tokens, metric) }

  { input_tokens: consumed, output_tokens: produced, total_tokens: consumed + produced }
end
|
|
343
|
+
|
|
344
|
+
# Lists the key names under which a token metric may be stored, in lookup
# order. Input and output metrics each have a long and a short spelling; any
# other key maps only to itself (as a Symbol).
#
# @param key [Symbol, String] requested metric
# @return [Array<Symbol>] candidate keys, preferred spelling first
def self.token_aliases(key)
  name = key.to_sym
  return %i[input_tokens input] if %i[input input_tokens].include?(name)
  return %i[output_tokens output] if %i[output output_tokens].include?(name)

  [name]
end
|
|
354
|
+
|
|
355
|
+
# Maps a token metric key to the reader method used on object-backed
# payloads. Both spellings of a metric resolve to the long `*_tokens` reader.
#
# @param key [Symbol, String] metric key
# @return [Symbol, nil] reader name, or nil for keys without a known reader
def self.token_method(key)
  case key.to_sym
  when :input, :input_tokens then :input_tokens
  when :output, :output_tokens then :output_tokens
  end
end
|
|
272
363
|
end
|
|
273
364
|
end
|
|
274
365
|
end
|
|
@@ -336,10 +336,11 @@ module Legion
|
|
|
336
336
|
end
|
|
337
337
|
|
|
338
338
|
# Folds the usage carried by one streamed chunk into +accumulator+.
#
# Nothing is written unless token_usage_signal? accepts the chunk. When it
# does, :model and :raw are overwritten from the chunk (if it exposes them)
# and :usage is combined with the previous value through merge_usage_hash.
#
# @param accumulator [Hash] mutable per-stream state (:model, :usage, :raw)
# @param chunk [Object] streamed chunk
# @return [Object, nil] value of the last assignment, or nil when skipped
def accumulate_stream_usage(accumulator, chunk)
  chunk_usage = usage_hash(chunk)

  if token_usage_signal?(chunk, chunk_usage)
    accumulator[:model] = chunk.model_id if chunk.respond_to?(:model_id)
    accumulator[:usage] = merge_usage_hash(accumulator[:usage], chunk_usage)
    accumulator[:raw] = chunk.raw if chunk.respond_to?(:raw)
  end
end
|
|
345
346
|
|
|
@@ -392,13 +393,89 @@ module Legion
|
|
|
392
393
|
|
|
393
394
|
# Collects the four token metrics for +response+.
#
# Each metric is looked up through extract_token_metric under its canonical
# name with a second, alternate key name as fallback.
#
# @param response [Object] response or streamed chunk
# @return [Hash{Symbol=>Object}] :input_tokens, :output_tokens,
#   :cache_read_tokens and :cache_write_tokens, in that order
def usage_hash(response)
  fallback_keys = {
    input_tokens: :prompt_tokens,
    output_tokens: :completion_tokens,
    cache_read_tokens: :cached_tokens,
    cache_write_tokens: :cache_creation_tokens
  }

  fallback_keys.to_h do |canonical, fallback|
    [canonical, extract_token_metric(response, canonical, fallback)]
  end
end
|
|
401
402
|
|
|
403
|
+
# Decides whether a chunk should contribute to accumulated usage.
#
# True when any value in +usage+ is positive, or when +response+ exposes at
# least one of the readers #usage, #raw, #input_tokens or #output_tokens.
#
# @param response [Object] response or streamed chunk
# @param usage [Hash] metrics already extracted for +response+
# @return [Boolean]
def token_usage_signal?(response, usage)
  return true if usage.values.any?(&:positive?)

  %i[usage raw input_tokens output_tokens].any? { |reader| response.respond_to?(reader) }
end
|
|
410
|
+
|
|
411
|
+
# Combines two usage hashes by keeping, per metric, the larger of the two
# values after #to_i coercion. Arguments that are not Hashes are treated as
# empty, so every metric defaults to 0.
#
# @param existing [Hash, Object] previously accumulated usage
# @param incoming [Hash, Object] usage from the latest chunk
# @return [Hash{Symbol=>Integer}] the four token metrics
def merge_usage_hash(existing, incoming)
  previous, fresh = [existing, incoming].map { |candidate| candidate.is_a?(Hash) ? candidate : {} }

  %i[input_tokens output_tokens cache_read_tokens cache_write_tokens].to_h do |metric|
    [metric, [previous[metric].to_i, fresh[metric].to_i].max]
  end
end
|
|
422
|
+
|
|
423
|
+
# Picks one value for a token metric out of all candidates found on
# +response+: the first positive candidate if there is one, otherwise the
# first candidate of any sign, otherwise 0.
#
# @param response [Object] response or streamed chunk
# @param canonical_key [Symbol] preferred metric name
# @param legacy_key [Symbol, nil] alternate metric name
# @return [Integer]
def extract_token_metric(response, canonical_key, legacy_key = nil)
  counts = token_metric_candidates(response, canonical_key, legacy_key)
  first_positive = counts.find(&:positive?)
  return first_positive if first_positive

  counts.first || 0
end
|
|
428
|
+
|
|
429
|
+
# Gathers every value found for a metric, walking the sources returned by
# token_metric_sources in order and, within each source, trying the
# canonical key before the legacy key. Lookups that yield nil are dropped.
#
# @param response [Object] response or streamed chunk
# @param canonical_key [Symbol] preferred metric name
# @param legacy_key [Symbol, nil] alternate metric name
# @return [Array] found values, in source-then-key order
def token_metric_candidates(response, canonical_key, legacy_key = nil)
  lookup_keys = [canonical_key, legacy_key].compact

  token_metric_sources(response).each_with_object([]) do |source, found|
    lookup_keys.each do |key|
      metric = extract_metric_value(source, key)
      found << metric if metric
    end
  end
end
|
|
435
|
+
|
|
436
|
+
# Lists every object that may carry token metrics for +response+.
#
# Starts from the response itself plus its #usage and #raw values (when the
# response exposes those readers), expands each through
# expand_token_metric_source, and removes nils and duplicates.
#
# @param response [Object] response or streamed chunk
# @return [Array] distinct candidate sources in discovery order
def token_metric_sources(response)
  roots = [response]
  %i[usage raw].each do |reader|
    roots << response.public_send(reader) if response.respond_to?(reader)
  end

  roots.compact.flat_map { |root| expand_token_metric_source(root) }.compact.uniq
end
|
|
443
|
+
|
|
444
|
+
# Recursively unfolds a hash-like source into itself plus whatever it holds
# under the :usage, :data and :response keys (looked up via hash_value).
#
# Sources that do not respond to #key?, and any source once +depth+ reaches
# 3, are returned as a one-element list. Because the source itself is among
# the entries that get expanded again, the returned list contains repeated
# elements.
#
# @param source [Object, nil] candidate source
# @param depth [Integer] current recursion depth
# @return [Array] source and nested sources, duplicates included
def expand_token_metric_source(source, depth = 0)
  return [] if source.nil?

  expandable = source.respond_to?(:key?) && depth < 3
  return [source] unless expandable

  entries = [source] + %i[usage data response].map { |slot| hash_value(source, slot) }

  entries.compact.each_with_object([]) do |entry, collected|
    collected << entry
    collected.concat(expand_token_metric_source(entry, depth + 1))
  end
end
|
|
454
|
+
|
|
455
|
+
# Reads one metric from a single source and coerces it with #to_i.
#
# A reader method named +key+ is tried first; if the source has no such
# reader, or the reader returns nil, hash-like sources (those responding to
# #key?) are consulted through hash_value. Any StandardError raised along the
# way is logged at debug level and turned into nil.
#
# @param source [Object] object or hash-like container
# @param key [Symbol] metric name
# @return [Integer, nil] the metric, or nil when absent or on error
def extract_metric_value(source, key)
  via_reader = source.respond_to?(key) ? source.public_send(key) : nil
  return via_reader.to_i unless via_reader.nil?
  return nil unless source.respond_to?(:key?)

  via_lookup = hash_value(source, key)
  via_lookup.nil? ? nil : via_lookup.to_i
rescue StandardError => e
  log.debug "[llm][adapter] action=extract_metric_value key=#{key} class=#{source.class} error=#{e.class}: #{e.message}"
  nil
end
|
|
469
|
+
|
|
470
|
+
# Fetches +key+ from a hash-like object, accepting either the key as given
# or its String form. Presence is decided with #key?, so a stored nil or
# false under the first matching key is returned as-is.
#
# @param hash [#key?, #[]] hash-like container
# @param key [Symbol, String] key to look up
# @return [Object, nil] stored value, or nil when neither form is present
def hash_value(hash, key)
  if hash.key?(key)
    hash[key]
  elsif hash.key?(key.to_s)
    hash[key.to_s]
  end
end
|
|
478
|
+
|
|
402
479
|
def stream_thinking_hash(accumulator)
|
|
403
480
|
thinking_text = accumulator[:thinking_text]
|
|
404
481
|
return nil if thinking_text.empty?
|
data/lib/legion/llm/version.rb
CHANGED