legion-llm 0.9.33 → 0.9.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/llm/api/openai/responses.rb +137 -41
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0f260c2a456cc265c9ae615a84021ad73c8a82dd8bc7969e0c0d6afdab6c18e6
|
|
4
|
+
data.tar.gz: 38c4fac7b3fdb5ca97857317f17beda3fc202542aac0aed954d2959bb12c4beb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 37ccea0f649857ee3492c4df9fb9c6c58667d5899bcba2fac5eb2daef119c2bf361c1ae58fe37815a952625ecb8d8722d760ca5860ec99644f04d423da47c6f0
|
|
7
|
+
data.tar.gz: 48c4845b82db666058aea36a4d98e20108c6754ba52cd15598c9e18f7b5a7921a9c37f34b81d8b3ece8f9e733f42dc0bf965c4c8120ecf40212e4b75692baa1a
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.35] - 2026-05-22
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- API: OpenAI Responses streaming now emits the full gateway-compatible SSE envelope format (`{ type:, response:, sequence_number: }` for lifecycle events, `{ type:, item_id:, ... }` for content events), matching `llm-gateway.uhg.com` wire format exactly so Codex CLI accepts the stream
|
|
7
|
+
- API: `response.completed` now correctly includes `usage.input_tokens` from `pipeline_response.tokens` via `build_usage` helper; token aliases (`:input`/`:input_tokens`, `:output`/`:output_tokens`) resolved across hash and object-backed payloads
|
|
8
|
+
- API: Added missing `response.in_progress` and `response.content_part.*` SSE events to streaming sequence
|
|
9
|
+
|
|
10
|
+
## [0.9.34] - 2026-05-22
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- API: `extract_token` now correctly reads `:input_tokens` / `:output_tokens` hash keys from `pipeline_response.tokens`, fixing `input_tokens: 0` in streaming `response.completed` events (caused Codex CLI `stream disconnected before completion` error)
|
|
14
|
+
|
|
3
15
|
## [0.9.33] - 2026-05-22
|
|
4
16
|
|
|
5
17
|
### Added
|
|
data/lib/legion/llm/api/openai/responses.rb
CHANGED
|
@@ -168,30 +168,53 @@ module Legion
|
|
|
168
168
|
status: 'completed'
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
-
input_tokens = extract_token(tokens, :input)
|
|
172
|
-
output_tokens = extract_token(tokens, :output)
|
|
173
|
-
|
|
174
171
|
{
|
|
175
172
|
id: request_id,
|
|
176
173
|
object: 'response',
|
|
177
174
|
created_at: Time.now.to_i,
|
|
178
175
|
model: resolved_model,
|
|
179
176
|
output: output,
|
|
180
|
-
usage:
|
|
181
|
-
input_tokens: input_tokens,
|
|
182
|
-
output_tokens: output_tokens,
|
|
183
|
-
total_tokens: input_tokens.to_i + output_tokens.to_i
|
|
184
|
-
},
|
|
177
|
+
usage: build_usage(tokens),
|
|
185
178
|
status: 'completed'
|
|
186
179
|
}
|
|
187
180
|
end
|
|
188
181
|
|
|
189
|
-
def self.stream_response(out, executor, request_id:, model:)
|
|
190
|
-
|
|
182
|
+
def self.stream_response(out, executor, request_id:, model:) # rubocop:disable Metrics/MethodLength
|
|
183
|
+
created_at = Time.now.to_i
|
|
184
|
+
seq = 0
|
|
185
|
+
in_progress_response = { id: request_id, object: 'response', created_at: created_at,
|
|
186
|
+
status: 'in_progress', model: model, output: [], usage: nil }
|
|
187
|
+
|
|
188
|
+
# response.created — envelope matches gateway format: { type:, response:, sequence_number: }
|
|
189
|
+
out << sse_event('response.created', {
|
|
190
|
+
type: 'response.created',
|
|
191
|
+
sequence_number: seq += 1,
|
|
192
|
+
response: in_progress_response
|
|
193
|
+
})
|
|
194
|
+
|
|
195
|
+
out << sse_event('response.in_progress', {
|
|
196
|
+
type: 'response.in_progress',
|
|
197
|
+
sequence_number: seq += 1,
|
|
198
|
+
response: in_progress_response
|
|
199
|
+
})
|
|
191
200
|
|
|
192
201
|
msg_id = "msg_#{SecureRandom.hex(12)}"
|
|
193
|
-
|
|
194
|
-
|
|
202
|
+
out << sse_event('response.output_item.added', {
|
|
203
|
+
type: 'response.output_item.added',
|
|
204
|
+
sequence_number: seq += 1,
|
|
205
|
+
output_index: 0,
|
|
206
|
+
item: { id: msg_id, type: 'message', role: 'assistant',
|
|
207
|
+
content: [], status: 'in_progress' }
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
out << sse_event('response.content_part.added', {
|
|
211
|
+
type: 'response.content_part.added',
|
|
212
|
+
sequence_number: seq += 1,
|
|
213
|
+
output_index: 0,
|
|
214
|
+
content_index: 0,
|
|
215
|
+
item_id: msg_id,
|
|
216
|
+
part: { type: 'output_text', text: '', annotations: [] }
|
|
217
|
+
})
|
|
195
218
|
|
|
196
219
|
full_text = +''
|
|
197
220
|
|
|
@@ -200,40 +223,69 @@ module Legion
|
|
|
200
223
|
next if text.empty?
|
|
201
224
|
|
|
202
225
|
full_text << text
|
|
203
|
-
|
|
204
|
-
|
|
226
|
+
out << sse_event('response.output_text.delta', {
|
|
227
|
+
type: 'response.output_text.delta',
|
|
228
|
+
sequence_number: seq += 1,
|
|
229
|
+
output_index: 0,
|
|
230
|
+
content_index: 0,
|
|
231
|
+
item_id: msg_id,
|
|
232
|
+
delta: text
|
|
233
|
+
})
|
|
205
234
|
end
|
|
206
235
|
|
|
207
236
|
routing = pipeline_response.routing || {}
|
|
208
|
-
tokens
|
|
237
|
+
tokens = pipeline_response.tokens || {}
|
|
209
238
|
resolved_model = (routing[:model] || routing['model'] || model).to_s
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
out <<
|
|
239
|
+
usage = build_usage(tokens)
|
|
240
|
+
|
|
241
|
+
out << sse_event('response.output_text.done', {
|
|
242
|
+
type: 'response.output_text.done',
|
|
243
|
+
sequence_number: seq += 1,
|
|
244
|
+
output_index: 0,
|
|
245
|
+
content_index: 0,
|
|
246
|
+
item_id: msg_id,
|
|
247
|
+
text: full_text
|
|
248
|
+
})
|
|
249
|
+
|
|
250
|
+
out << sse_event('response.content_part.done', {
|
|
251
|
+
type: 'response.content_part.done',
|
|
252
|
+
sequence_number: seq += 1,
|
|
253
|
+
output_index: 0,
|
|
254
|
+
content_index: 0,
|
|
255
|
+
item_id: msg_id,
|
|
256
|
+
part: { type: 'output_text', text: full_text, annotations: [] }
|
|
257
|
+
})
|
|
258
|
+
|
|
259
|
+
completed_item = { id: msg_id, type: 'message', role: 'assistant', status: 'completed',
|
|
260
|
+
content: [{ type: 'output_text', text: full_text, annotations: [] }] }
|
|
261
|
+
out << sse_event('response.output_item.done', {
|
|
262
|
+
type: 'response.output_item.done',
|
|
263
|
+
sequence_number: seq += 1,
|
|
264
|
+
output_index: 0,
|
|
265
|
+
item: completed_item
|
|
266
|
+
})
|
|
267
|
+
|
|
268
|
+
out << sse_event('response.completed', {
|
|
269
|
+
type: 'response.completed',
|
|
270
|
+
sequence_number: seq + 1,
|
|
271
|
+
response: {
|
|
272
|
+
id: request_id,
|
|
273
|
+
object: 'response',
|
|
274
|
+
created_at: created_at,
|
|
275
|
+
status: 'completed',
|
|
276
|
+
model: resolved_model,
|
|
277
|
+
output: [completed_item],
|
|
278
|
+
usage: usage
|
|
279
|
+
}
|
|
280
|
+
})
|
|
233
281
|
|
|
234
282
|
log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
|
|
235
283
|
end
|
|
236
284
|
|
|
285
|
+
def self.sse_event(name, payload)
|
|
286
|
+
"event: #{name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
|
|
287
|
+
end
|
|
288
|
+
|
|
237
289
|
def self.build_output_tool_calls(pipeline_response)
|
|
238
290
|
tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
|
|
239
291
|
return [] unless tools_data.is_a?(Array) && !tools_data.empty?
|
|
@@ -257,13 +309,57 @@ module Legion
|
|
|
257
309
|
|
|
258
310
|
def self.extract_token(tokens, key)
|
|
259
311
|
return 0 if tokens.nil?
|
|
260
|
-
return (tokens[key] || tokens[key.to_s] || 0).to_i if tokens.is_a?(Hash)
|
|
261
312
|
|
|
262
|
-
|
|
263
|
-
|
|
313
|
+
aliases = token_aliases(key)
|
|
314
|
+
|
|
315
|
+
if tokens.is_a?(Hash)
|
|
316
|
+
aliases.each do |candidate|
|
|
317
|
+
value = tokens[candidate]
|
|
318
|
+
value = tokens[candidate.to_s] if value.nil?
|
|
319
|
+
return value.to_i unless value.nil?
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
return 0
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
aliases.each do |candidate|
|
|
326
|
+
method_name = token_method(candidate)
|
|
327
|
+
return tokens.public_send(method_name).to_i if method_name && tokens.respond_to?(method_name)
|
|
328
|
+
end
|
|
264
329
|
|
|
265
330
|
0
|
|
266
331
|
end
|
|
332
|
+
|
|
333
|
+
def self.build_usage(tokens)
|
|
334
|
+
input_tokens = extract_token(tokens, :input_tokens)
|
|
335
|
+
output_tokens = extract_token(tokens, :output_tokens)
|
|
336
|
+
|
|
337
|
+
{
|
|
338
|
+
input_tokens: input_tokens,
|
|
339
|
+
output_tokens: output_tokens,
|
|
340
|
+
total_tokens: input_tokens + output_tokens
|
|
341
|
+
}
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
def self.token_aliases(key)
|
|
345
|
+
case key.to_sym
|
|
346
|
+
when :input, :input_tokens
|
|
347
|
+
%i[input_tokens input]
|
|
348
|
+
when :output, :output_tokens
|
|
349
|
+
%i[output_tokens output]
|
|
350
|
+
else
|
|
351
|
+
[key.to_sym]
|
|
352
|
+
end
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
def self.token_method(key)
|
|
356
|
+
{
|
|
357
|
+
input: :input_tokens,
|
|
358
|
+
input_tokens: :input_tokens,
|
|
359
|
+
output: :output_tokens,
|
|
360
|
+
output_tokens: :output_tokens
|
|
361
|
+
}[key.to_sym]
|
|
362
|
+
end
|
|
267
363
|
end
|
|
268
364
|
end
|
|
269
365
|
end
|
data/lib/legion/llm/version.rb
CHANGED