legion-llm 0.9.34 → 0.9.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/lib/legion/llm/api/openai/responses.rb +133 -42
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0f260c2a456cc265c9ae615a84021ad73c8a82dd8bc7969e0c0d6afdab6c18e6
|
|
4
|
+
data.tar.gz: 38c4fac7b3fdb5ca97857317f17beda3fc202542aac0aed954d2959bb12c4beb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 37ccea0f649857ee3492c4df9fb9c6c58667d5899bcba2fac5eb2daef119c2bf361c1ae58fe37815a952625ecb8d8722d760ca5860ec99644f04d423da47c6f0
|
|
7
|
+
data.tar.gz: 48c4845b82db666058aea36a4d98e20108c6754ba52cd15598c9e18f7b5a7921a9c37f34b81d8b3ece8f9e733f42dc0bf965c4c8120ecf40212e4b75692baa1a
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.35] - 2026-05-22
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- API: OpenAI Responses streaming now emits the full gateway-compatible SSE envelope format (`{ type:, response:, sequence_number: }` for lifecycle events, `{ type:, item_id:, ... }` for content events), matching `llm-gateway.uhg.com` wire format exactly so Codex CLI accepts the stream
|
|
7
|
+
- API: `response.completed` now correctly includes `usage.input_tokens` from `pipeline_response.tokens` via `build_usage` helper; token aliases (`:input`/`:input_tokens`, `:output`/`:output_tokens`) resolved across hash and object-backed payloads
|
|
8
|
+
- API: Added missing `response.in_progress` and `response.content_part.*` SSE events to streaming sequence
|
|
9
|
+
|
|
3
10
|
## [0.9.34] - 2026-05-22
|
|
4
11
|
|
|
5
12
|
### Fixed
|
|
@@ -168,30 +168,53 @@ module Legion
|
|
|
168
168
|
status: 'completed'
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
-
input_tokens = extract_token(tokens, :input)
|
|
172
|
-
output_tokens = extract_token(tokens, :output)
|
|
173
|
-
|
|
174
171
|
{
|
|
175
172
|
id: request_id,
|
|
176
173
|
object: 'response',
|
|
177
174
|
created_at: Time.now.to_i,
|
|
178
175
|
model: resolved_model,
|
|
179
176
|
output: output,
|
|
180
|
-
usage:
|
|
181
|
-
input_tokens: input_tokens,
|
|
182
|
-
output_tokens: output_tokens,
|
|
183
|
-
total_tokens: input_tokens.to_i + output_tokens.to_i
|
|
184
|
-
},
|
|
177
|
+
usage: build_usage(tokens),
|
|
185
178
|
status: 'completed'
|
|
186
179
|
}
|
|
187
180
|
end
|
|
188
181
|
|
|
189
|
-
def self.stream_response(out, executor, request_id:, model:)
|
|
190
|
-
|
|
182
|
+
def self.stream_response(out, executor, request_id:, model:) # rubocop:disable Metrics/MethodLength
|
|
183
|
+
created_at = Time.now.to_i
|
|
184
|
+
seq = 0
|
|
185
|
+
in_progress_response = { id: request_id, object: 'response', created_at: created_at,
|
|
186
|
+
status: 'in_progress', model: model, output: [], usage: nil }
|
|
187
|
+
|
|
188
|
+
# response.created — envelope matches gateway format: { type:, response:, sequence_number: }
|
|
189
|
+
out << sse_event('response.created', {
|
|
190
|
+
type: 'response.created',
|
|
191
|
+
sequence_number: seq += 1,
|
|
192
|
+
response: in_progress_response
|
|
193
|
+
})
|
|
194
|
+
|
|
195
|
+
out << sse_event('response.in_progress', {
|
|
196
|
+
type: 'response.in_progress',
|
|
197
|
+
sequence_number: seq += 1,
|
|
198
|
+
response: in_progress_response
|
|
199
|
+
})
|
|
191
200
|
|
|
192
201
|
msg_id = "msg_#{SecureRandom.hex(12)}"
|
|
193
|
-
|
|
194
|
-
|
|
202
|
+
out << sse_event('response.output_item.added', {
|
|
203
|
+
type: 'response.output_item.added',
|
|
204
|
+
sequence_number: seq += 1,
|
|
205
|
+
output_index: 0,
|
|
206
|
+
item: { id: msg_id, type: 'message', role: 'assistant',
|
|
207
|
+
content: [], status: 'in_progress' }
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
out << sse_event('response.content_part.added', {
|
|
211
|
+
type: 'response.content_part.added',
|
|
212
|
+
sequence_number: seq += 1,
|
|
213
|
+
output_index: 0,
|
|
214
|
+
content_index: 0,
|
|
215
|
+
item_id: msg_id,
|
|
216
|
+
part: { type: 'output_text', text: '', annotations: [] }
|
|
217
|
+
})
|
|
195
218
|
|
|
196
219
|
full_text = +''
|
|
197
220
|
|
|
@@ -200,40 +223,69 @@ module Legion
|
|
|
200
223
|
next if text.empty?
|
|
201
224
|
|
|
202
225
|
full_text << text
|
|
203
|
-
|
|
204
|
-
|
|
226
|
+
out << sse_event('response.output_text.delta', {
|
|
227
|
+
type: 'response.output_text.delta',
|
|
228
|
+
sequence_number: seq += 1,
|
|
229
|
+
output_index: 0,
|
|
230
|
+
content_index: 0,
|
|
231
|
+
item_id: msg_id,
|
|
232
|
+
delta: text
|
|
233
|
+
})
|
|
205
234
|
end
|
|
206
235
|
|
|
207
236
|
routing = pipeline_response.routing || {}
|
|
208
|
-
tokens
|
|
237
|
+
tokens = pipeline_response.tokens || {}
|
|
209
238
|
resolved_model = (routing[:model] || routing['model'] || model).to_s
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
out <<
|
|
239
|
+
usage = build_usage(tokens)
|
|
240
|
+
|
|
241
|
+
out << sse_event('response.output_text.done', {
|
|
242
|
+
type: 'response.output_text.done',
|
|
243
|
+
sequence_number: seq += 1,
|
|
244
|
+
output_index: 0,
|
|
245
|
+
content_index: 0,
|
|
246
|
+
item_id: msg_id,
|
|
247
|
+
text: full_text
|
|
248
|
+
})
|
|
249
|
+
|
|
250
|
+
out << sse_event('response.content_part.done', {
|
|
251
|
+
type: 'response.content_part.done',
|
|
252
|
+
sequence_number: seq += 1,
|
|
253
|
+
output_index: 0,
|
|
254
|
+
content_index: 0,
|
|
255
|
+
item_id: msg_id,
|
|
256
|
+
part: { type: 'output_text', text: full_text, annotations: [] }
|
|
257
|
+
})
|
|
258
|
+
|
|
259
|
+
completed_item = { id: msg_id, type: 'message', role: 'assistant', status: 'completed',
|
|
260
|
+
content: [{ type: 'output_text', text: full_text, annotations: [] }] }
|
|
261
|
+
out << sse_event('response.output_item.done', {
|
|
262
|
+
type: 'response.output_item.done',
|
|
263
|
+
sequence_number: seq += 1,
|
|
264
|
+
output_index: 0,
|
|
265
|
+
item: completed_item
|
|
266
|
+
})
|
|
267
|
+
|
|
268
|
+
out << sse_event('response.completed', {
|
|
269
|
+
type: 'response.completed',
|
|
270
|
+
sequence_number: seq + 1,
|
|
271
|
+
response: {
|
|
272
|
+
id: request_id,
|
|
273
|
+
object: 'response',
|
|
274
|
+
created_at: created_at,
|
|
275
|
+
status: 'completed',
|
|
276
|
+
model: resolved_model,
|
|
277
|
+
output: [completed_item],
|
|
278
|
+
usage: usage
|
|
279
|
+
}
|
|
280
|
+
})
|
|
233
281
|
|
|
234
282
|
log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
|
|
235
283
|
end
|
|
236
284
|
|
|
285
|
+
def self.sse_event(name, payload)
|
|
286
|
+
"event: #{name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
|
|
287
|
+
end
|
|
288
|
+
|
|
237
289
|
def self.build_output_tool_calls(pipeline_response)
|
|
238
290
|
tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
|
|
239
291
|
return [] unless tools_data.is_a?(Array) && !tools_data.empty?
|
|
@@ -258,17 +310,56 @@ module Legion
|
|
|
258
310
|
def self.extract_token(tokens, key)
|
|
259
311
|
return 0 if tokens.nil?
|
|
260
312
|
|
|
261
|
-
|
|
313
|
+
aliases = token_aliases(key)
|
|
262
314
|
|
|
263
315
|
if tokens.is_a?(Hash)
|
|
264
|
-
|
|
265
|
-
|
|
316
|
+
aliases.each do |candidate|
|
|
317
|
+
value = tokens[candidate]
|
|
318
|
+
value = tokens[candidate.to_s] if value.nil?
|
|
319
|
+
return value.to_i unless value.nil?
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
return 0
|
|
266
323
|
end
|
|
267
324
|
|
|
268
|
-
|
|
325
|
+
aliases.each do |candidate|
|
|
326
|
+
method_name = token_method(candidate)
|
|
327
|
+
return tokens.public_send(method_name).to_i if method_name && tokens.respond_to?(method_name)
|
|
328
|
+
end
|
|
269
329
|
|
|
270
330
|
0
|
|
271
331
|
end
|
|
332
|
+
|
|
333
|
+
def self.build_usage(tokens)
|
|
334
|
+
input_tokens = extract_token(tokens, :input_tokens)
|
|
335
|
+
output_tokens = extract_token(tokens, :output_tokens)
|
|
336
|
+
|
|
337
|
+
{
|
|
338
|
+
input_tokens: input_tokens,
|
|
339
|
+
output_tokens: output_tokens,
|
|
340
|
+
total_tokens: input_tokens + output_tokens
|
|
341
|
+
}
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
def self.token_aliases(key)
|
|
345
|
+
case key.to_sym
|
|
346
|
+
when :input, :input_tokens
|
|
347
|
+
%i[input_tokens input]
|
|
348
|
+
when :output, :output_tokens
|
|
349
|
+
%i[output_tokens output]
|
|
350
|
+
else
|
|
351
|
+
[key.to_sym]
|
|
352
|
+
end
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
def self.token_method(key)
|
|
356
|
+
{
|
|
357
|
+
input: :input_tokens,
|
|
358
|
+
input_tokens: :input_tokens,
|
|
359
|
+
output: :output_tokens,
|
|
360
|
+
output_tokens: :output_tokens
|
|
361
|
+
}[key.to_sym]
|
|
362
|
+
end
|
|
272
363
|
end
|
|
273
364
|
end
|
|
274
365
|
end
|
data/lib/legion/llm/version.rb
CHANGED