legion-llm 0.9.34 → 0.9.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca7cfa8bdb01bbb162989ba4ce84f8735eb46e34f36cfea29d4504357d6eaa38
4
- data.tar.gz: 2b8866db7f799f242a070fa590c18f64cbee278c7fc2e827a9fe604cab6ce7d1
3
+ metadata.gz: 0f260c2a456cc265c9ae615a84021ad73c8a82dd8bc7969e0c0d6afdab6c18e6
4
+ data.tar.gz: 38c4fac7b3fdb5ca97857317f17beda3fc202542aac0aed954d2959bb12c4beb
5
5
  SHA512:
6
- metadata.gz: 2b3ba4cb577aa0a44b166ef7ee1299db98b5dbfa3759f76fb158ffa5e2c4eca7999198f47d87e70d8ebf79f21e1395137e75f26b033f609b939f70b2faa0b1f6
7
- data.tar.gz: c77913f743c3075977419811d8c20fe7d7c1b48b77cc234b1bdcd4e1c1642734c9f203c60f3d8ec0c021f9cffe9fc79fd18a0abc14f452bdd152175d8a7fe820
6
+ metadata.gz: 37ccea0f649857ee3492c4df9fb9c6c58667d5899bcba2fac5eb2daef119c2bf361c1ae58fe37815a952625ecb8d8722d760ca5860ec99644f04d423da47c6f0
7
+ data.tar.gz: 48c4845b82db666058aea36a4d98e20108c6754ba52cd15598c9e18f7b5a7921a9c37f34b81d8b3ece8f9e733f42dc0bf965c4c8120ecf40212e4b75692baa1a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.35] - 2026-05-22
4
+
5
+ ### Fixed
6
+ - API: OpenAI Responses streaming now emits the full gateway-compatible SSE envelope format (`{ type:, sequence_number:, response: }` for lifecycle events, `{ type:, sequence_number:, item_id:, ... }` for content events), matching the `llm-gateway.uhg.com` wire format exactly so that the Codex CLI accepts the stream
7
+ - API: `response.completed` now correctly includes `usage.input_tokens` from `pipeline_response.tokens` via the `build_usage` helper; token aliases (`:input`/`:input_tokens`, `:output`/`:output_tokens`) are now resolved for hash payloads under both symbol and string keys, while object-backed payloads are read through their `input_tokens`/`output_tokens` methods
8
+ - API: Added the missing `response.in_progress` and `response.content_part.*` SSE events to the streaming sequence
9
+
3
10
  ## [0.9.34] - 2026-05-22
4
11
 
5
12
  ### Fixed
@@ -168,30 +168,53 @@ module Legion
168
168
  status: 'completed'
169
169
  }
170
170
 
171
- input_tokens = extract_token(tokens, :input)
172
- output_tokens = extract_token(tokens, :output)
173
-
174
171
  {
175
172
  id: request_id,
176
173
  object: 'response',
177
174
  created_at: Time.now.to_i,
178
175
  model: resolved_model,
179
176
  output: output,
180
- usage: {
181
- input_tokens: input_tokens,
182
- output_tokens: output_tokens,
183
- total_tokens: input_tokens.to_i + output_tokens.to_i
184
- },
177
+ usage: build_usage(tokens),
185
178
  status: 'completed'
186
179
  }
187
180
  end
188
181
 
189
- def self.stream_response(out, executor, request_id:, model:)
190
- out << "event: response.created\ndata: #{Legion::JSON.dump({ id: request_id, object: 'response', status: 'in_progress' })}\n\n"
182
+ def self.stream_response(out, executor, request_id:, model:) # rubocop:disable Metrics/MethodLength
183
+ created_at = Time.now.to_i
184
+ seq = 0
185
+ in_progress_response = { id: request_id, object: 'response', created_at: created_at,
186
+ status: 'in_progress', model: model, output: [], usage: nil }
187
+
188
+ # response.created — envelope matches gateway format: { type:, response:, sequence_number: }
189
+ out << sse_event('response.created', {
190
+ type: 'response.created',
191
+ sequence_number: seq += 1,
192
+ response: in_progress_response
193
+ })
194
+
195
+ out << sse_event('response.in_progress', {
196
+ type: 'response.in_progress',
197
+ sequence_number: seq += 1,
198
+ response: in_progress_response
199
+ })
191
200
 
192
201
  msg_id = "msg_#{SecureRandom.hex(12)}"
193
- item_event = { type: 'message', id: msg_id, role: 'assistant', content: [], status: 'in_progress' }
194
- out << "event: response.output_item.added\ndata: #{Legion::JSON.dump({ output_index: 0, item: item_event })}\n\n"
202
+ out << sse_event('response.output_item.added', {
203
+ type: 'response.output_item.added',
204
+ sequence_number: seq += 1,
205
+ output_index: 0,
206
+ item: { id: msg_id, type: 'message', role: 'assistant',
207
+ content: [], status: 'in_progress' }
208
+ })
209
+
210
+ out << sse_event('response.content_part.added', {
211
+ type: 'response.content_part.added',
212
+ sequence_number: seq += 1,
213
+ output_index: 0,
214
+ content_index: 0,
215
+ item_id: msg_id,
216
+ part: { type: 'output_text', text: '', annotations: [] }
217
+ })
195
218
 
196
219
  full_text = +''
197
220
 
@@ -200,40 +223,69 @@ module Legion
200
223
  next if text.empty?
201
224
 
202
225
  full_text << text
203
- delta_event = { content_index: 0, delta: text }
204
- out << "event: response.output_text.delta\ndata: #{Legion::JSON.dump(delta_event)}\n\n"
226
+ out << sse_event('response.output_text.delta', {
227
+ type: 'response.output_text.delta',
228
+ sequence_number: seq += 1,
229
+ output_index: 0,
230
+ content_index: 0,
231
+ item_id: msg_id,
232
+ delta: text
233
+ })
205
234
  end
206
235
 
207
236
  routing = pipeline_response.routing || {}
208
- tokens = pipeline_response.tokens || {}
237
+ tokens = pipeline_response.tokens || {}
209
238
  resolved_model = (routing[:model] || routing['model'] || model).to_s
210
- input_tokens = extract_token(tokens, :input)
211
- output_tokens = extract_token(tokens, :output)
212
-
213
- out << "event: response.output_text.done\ndata: #{Legion::JSON.dump({ content_index: 0, text: full_text })}\n\n"
214
- done_item = {
215
- output_index: 0,
216
- item: { type: 'message', id: msg_id, role: 'assistant',
217
- content: [{ type: 'output_text', text: full_text }], status: 'completed' }
218
- }
219
- out << "event: response.output_item.done\ndata: #{Legion::JSON.dump(done_item)}\n\n"
220
-
221
- done_data = {
222
- id: request_id,
223
- object: 'response',
224
- model: resolved_model,
225
- status: 'completed',
226
- usage: {
227
- input_tokens: input_tokens,
228
- output_tokens: output_tokens,
229
- total_tokens: input_tokens.to_i + output_tokens.to_i
230
- }
231
- }
232
- out << "event: response.completed\ndata: #{Legion::JSON.dump(done_data)}\n\n"
239
+ usage = build_usage(tokens)
240
+
241
+ out << sse_event('response.output_text.done', {
242
+ type: 'response.output_text.done',
243
+ sequence_number: seq += 1,
244
+ output_index: 0,
245
+ content_index: 0,
246
+ item_id: msg_id,
247
+ text: full_text
248
+ })
249
+
250
+ out << sse_event('response.content_part.done', {
251
+ type: 'response.content_part.done',
252
+ sequence_number: seq += 1,
253
+ output_index: 0,
254
+ content_index: 0,
255
+ item_id: msg_id,
256
+ part: { type: 'output_text', text: full_text, annotations: [] }
257
+ })
258
+
259
+ completed_item = { id: msg_id, type: 'message', role: 'assistant', status: 'completed',
260
+ content: [{ type: 'output_text', text: full_text, annotations: [] }] }
261
+ out << sse_event('response.output_item.done', {
262
+ type: 'response.output_item.done',
263
+ sequence_number: seq += 1,
264
+ output_index: 0,
265
+ item: completed_item
266
+ })
267
+
268
+ out << sse_event('response.completed', {
269
+ type: 'response.completed',
270
+ sequence_number: seq + 1,
271
+ response: {
272
+ id: request_id,
273
+ object: 'response',
274
+ created_at: created_at,
275
+ status: 'completed',
276
+ model: resolved_model,
277
+ output: [completed_item],
278
+ usage: usage
279
+ }
280
+ })
233
281
 
234
282
  log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
235
283
  end
236
284
 
285
+ def self.sse_event(name, payload)
286
+ "event: #{name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
287
+ end
288
+
237
289
  def self.build_output_tool_calls(pipeline_response)
238
290
  tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
239
291
  return [] unless tools_data.is_a?(Array) && !tools_data.empty?
@@ -258,17 +310,56 @@ module Legion
258
310
  def self.extract_token(tokens, key)
259
311
  return 0 if tokens.nil?
260
312
 
261
- method_name = { input: :input_tokens, output: :output_tokens }[key]
313
+ aliases = token_aliases(key)
262
314
 
263
315
  if tokens.is_a?(Hash)
264
- return (tokens[method_name] || tokens[method_name.to_s] ||
265
- tokens[key] || tokens[key.to_s] || 0).to_i
316
+ aliases.each do |candidate|
317
+ value = tokens[candidate]
318
+ value = tokens[candidate.to_s] if value.nil?
319
+ return value.to_i unless value.nil?
320
+ end
321
+
322
+ return 0
266
323
  end
267
324
 
268
- return tokens.public_send(method_name).to_i if method_name && tokens.respond_to?(method_name)
325
+ aliases.each do |candidate|
326
+ method_name = token_method(candidate)
327
+ return tokens.public_send(method_name).to_i if method_name && tokens.respond_to?(method_name)
328
+ end
269
329
 
270
330
  0
271
331
  end
332
+
333
+ def self.build_usage(tokens)
334
+ input_tokens = extract_token(tokens, :input_tokens)
335
+ output_tokens = extract_token(tokens, :output_tokens)
336
+
337
+ {
338
+ input_tokens: input_tokens,
339
+ output_tokens: output_tokens,
340
+ total_tokens: input_tokens + output_tokens
341
+ }
342
+ end
343
+
344
+ def self.token_aliases(key)
345
+ case key.to_sym
346
+ when :input, :input_tokens
347
+ %i[input_tokens input]
348
+ when :output, :output_tokens
349
+ %i[output_tokens output]
350
+ else
351
+ [key.to_sym]
352
+ end
353
+ end
354
+
355
+ def self.token_method(key)
356
+ {
357
+ input: :input_tokens,
358
+ input_tokens: :input_tokens,
359
+ output: :output_tokens,
360
+ output_tokens: :output_tokens
361
+ }[key.to_sym]
362
+ end
272
363
  end
273
364
  end
274
365
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.34'
5
+ VERSION = '0.9.35'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.34
4
+ version: 0.9.35
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity