legion-llm 0.9.32 → 0.9.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 715bd8c0918939545eda0cf832d81aa23e69c807ed6055fdb4d95f4177c99449
4
- data.tar.gz: 12e42d3d2fdc02c4ca7764a264af90631141952982d9726c02d0e9ea7de87a92
3
+ metadata.gz: ca7cfa8bdb01bbb162989ba4ce84f8735eb46e34f36cfea29d4504357d6eaa38
4
+ data.tar.gz: 2b8866db7f799f242a070fa590c18f64cbee278c7fc2e827a9fe604cab6ce7d1
5
5
  SHA512:
6
- metadata.gz: 6f2fd1a0ea8b18ed222f2713adb4f4a48ce57e90d5a3ac2242f7ae648ed297b6375d1143c0e29777e4c5ade60e479e09997c2af04cf0e1d3b81225ef3a14276f
7
- data.tar.gz: 41d0daa21a98518c4192881bd3231ca1e308b6231d06e22b6102531feee4f6aef1ced50d9e43b1f65b7e8f344e95406ebb682bf445d356011aafd6d4ce241a37
6
+ metadata.gz: 2b3ba4cb577aa0a44b166ef7ee1299db98b5dbfa3759f76fb158ffa5e2c4eca7999198f47d87e70d8ebf79f21e1395137e75f26b033f609b939f70b2faa0b1f6
7
+ data.tar.gz: c77913f743c3075977419811d8c20fe7d7c1b48b77cc234b1bdcd4e1c1642734c9f203c60f3d8ec0c021f9cffe9fc79fd18a0abc14f452bdd152175d8a7fe820
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.34] - 2026-05-22
4
+
5
+ ### Fixed
6
+ - API: `extract_token` now correctly reads `:input_tokens` / `:output_tokens` hash keys from `pipeline_response.tokens`, fixing `input_tokens: 0` in streaming `response.completed` events (caused Codex CLI `stream disconnected before completion` error)
7
+
8
+ ## [0.9.33] - 2026-05-22
9
+
10
+ ### Added
11
+ - API: OpenAI Responses API endpoint (`POST /v1/responses` and `POST /api/llm/inference/v1/responses`) for Codex CLI compatibility
12
+ - API: Supports streaming (SSE with `response.*` events), tool declarations, and `instructions` parameter
13
+
3
14
  ## [0.9.31] - 2026-05-18
4
15
 
5
16
  ### Added
@@ -0,0 +1,276 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require 'legion/logging/helper'
5
+ require 'legion/llm/types'
6
+
7
+ module Legion
8
+ module LLM
9
+ module API
10
+ module OpenAI
11
+ module Responses
12
+ extend Legion::Logging::Helper
13
+
14
# Registration hook: mounts the Responses handler on both public POST paths.
#
# @param app [#post] object that receives the route definitions
# @return [Object] whatever the final `log.debug` call returns
def self.registered(app)
  log.debug('[llm][api][openai][responses] registering POST /v1/responses + /api/llm/inference/v1/responses')

  # A single handler proc backs both routes. instance_exec runs it with the
  # route block's own `self`, so the bare helper calls inside the handler are
  # resolved on that object rather than on this module.
  responder = build_handler
  %w[/v1/responses /api/llm/inference/v1/responses].each do |route_path|
    app.post(route_path) { instance_exec(&responder) }
  end

  log.debug('[llm][api][openai][responses] routes registered')
end
24
+
25
# Builds the proc that serves a Responses request on both registered routes.
#
# The proc is run through instance_exec inside a route block, so the bare
# helper calls it makes (require_llm!, parse_request_body, halt, stream,
# content_type, headers, status, request, env, build_server_caller,
# handle_exception, log) are resolved on that route context, not on this
# module. Only the `Responses.*` calls come back here.
#
# Flow: reject a missing/ill-typed `input` with 400, prepend `instructions`
# as a system message, build the inference request, then either stream SSE
# frames or return a single JSON document. Raised errors are mapped to
# 401 / 429 / 502 / 500 JSON error bodies.
#
# @return [Proc] zero-argument request handler
def self.build_handler # rubocop:disable Metrics/MethodLength
  proc do # rubocop:disable Metrics/BlockLength
    require_llm!
    payload = parse_request_body
    request_id = "resp_#{SecureRandom.hex(16)}"

    raw_input = payload[:input]
    messages =
      if raw_input.is_a?(Array)
        Responses.normalize_input_array(raw_input)
      elsif raw_input.is_a?(String)
        [{ role: 'user', content: raw_input }]
      else
        halt 400, { 'Content-Type' => 'application/json' },
             Legion::JSON.dump({ error: { message: 'input is required (string or array)',
                                          type: 'invalid_request_error', code: nil } })
      end

    # `instructions` is carried as a leading system message.
    messages = [{ role: 'system', content: payload[:instructions].to_s }, *messages] if payload[:instructions]

    model = payload[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
    # Only a literal JSON `true` turns streaming on.
    streaming = payload[:stream] == true
    tool_declarations = Responses.build_tool_declarations(payload[:tools])

    log.info(
      "[llm][api][openai][responses] action=accepted request_id=#{request_id} " \
      "model=#{model} stream=#{streaming} tools=#{tool_declarations.size}"
    )

    effective_caller = build_server_caller(source: 'openai_responses', path: request.path, env: env)

    # Loaded on first use only; skipped once the constants exist.
    require 'legion/llm/inference/request' unless defined?(Legion::LLM::Inference::Request)
    require 'legion/llm/inference/executor' unless defined?(Legion::LLM::Inference::Executor)

    executor = Legion::LLM::Inference::Executor.new(
      Legion::LLM::Inference::Request.build(
        id: request_id,
        messages: messages,
        routing: { model: model },
        tools: tool_declarations,
        caller: effective_caller,
        stream: streaming,
        cache: { strategy: :default, cacheable: true }
      )
    )

    if streaming
      content_type 'text/event-stream'
      headers 'Cache-Control' => 'no-cache',
              'Connection' => 'keep-alive',
              'X-Accel-Buffering' => 'no'

      stream do |sse|
        Responses.stream_response(sse, executor, request_id: request_id, model: model)
      rescue StandardError => stream_error
        # Headers are already on the wire at this point, so the failure is
        # reported in-band as an `error` event instead of an HTTP status.
        handle_exception(stream_error, level: :error, handled: false, operation: 'llm.api.openai.responses.stream', request_id: request_id)
        sse << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: stream_error.message })}\n\n"
      end
    else
      response_body = Responses.format_response(executor.call, request_id: request_id, model: model)

      log.info("[llm][api][openai][responses] action=complete request_id=#{request_id} model=#{response_body[:model]}")
      content_type :json
      status 200
      Legion::JSON.dump(response_body)
    end
  rescue Legion::LLM::AuthError => error
    handle_exception(error, level: :error, handled: true, operation: 'llm.api.openai.responses.auth')
    halt 401, { 'Content-Type' => 'application/json' },
         Legion::JSON.dump({ error: { message: error.message, type: 'authentication_error' } })
  rescue Legion::LLM::RateLimitError => error
    handle_exception(error, level: :warn, handled: true, operation: 'llm.api.openai.responses.rate_limit')
    halt 429, { 'Content-Type' => 'application/json' },
         Legion::JSON.dump({ error: { message: error.message, type: 'rate_limit_error' } })
  rescue Legion::LLM::ProviderDown, Legion::LLM::ProviderError => error
    handle_exception(error, level: :error, handled: true, operation: 'llm.api.openai.responses.provider')
    halt 502, { 'Content-Type' => 'application/json' },
         Legion::JSON.dump({ error: { message: error.message, type: 'server_error' } })
  rescue StandardError => error
    handle_exception(error, level: :error, handled: false, operation: 'llm.api.openai.responses')
    halt 500, { 'Content-Type' => 'application/json' },
         Legion::JSON.dump({ error: { message: error.message, type: 'server_error' } })
  end
end
111
+
112
# Converts a Responses-style `input` array into role/content message hashes.
#
# * `function_call_output` items become `role: 'tool'` messages whose
#   `tool_call_id` is the item's `call_id`.
# * Every other item must carry a `role`; items without one are dropped.
# * Non-array content is stringified; array content is passed through as-is.
# * Entries that are not hash-like (bare strings, nil, numbers) are skipped.
#   Previously they raised on `item[:type]` and surfaced as a 500.
#
# NOTE(review): `function_call` items have no `role` and are therefore
# dropped, which leaves a later `function_call_output` without its matching
# call in the history. Confirm whether the inference pipeline needs it.
#
# @param input [Array] raw items from the request body (string or symbol keys)
# @return [Array<Hash>] messages with symbol keys
def self.normalize_input_array(input)
  input.filter_map do |item|
    next unless item.respond_to?(:transform_keys)

    # Only top-level keys are symbolized; keys that cannot be symbols are kept.
    entry = item.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }

    case entry[:type].to_s
    when 'function_call_output'
      { role: 'tool', tool_call_id: entry[:call_id], content: entry[:output].to_s }
    else
      role = entry[:role]&.to_s
      next unless role

      content = entry[:content]
      content = content.to_s if content && !content.is_a?(Array)
      # compact drops the :content key entirely when no content was given.
      { role: role, content: content }.compact
    end
  end
end
129
+
130
# Turns the request's `tools` array into ToolDefinition objects.
#
# Accepts both the flat shape (`{ name:, description:, parameters: }`) and the
# nested shape (`{ function: { name:, ... } }`). Entries without a name are
# dropped. An entry that raises while being converted is reported through
# `handle_exception` and dropped, so one bad tool does not fail the request.
#
# @param tools [Array, nil] raw tool declarations; anything else yields []
# @return [Array] built tool definitions, client-sourced and executable
def self.build_tool_declarations(tools)
  return [] unless tools.is_a?(Array)

  tools.filter_map do |raw_tool|
    spec = raw_tool.respond_to?(:transform_keys) ? raw_tool.transform_keys(&:to_sym) : raw_tool
    # Prefer the nested `function` object when present, else the entry itself.
    definition = spec[:function] || spec
    definition = definition.transform_keys(&:to_sym) if definition.respond_to?(:transform_keys)

    tool_name = definition[:name].to_s
    next if tool_name.empty?

    Legion::LLM::Types::ToolDefinition.build(
      name: tool_name,
      description: definition[:description].to_s,
      parameters: definition[:parameters] || {},
      source: { type: :client, executable: true }
    )
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: 'llm.api.openai.responses.build_tool')
    nil
  end
end
150
+
151
# Builds the non-streaming response document from a finished pipeline run.
#
# `output` lists any function-call items first, followed by one assistant
# message holding the full text. The message item is always present, even
# when the text is empty.
#
# @param pipeline_response [#routing, #tokens, #message] finished pipeline result
# @param request_id [String] id echoed back as the response id
# @param model [Object] fallback model name when routing does not report one
# @return [Hash] response document with symbol keys
def self.format_response(pipeline_response, request_id:, model:)
  route_info = pipeline_response.routing || {}
  usage_source = pipeline_response.tokens || {}

  # The message may be a hash (symbol or string `content` key) or any other
  # object, which is stringified directly.
  message = pipeline_response.message
  text = message.is_a?(Hash) ? (message[:content] || message['content']).to_s : message.to_s

  assistant_item = {
    type: 'message',
    id: "msg_#{SecureRandom.hex(12)}",
    role: 'assistant',
    content: [{ type: 'output_text', text: text }],
    status: 'completed'
  }

  prompt_count = extract_token(usage_source, :input)
  completion_count = extract_token(usage_source, :output)

  {
    id: request_id,
    object: 'response',
    created_at: Time.now.to_i,
    model: (route_info[:model] || route_info['model'] || model).to_s,
    output: build_output_tool_calls(pipeline_response) + [assistant_item],
    usage: {
      input_tokens: prompt_count,
      output_tokens: completion_count,
      total_tokens: prompt_count.to_i + completion_count.to_i
    },
    status: 'completed'
  }
end
188
+
189
# Streams a pipeline run to `out` as server-sent `response.*` events.
#
# Event order: `response.created`, `response.output_item.added` (assistant
# message), one `response.output_text.delta` per non-empty chunk,
# `response.output_text.done`, `response.output_item.done`, then an
# added/done pair for every tool call, and finally `response.completed`.
#
# Tool calls are emitted here so streaming clients see the same
# `function_call` items that `format_response` returns; previously the
# streaming path dropped them. The `output` array on `response.completed`
# is additive: all earlier fields keep their names and positions.
#
# NOTE(review): payloads carry no `type` / `sequence_number` fields; confirm
# against the clients this endpoint targets whether they are required.
#
# @param out [#<<] sink that receives each SSE frame as a String
# @param executor [#call_stream] yields chunks and returns the pipeline response
# @param request_id [String] id echoed in the created/completed events
# @param model [Object] fallback model name when routing does not report one
# @return [Object] whatever the final `log.info` call returns
def self.stream_response(out, executor, request_id:, model:)
  emit = lambda do |event_name, payload|
    out << "event: #{event_name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
  end

  emit.call('response.created', { id: request_id, object: 'response', status: 'in_progress' })

  msg_id = "msg_#{SecureRandom.hex(12)}"
  pending_item = { type: 'message', id: msg_id, role: 'assistant', content: [], status: 'in_progress' }
  emit.call('response.output_item.added', { output_index: 0, item: pending_item })

  full_text = +''

  pipeline_response = executor.call_stream do |chunk|
    text = chunk.respond_to?(:content) ? chunk.content.to_s : chunk.to_s
    # Empty chunks would produce no-op deltas, so they are not forwarded.
    next if text.empty?

    full_text << text
    emit.call('response.output_text.delta', { content_index: 0, delta: text })
  end

  routing = pipeline_response.routing || {}
  tokens = pipeline_response.tokens || {}
  resolved_model = (routing[:model] || routing['model'] || model).to_s
  input_tokens = extract_token(tokens, :input)
  output_tokens = extract_token(tokens, :output)

  message_item = { type: 'message', id: msg_id, role: 'assistant',
                   content: [{ type: 'output_text', text: full_text }], status: 'completed' }
  emit.call('response.output_text.done', { content_index: 0, text: full_text })
  emit.call('response.output_item.done', { output_index: 0, item: message_item })

  # The message already occupies index 0, so tool calls follow from index 1.
  tool_items = build_output_tool_calls(pipeline_response)
  tool_items.each.with_index(1) do |tool_item, output_index|
    emit.call('response.output_item.added',
              { output_index: output_index, item: tool_item.merge(status: 'in_progress') })
    emit.call('response.output_item.done', { output_index: output_index, item: tool_item })
  end

  emit.call('response.completed', {
              id: request_id,
              object: 'response',
              model: resolved_model,
              status: 'completed',
              usage: {
                input_tokens: input_tokens,
                output_tokens: output_tokens,
                total_tokens: input_tokens.to_i + output_tokens.to_i
              },
              output: [message_item, *tool_items]
            })

  log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
end
236
+
237
# Converts the pipeline's tool calls into `function_call` output items.
#
# Each call may be an object exposing `name` / `arguments` / `id`, or a hash
# with symbol or string keys. Defaults are applied uniformly to both shapes:
# missing arguments become `{}` and a missing id becomes a generated
# `call_…` id. Previously those defaults only applied to hashes, so an
# object with a nil id produced `call_id: nil` and nil arguments were
# serialized as `"null"`. Entries with no name (including nil entries) are
# skipped.
#
# @param pipeline_response [Object] result that may respond to `tools`
# @return [Array<Hash>] zero or more function_call items
def self.build_output_tool_calls(pipeline_response)
  calls = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
  return [] unless calls.is_a?(Array)

  # Reads a field through an accessor when there is one, else by hash key.
  read = lambda do |source, key|
    if source.respond_to?(key)
      source.public_send(key)
    elsif source.respond_to?(:key?)
      source[key] || source[key.to_s]
    end
  end

  calls.filter_map do |call|
    name = read.call(call, :name)
    next unless name

    args = read.call(call, :arguments) || {}
    {
      type: 'function_call',
      id: "fc_#{SecureRandom.hex(12)}",
      call_id: read.call(call, :id) || "call_#{SecureRandom.hex(8)}",
      name: name.to_s,
      # Arguments travel as a JSON string; pre-encoded strings pass through.
      arguments: args.is_a?(String) ? args : Legion::JSON.dump(args),
      status: 'completed'
    }
  end
end
257
+
258
# Reads an input or output token count from a usage hash or usage object.
#
# For hashes the long name (`input_tokens` / `output_tokens`) is tried
# first, then the short name (`input` / `output`), each as symbol and as
# string. For other objects the long-name reader is called when the object
# responds to it.
#
# `key` may be a Symbol or a String. Previously a String key never mapped to
# the long name, and an unknown or nil key probed `tokens[nil]` and
# `tokens['']`.
#
# @param tokens [Hash, Object, nil] usage data
# @param key [Symbol, String] `:input` or `:output`
# @return [Integer] the count, or 0 when it cannot be found
def self.extract_token(tokens, key)
  return 0 if tokens.nil?

  short_name = key.respond_to?(:to_sym) ? key.to_sym : nil
  reader = { input: :input_tokens, output: :output_tokens }[short_name]

  if tokens.is_a?(Hash)
    lookup_keys = [reader, reader&.to_s, short_name, short_name&.to_s].compact
    # First truthy value wins; nil.to_i covers the nothing-found case.
    return lookup_keys.map { |name| tokens[name] }.find(&:itself).to_i
  end

  return tokens.public_send(reader).to_i if reader && tokens.respond_to?(reader)

  0
end
272
+ end
273
+ end
274
+ end
275
+ end
276
+ end
@@ -15,6 +15,7 @@ require_relative 'api/translators/openai_response'
15
15
  require_relative 'api/openai/chat_completions'
16
16
  require_relative 'api/openai/models'
17
17
  require_relative 'api/openai/embeddings'
18
+ require_relative 'api/openai/responses'
18
19
  require_relative 'api/translators/anthropic_request'
19
20
  require_relative 'api/translators/anthropic_response'
20
21
  require_relative 'api/anthropic/messages'
@@ -41,6 +42,7 @@ module Legion
41
42
  OpenAI::ChatCompletions.registered(app)
42
43
  OpenAI::Models.registered(app)
43
44
  OpenAI::Embeddings.registered(app)
45
+ OpenAI::Responses.registered(app)
44
46
  Anthropic::Messages.registered(app)
45
47
  log.debug('[llm][api] all routes registered')
46
48
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.32'
5
+ VERSION = '0.9.34'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.32
4
+ version: 0.9.34
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -204,6 +204,7 @@ files:
204
204
  - lib/legion/llm/api/openai/chat_completions.rb
205
205
  - lib/legion/llm/api/openai/embeddings.rb
206
206
  - lib/legion/llm/api/openai/models.rb
207
+ - lib/legion/llm/api/openai/responses.rb
207
208
  - lib/legion/llm/api/translators/anthropic_request.rb
208
209
  - lib/legion/llm/api/translators/anthropic_response.rb
209
210
  - lib/legion/llm/api/translators/openai_request.rb