legion-llm 0.9.32 → 0.9.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 715bd8c0918939545eda0cf832d81aa23e69c807ed6055fdb4d95f4177c99449
4
- data.tar.gz: 12e42d3d2fdc02c4ca7764a264af90631141952982d9726c02d0e9ea7de87a92
3
+ metadata.gz: 768503fd466d914e0cc3f4bb713ac0a710b392ebc0098eab5d6c2def05f5f9f5
4
+ data.tar.gz: 9b0823a06222164a83123d0b895c701da3415aee51bca8b8ab303e6f1b72a532
5
5
  SHA512:
6
- metadata.gz: 6f2fd1a0ea8b18ed222f2713adb4f4a48ce57e90d5a3ac2242f7ae648ed297b6375d1143c0e29777e4c5ade60e479e09997c2af04cf0e1d3b81225ef3a14276f
7
- data.tar.gz: 41d0daa21a98518c4192881bd3231ca1e308b6231d06e22b6102531feee4f6aef1ced50d9e43b1f65b7e8f344e95406ebb682bf445d356011aafd6d4ce241a37
6
+ metadata.gz: 8b85a0b079619ed107b79044b6cfaa29f58a11881cd67a883cc2d107efebbf19240241d892daf983abd89876391fb3152434da27ce7e3a043f9170bde75d747e
7
+ data.tar.gz: 3c64a0f8744c10882cf02510480dd9757c68053454dabfd639340b16561f8640011fba2d4cb120d32c4ee532ac497db10167c516656562f722aa415f2800e49a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.33] - 2026-05-22
4
+
5
+ ### Added
6
+ - API: OpenAI Responses API endpoint (`POST /v1/responses` and `POST /api/llm/inference/v1/responses`) for Codex CLI compatibility
7
+ - API: Supports streaming (SSE with `response.*` events), tool declarations, and `instructions` parameter
8
+
3
9
  ## [0.9.31] - 2026-05-18
4
10
 
5
11
  ### Added
@@ -0,0 +1,271 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require 'legion/logging/helper'
5
+ require 'legion/llm/types'
6
+
7
+ module Legion
8
+ module LLM
9
+ module API
10
+ module OpenAI
11
+ module Responses
12
+ extend Legion::Logging::Helper
13
+
14
# Mounts the OpenAI Responses handler on the application.
#
# The same handler proc is attached to both the OpenAI-compatible path and
# the namespaced inference path, so the two routes behave identically.
#
# @param app [#post] object exposing a `post(path) { ... }` route DSL
# @return [Object] whatever the final `log.debug` call returns
def self.registered(app)
  log.debug('[llm][api][openai][responses] registering POST /v1/responses + /api/llm/inference/v1/responses')

  responses_handler = build_handler

  # The route block is evaluated per request; instance_exec runs the shared
  # handler with the route block's `self` as receiver.
  %w[/v1/responses /api/llm/inference/v1/responses].each do |route|
    app.post(route) { instance_exec(&responses_handler) }
  end

  log.debug('[llm][api][openai][responses] routes registered')
end
24
+
25
# Builds the request handler shared by both Responses routes.
#
# The returned proc is run with `instance_exec` by the route blocks, so the
# receiver-less helpers used below (`require_llm!`, `parse_request_body`,
# `halt`, `stream`, `content_type`, `headers`, `status`, `request`, `env`,
# `log`, `build_server_caller`, `handle_exception`) resolve against the
# request context rather than this module — presumably they are supplied by
# the host application; they are not defined in this file.
#
# Flow: validate `input`, prepend `instructions` as a system message, build an
# inference request, then either stream SSE events or return one JSON body.
# Known LLM error classes are mapped to 401 / 429 / 502; anything else is 500.
#
# @return [Proc] a non-lambda proc that takes no arguments
def self.build_handler # rubocop:disable Metrics/MethodLength
  # Small closures for the repeated JSON error plumbing; a fresh header hash is
  # produced on every call so nothing is shared between requests.
  json_headers = -> { { 'Content-Type' => 'application/json' } }
  error_json = ->(err, type) { Legion::JSON.dump({ error: { message: err.message, type: type } }) }

  proc do # rubocop:disable Metrics/BlockLength
    require_llm!
    payload = parse_request_body
    request_id = "resp_#{SecureRandom.hex(16)}"

    raw_input = payload[:input]
    messages =
      if raw_input.is_a?(Array)
        Responses.normalize_input_array(raw_input)
      elsif raw_input.is_a?(String)
        [{ role: 'user', content: raw_input }]
      else
        # `halt` leaves the handler immediately, so `messages` is never used on this path.
        halt 400, json_headers.call,
             Legion::JSON.dump({ error: { message: 'input is required (string or array)',
                                          type: 'invalid_request_error', code: nil } })
      end

    # `instructions` becomes a leading system message.
    messages = [{ role: 'system', content: payload[:instructions].to_s }, *messages] if payload[:instructions]

    model = payload[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
    streaming = payload[:stream] == true
    tool_declarations = Responses.build_tool_declarations(payload[:tools])

    log.info(
      "[llm][api][openai][responses] action=accepted request_id=#{request_id} " \
      "model=#{model} stream=#{streaming} tools=#{tool_declarations.size}"
    )

    effective_caller = build_server_caller(source: 'openai_responses', path: request.path, env: env)

    # Loaded lazily, only if the constants are not already defined.
    require 'legion/llm/inference/request' unless defined?(Legion::LLM::Inference::Request)
    require 'legion/llm/inference/executor' unless defined?(Legion::LLM::Inference::Executor)

    inference_request = Legion::LLM::Inference::Request.build(
      id: request_id,
      messages: messages,
      routing: { model: model },
      tools: tool_declarations,
      caller: effective_caller,
      stream: streaming,
      cache: { strategy: :default, cacheable: true }
    )

    executor = Legion::LLM::Inference::Executor.new(inference_request)

    if streaming
      content_type 'text/event-stream'
      headers 'Cache-Control' => 'no-cache',
              'Connection' => 'keep-alive',
              'X-Accel-Buffering' => 'no'

      stream do |out|
        Responses.stream_response(out, executor, request_id: request_id, model: model)
      rescue StandardError => e
        # Headers are already sent at this point, so the failure is reported in-band as an SSE event.
        handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses.stream', request_id: request_id)
        out << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: e.message })}\n\n"
      end
    else
      pipeline_response = executor.call
      response_body = Responses.format_response(pipeline_response, request_id: request_id, model: model)

      log.info("[llm][api][openai][responses] action=complete request_id=#{request_id} model=#{response_body[:model]}")
      content_type :json
      status 200
      Legion::JSON.dump(response_body)
    end
  rescue Legion::LLM::AuthError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.openai.responses.auth')
    halt 401, json_headers.call, error_json.call(e, 'authentication_error')
  rescue Legion::LLM::RateLimitError => e
    handle_exception(e, level: :warn, handled: true, operation: 'llm.api.openai.responses.rate_limit')
    halt 429, json_headers.call, error_json.call(e, 'rate_limit_error')
  rescue Legion::LLM::ProviderDown, Legion::LLM::ProviderError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.openai.responses.provider')
    halt 502, json_headers.call, error_json.call(e, 'server_error')
  rescue StandardError => e
    handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses')
    halt 500, json_headers.call, error_json.call(e, 'server_error')
  end
end
111
+
112
# Converts a Responses-API `input` array into chat-style message hashes.
#
# - `function_call_output` items become `role: 'tool'` messages keyed by `call_id`.
# - Items with a role become `{ role:, content: }`; array content is passed
#   through untouched, any other non-nil content is stringified, and a nil
#   content is omitted.
# - Items that are not hash-like (bare strings, numbers, nil) or that have no
#   usable role are skipped instead of raising, so one malformed entry cannot
#   turn the whole request into a server error.
#
# @param input [Array] raw items; keys may be strings or symbols
# @return [Array<Hash>] normalized messages with symbol keys
def self.normalize_input_array(input)
  input.filter_map do |item|
    # Only hash-like entries can carry :type / :role.
    next unless item.respond_to?(:transform_keys)

    item = item.transform_keys(&:to_sym)

    if item[:type].to_s == 'function_call_output'
      { role: 'tool', tool_call_id: item[:call_id], content: item[:output].to_s }
    else
      role = item[:role].to_s
      # A missing or blank role cannot be mapped to a message.
      next if role.empty?

      content = item[:content]
      content = content.to_s if content && !content.is_a?(Array)
      { role: role, content: content }.compact
    end
  end
end
129
+
130
# Turns the request's `tools` array into ToolDefinition objects.
#
# Accepts both the nested shape (`{ function: { name:, ... } }`) and the flat
# shape (`{ name:, ... }`). Entries without a name are dropped, and an entry
# that raises while being converted is reported through `handle_exception`
# and dropped as well.
#
# @param tools [Array, nil] raw tool declarations from the request body
# @return [Array] built tool definitions; empty when `tools` is not a non-empty Array
def self.build_tool_declarations(tools)
  return [] unless tools.is_a?(Array) && !tools.empty?

  tools.filter_map do |raw_tool|
    entry = raw_tool.respond_to?(:transform_keys) ? raw_tool.transform_keys(&:to_sym) : raw_tool
    # Nested declarations keep the details under :function; flat ones are the details.
    spec = entry[:function] || entry
    spec = spec.transform_keys(&:to_sym) if spec.respond_to?(:transform_keys)

    tool_name = spec[:name].to_s
    next if tool_name.empty?

    Legion::LLM::Types::ToolDefinition.build(
      name: tool_name,
      description: spec[:description].to_s,
      parameters: spec[:parameters] || {},
      source: { type: :client, executable: true }
    )
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: 'llm.api.openai.responses.build_tool')
    nil
  end
end
150
+
151
# Builds the non-streaming Responses-API body from a pipeline response.
#
# Output items are the tool calls reported by `build_output_tool_calls`
# followed by a single assistant message holding the response text.
#
# @param pipeline_response [#routing, #tokens, #message] executor result
# @param request_id [String] id echoed back as the response id
# @param model [Object] fallback model name used when routing carries none
# @return [Hash] response body with symbol keys
def self.format_response(pipeline_response, request_id:, model:)
  route_info = pipeline_response.routing || {}
  usage_source = pipeline_response.tokens || {}
  message = pipeline_response.message

  # The message may be a hash (symbol or string keys) or anything else that stringifies.
  text = if message.is_a?(Hash)
           (message[:content] || message['content']).to_s
         else
           message.to_s
         end

  function_calls = build_output_tool_calls(pipeline_response)
  assistant_item = {
    type: 'message',
    id: "msg_#{SecureRandom.hex(12)}",
    role: 'assistant',
    content: [{ type: 'output_text', text: text }],
    status: 'completed'
  }

  prompt_count = extract_token(usage_source, :input)
  completion_count = extract_token(usage_source, :output)

  {
    id: request_id,
    object: 'response',
    created_at: Time.now.to_i,
    model: (route_info[:model] || route_info['model'] || model).to_s,
    output: [*function_calls, assistant_item],
    usage: {
      input_tokens: prompt_count,
      output_tokens: completion_count,
      total_tokens: prompt_count.to_i + completion_count.to_i
    },
    status: 'completed'
  }
end
188
+
189
# Streams a pipeline run to the client as Responses-style server-sent events.
#
# Event order: `response.created`, `response.output_item.added` (assistant
# message), one `response.output_text.delta` per non-empty chunk,
# `response.output_text.done`, `response.output_item.done`, then an
# added/done pair for every tool call the pipeline reported, and finally
# `response.completed`.
#
# Every data payload carries a `type` equal to its SSE event name, and the
# created/completed events also nest the response under `response`; clients
# that decode the data JSON on its own need both. All keys emitted previously
# are still present, so the change is additive.
#
# @param out [#<<] sink that receives each SSE frame as a String
# @param executor [#call_stream] yields chunks and returns the pipeline response
# @param request_id [String] id reported on the response events
# @param model [Object] fallback model name used when routing carries none
# @return [Object] whatever the final `log.info` call returns
def self.stream_response(out, executor, request_id:, model:)
  emit = lambda do |event, payload|
    out << "event: #{event}\ndata: #{Legion::JSON.dump({ type: event }.merge(payload))}\n\n"
  end

  created = { id: request_id, object: 'response', status: 'in_progress' }
  emit.call('response.created', created.merge(response: created))

  msg_id = "msg_#{SecureRandom.hex(12)}"
  emit.call('response.output_item.added',
            { output_index: 0,
              item: { type: 'message', id: msg_id, role: 'assistant', content: [], status: 'in_progress' } })

  full_text = +''

  pipeline_response = executor.call_stream do |chunk|
    text = chunk.respond_to?(:content) ? chunk.content.to_s : chunk.to_s
    next if text.empty?

    full_text << text
    emit.call('response.output_text.delta', { item_id: msg_id, output_index: 0, content_index: 0, delta: text })
  end

  routing = pipeline_response.routing || {}
  tokens = pipeline_response.tokens || {}
  resolved_model = (routing[:model] || routing['model'] || model).to_s
  input_tokens = extract_token(tokens, :input)
  output_tokens = extract_token(tokens, :output)

  emit.call('response.output_text.done', { item_id: msg_id, output_index: 0, content_index: 0, text: full_text })
  emit.call('response.output_item.done',
            { output_index: 0,
              item: { type: 'message', id: msg_id, role: 'assistant',
                      content: [{ type: 'output_text', text: full_text }], status: 'completed' } })

  # The non-streaming path returns tool calls as output items; surface them
  # here too so a streaming client can see them. The message holds index 0,
  # so tool calls are numbered from 1.
  build_output_tool_calls(pipeline_response).each.with_index(1) do |call_item, index|
    emit.call('response.output_item.added', { output_index: index, item: call_item.merge(status: 'in_progress') })
    emit.call('response.output_item.done', { output_index: index, item: call_item })
  end

  summary = {
    id: request_id,
    object: 'response',
    model: resolved_model,
    status: 'completed',
    usage: {
      input_tokens: input_tokens,
      output_tokens: output_tokens,
      total_tokens: input_tokens.to_i + output_tokens.to_i
    }
  }
  emit.call('response.completed', summary.merge(response: summary))

  log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
end
236
+
237
# Converts the pipeline's tool calls into Responses-API `function_call` items.
#
# Each tool call may be an object exposing `name` / `arguments` / `id`, or a
# Hash with symbol or string keys. The fallbacks are applied to both shapes:
# a missing id becomes a generated `call_<hex>` and missing arguments become
# an empty object, so an item never carries a nil `call_id` or a JSON "null"
# arguments string. Calls without a usable name are skipped.
#
# @param pipeline_response [Object] executor result; `tools` is read only if it responds to it
# @return [Array<Hash>] function_call items; empty when there are no tool calls
def self.build_output_tool_calls(pipeline_response)
  tool_calls = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
  return [] unless tool_calls.is_a?(Array) && !tool_calls.empty?

  # Reads one field from either an accessor-style object or a Hash; nil when absent.
  field = lambda do |call, key|
    if call.respond_to?(key)
      call.public_send(key)
    elsif call.is_a?(Hash)
      call[key] || call[key.to_s]
    end
  end

  tool_calls.filter_map do |call|
    name = field.call(call, :name).to_s
    next if name.empty?

    arguments = field.call(call, :arguments) || {}
    call_id = field.call(call, :id) || "call_#{SecureRandom.hex(8)}"

    {
      type: 'function_call',
      id: "fc_#{SecureRandom.hex(12)}",
      call_id: call_id,
      name: name,
      # Arguments travel as a JSON string; pre-encoded strings are passed through.
      arguments: arguments.is_a?(String) ? arguments : Legion::JSON.dump(arguments),
      status: 'completed'
    }
  end
end
257
+
258
# Reads a token count out of a usage container.
#
# Hashes are looked up by `key`, as a symbol first and then as a string.
# Other objects are asked for `input_tokens` / `output_tokens` when they
# respond to the matching reader. Everything else yields 0.
#
# @param tokens [Hash, Object, nil] usage container
# @param key [Symbol] `:input` or `:output`
# @return [Integer] the count, or 0 when it cannot be determined
def self.extract_token(tokens, key)
  case tokens
  when nil
    0
  when Hash
    (tokens[key] || tokens[key.to_s] || 0).to_i
  else
    reader = { input: :input_tokens, output: :output_tokens }[key]
    reader && tokens.respond_to?(reader) ? tokens.public_send(reader).to_i : 0
  end
end
267
+ end
268
+ end
269
+ end
270
+ end
271
+ end
@@ -15,6 +15,7 @@ require_relative 'api/translators/openai_response'
15
15
  require_relative 'api/openai/chat_completions'
16
16
  require_relative 'api/openai/models'
17
17
  require_relative 'api/openai/embeddings'
18
+ require_relative 'api/openai/responses'
18
19
  require_relative 'api/translators/anthropic_request'
19
20
  require_relative 'api/translators/anthropic_response'
20
21
  require_relative 'api/anthropic/messages'
@@ -41,6 +42,7 @@ module Legion
41
42
  OpenAI::ChatCompletions.registered(app)
42
43
  OpenAI::Models.registered(app)
43
44
  OpenAI::Embeddings.registered(app)
45
+ OpenAI::Responses.registered(app)
44
46
  Anthropic::Messages.registered(app)
45
47
  log.debug('[llm][api] all routes registered')
46
48
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.32'
5
+ VERSION = '0.9.33'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.32
4
+ version: 0.9.33
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -204,6 +204,7 @@ files:
204
204
  - lib/legion/llm/api/openai/chat_completions.rb
205
205
  - lib/legion/llm/api/openai/embeddings.rb
206
206
  - lib/legion/llm/api/openai/models.rb
207
+ - lib/legion/llm/api/openai/responses.rb
207
208
  - lib/legion/llm/api/translators/anthropic_request.rb
208
209
  - lib/legion/llm/api/translators/anthropic_response.rb
209
210
  - lib/legion/llm/api/translators/openai_request.rb