legion-llm 0.9.32 → 0.9.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/legion/llm/api/openai/responses.rb +271 -0
- data/lib/legion/llm/api.rb +2 -0
- data/lib/legion/llm/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 768503fd466d914e0cc3f4bb713ac0a710b392ebc0098eab5d6c2def05f5f9f5
|
|
4
|
+
data.tar.gz: 9b0823a06222164a83123d0b895c701da3415aee51bca8b8ab303e6f1b72a532
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8b85a0b079619ed107b79044b6cfaa29f58a11881cd67a883cc2d107efebbf19240241d892daf983abd89876391fb3152434da27ce7e3a043f9170bde75d747e
|
|
7
|
+
data.tar.gz: 3c64a0f8744c10882cf02510480dd9757c68053454dabfd639340b16561f8640011fba2d4cb120d32c4ee532ac497db10167c516656562f722aa415f2800e49a
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.33] - 2026-05-22
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- API: OpenAI Responses API endpoint (`POST /v1/responses` and `POST /api/llm/inference/v1/responses`) for Codex CLI compatibility
|
|
7
|
+
- API: Supports streaming (SSE with `response.*` events), tool declarations, and `instructions` parameter
|
|
8
|
+
|
|
3
9
|
## [0.9.31] - 2026-05-18
|
|
4
10
|
|
|
5
11
|
### Added
|
data/lib/legion/llm/api/openai/responses.rb
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require 'legion/logging/helper'
|
|
5
|
+
require 'legion/llm/types'
|
|
6
|
+
|
|
7
|
+
module Legion
|
|
8
|
+
module LLM
|
|
9
|
+
module API
|
|
10
|
+
module OpenAI
|
|
11
|
+
module Responses
|
|
12
|
+
extend Legion::Logging::Helper
|
|
13
|
+
|
|
14
|
+
# Registration hook: mounts the shared Responses handler on both supported paths.
#
# @param app [#post] object that receives one `post(path) { ... }` definition per route
# @return [Object] whatever the final `log.debug` call returns
def self.registered(app)
  routes = ['/v1/responses', '/api/llm/inference/v1/responses']
  log.debug('[llm][api][openai][responses] registering POST /v1/responses + /api/llm/inference/v1/responses')

  responder = build_handler
  routes.each do |route|
    # instance_exec runs the shared proc against whatever receiver the framework
    # evaluates the route block in, so the proc's bare helper calls resolve there.
    app.post(route) { instance_exec(&responder) }
  end

  log.debug('[llm][api][openai][responses] routes registered')
end
|
|
24
|
+
|
|
25
|
+
# Builds the request handler proc shared by both Responses routes.
#
# The proc is written to be run through instance_exec inside a route block, so bare
# calls such as require_llm!, parse_request_body, halt, stream, content_type,
# build_server_caller and handle_exception resolve on that receiver, not on this module.
#
# Flow: parse the body, turn `input` (String or Array) into a message list, prepend
# `instructions` as a system message, build an inference request, then either stream
# SSE frames or return a single JSON document.
#
# Error mapping performed by the rescue clauses:
#   Legion::LLM::AuthError                  -> 401 authentication_error
#   Legion::LLM::RateLimitError             -> 429 rate_limit_error
#   Legion::LLM::ProviderDown/ProviderError -> 502 server_error
#   any other StandardError                 -> 500 server_error
#
# @return [Proc] non-lambda proc taking no arguments
def self.build_handler # rubocop:disable Metrics/MethodLength
  proc do # rubocop:disable Metrics/BlockLength
    require_llm!
    payload = parse_request_body
    request_id = "resp_#{SecureRandom.hex(16)}"

    raw_input = payload[:input]
    messages =
      if raw_input.is_a?(Array)
        Responses.normalize_input_array(raw_input)
      elsif raw_input.is_a?(String)
        [{ role: 'user', content: raw_input }]
      else
        # Anything else (including a missing input) is rejected up front.
        halt 400, { 'Content-Type' => 'application/json' },
             Legion::JSON.dump({ error: { message: 'input is required (string or array)',
                                          type: 'invalid_request_error', code: nil } })
      end

    instructions = payload[:instructions]
    messages = [{ role: 'system', content: instructions.to_s }] + messages if instructions

    model = payload[:model] || Legion::LLM::Settings.value(:default_model) || 'default'
    # Only a literal JSON `true` switches streaming on.
    streaming = payload[:stream] == true

    tool_declarations = Responses.build_tool_declarations(payload[:tools])

    accepted_line = "[llm][api][openai][responses] action=accepted request_id=#{request_id} " \
                    "model=#{model} stream=#{streaming} tools=#{tool_declarations.size}"
    log.info(accepted_line)

    effective_caller = build_server_caller(source: 'openai_responses', path: request.path, env: env)

    # Loaded on first use rather than at file load time.
    require 'legion/llm/inference/request' unless defined?(Legion::LLM::Inference::Request)
    require 'legion/llm/inference/executor' unless defined?(Legion::LLM::Inference::Executor)

    inference_request = Legion::LLM::Inference::Request.build(
      id: request_id,
      messages: messages,
      routing: { model: model },
      tools: tool_declarations,
      caller: effective_caller,
      stream: streaming,
      cache: { strategy: :default, cacheable: true }
    )
    executor = Legion::LLM::Inference::Executor.new(inference_request)

    if streaming
      content_type 'text/event-stream'
      headers 'Cache-Control' => 'no-cache',
              'Connection' => 'keep-alive',
              'X-Accel-Buffering' => 'no'

      stream do |out|
        Responses.stream_response(out, executor, request_id: request_id, model: model)
      rescue StandardError => e
        # A failure inside the stream block is reported in-band as an SSE `error` event.
        handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses.stream', request_id: request_id)
        out << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: e.message })}\n\n"
      end
    else
      response_body = Responses.format_response(executor.call, request_id: request_id, model: model)

      log.info("[llm][api][openai][responses] action=complete request_id=#{request_id} model=#{response_body[:model]}")
      content_type :json
      status 200
      Legion::JSON.dump(response_body)
    end
  rescue Legion::LLM::AuthError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.openai.responses.auth')
    halt 401, { 'Content-Type' => 'application/json' },
         Legion::JSON.dump({ error: { message: e.message, type: 'authentication_error' } })
  rescue Legion::LLM::RateLimitError => e
    handle_exception(e, level: :warn, handled: true, operation: 'llm.api.openai.responses.rate_limit')
    halt 429, { 'Content-Type' => 'application/json' },
         Legion::JSON.dump({ error: { message: e.message, type: 'rate_limit_error' } })
  rescue Legion::LLM::ProviderDown, Legion::LLM::ProviderError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.openai.responses.provider')
    halt 502, { 'Content-Type' => 'application/json' },
         Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: false, operation: 'llm.api.openai.responses')
    halt 500, { 'Content-Type' => 'application/json' },
         Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
  end
end
|
|
111
|
+
|
|
112
|
+
# Converts a Responses-style `input` array into a list of chat messages.
#
# Mapping applied per entry (keys are symbolized first when the entry supports it):
#   * type == 'function_call_output' -> { role: 'tool', tool_call_id:, content: }
#   * anything else with a :role     -> { role:, content: } (content omitted when nil;
#     Array content is passed through untouched, other content is stringified)
#   * entries without a :role        -> dropped
#   * JSON scalars / nested arrays   -> dropped (they cannot carry a role or a type;
#     previously they raised TypeError/NoMethodError on `item[:type]`)
#
# NOTE(review): entries of type 'function_call' carry no :role and are therefore
# dropped here; confirm whether downstream needs them to pair with the tool outputs.
#
# @param input [Array] raw entries from the request body
# @return [Array<Hash>] normalized messages, in input order
def self.normalize_input_array(input)
  input.filter_map do |item|
    # Non-object entries cannot be indexed by symbol; skip them instead of raising.
    case item
    when nil, String, Numeric, true, false, Array then next
    end

    item = item.transform_keys(&:to_sym) if item.respond_to?(:transform_keys)

    if item[:type].to_s == 'function_call_output'
      { role: 'tool', tool_call_id: item[:call_id], content: item[:output].to_s }
    else
      role = item[:role]&.to_s
      next unless role

      content = item[:content]
      content = content.to_s if content && !content.is_a?(Array)
      { role: role, content: content }.compact
    end
  end
end
|
|
129
|
+
|
|
130
|
+
# Translates the request's `tools` array into ToolDefinition objects.
#
# Both the nested form ({ function: { name:, ... } }) and the flat form
# ({ name:, ... }) are accepted. Entries without a non-empty name are skipped, and an
# entry that raises while being converted is reported through handle_exception and
# skipped as well.
#
# @param tools [Array, nil, Object] raw `tools` value from the request body
# @return [Array] built tool definitions; empty unless `tools` is a non-empty Array
def self.build_tool_declarations(tools)
  return [] unless tools.is_a?(Array) && !tools.empty?

  tools.filter_map do |tool|
    entry = tool.respond_to?(:transform_keys) ? tool.transform_keys(&:to_sym) : tool
    # Prefer the nested :function object, falling back to the entry itself.
    spec = entry[:function] || entry
    spec = spec.transform_keys(&:to_sym) if spec.respond_to?(:transform_keys)

    tool_name = spec[:name].to_s
    next if tool_name.empty?

    Legion::LLM::Types::ToolDefinition.build(
      name: tool_name,
      description: spec[:description].to_s,
      parameters: spec[:parameters] || {},
      source: { type: :client, executable: true }
    )
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: 'llm.api.openai.responses.build_tool')
    nil
  end
end
|
|
150
|
+
|
|
151
|
+
# Builds the non-streaming Responses payload from a finished pipeline response.
#
# The output list contains every function call reported by build_output_tool_calls
# followed by one assistant message holding the response text.
#
# The message text is resolved as:
#   * Hash message            -> its :content / 'content' value
#   * object with #content    -> that value (same rule stream_response applies to
#                                chunks; previously such objects were stringified
#                                with #to_s)
#   * anything else           -> #to_s
#
# @param pipeline_response [#routing, #tokens, #message] executor result
# @param request_id [String] id echoed back as the response id
# @param model [String] requested model, used when routing reports none
# @return [Hash] response body with :id, :object, :created_at, :model, :output,
#   :usage and :status
def self.format_response(pipeline_response, request_id:, model:)
  routing = pipeline_response.routing || {}
  tokens = pipeline_response.tokens || {}
  raw_msg = pipeline_response.message

  content =
    if raw_msg.is_a?(Hash)
      (raw_msg[:content] || raw_msg['content']).to_s
    elsif raw_msg.respond_to?(:content)
      raw_msg.content.to_s
    else
      raw_msg.to_s
    end

  # Routing may name the model that actually served the request.
  resolved_model = (routing[:model] || routing['model'] || model).to_s

  output = build_output_tool_calls(pipeline_response).dup
  output << {
    type: 'message',
    id: "msg_#{SecureRandom.hex(12)}",
    role: 'assistant',
    content: [{ type: 'output_text', text: content }],
    status: 'completed'
  }

  input_tokens = extract_token(tokens, :input)
  output_tokens = extract_token(tokens, :output)

  {
    id: request_id,
    object: 'response',
    created_at: Time.now.to_i,
    model: resolved_model,
    output: output,
    usage: {
      input_tokens: input_tokens,
      output_tokens: output_tokens,
      total_tokens: input_tokens.to_i + output_tokens.to_i
    },
    status: 'completed'
  }
end
|
|
188
|
+
|
|
189
|
+
# Writes a Responses-style SSE stream for one inference call.
#
# Frame order:
#   response.created
#   response.output_item.added       (assistant message, output_index 0)
#   response.output_text.delta       (once per non-empty chunk)
#   response.output_text.done
#   response.output_item.done        (assistant message)
#   response.output_item.added/done  (one pair per tool call, output_index 1..n)
#   response.completed
#
# Tool calls reported by build_output_tool_calls used to be returned only by the
# non-streaming path (format_response); they are emitted here too so streaming
# clients are not left without the function calls.
#
# NOTE(review): OpenAI's streaming reference shows a `type` field inside every event
# payload and a nested `response` object on created/completed; these payloads carry
# neither. Confirm against the clients this endpoint is meant to serve.
#
# @param out [#<<] sink that receives each SSE frame as a String
# @param executor [#call_stream] yields chunks and returns the final pipeline response
# @param request_id [String] id reported in the created/completed events
# @param model [String] requested model, used when routing reports none
# @return [Object] whatever the final `log.info` call returns
def self.stream_response(out, executor, request_id:, model:)
  out << "event: response.created\ndata: #{Legion::JSON.dump({ id: request_id, object: 'response', status: 'in_progress' })}\n\n"

  msg_id = "msg_#{SecureRandom.hex(12)}"
  item_event = { type: 'message', id: msg_id, role: 'assistant', content: [], status: 'in_progress' }
  out << "event: response.output_item.added\ndata: #{Legion::JSON.dump({ output_index: 0, item: item_event })}\n\n"

  full_text = +''

  pipeline_response = executor.call_stream do |chunk|
    text = chunk.respond_to?(:content) ? chunk.content.to_s : chunk.to_s
    next if text.empty?

    full_text << text
    delta_event = { content_index: 0, delta: text }
    out << "event: response.output_text.delta\ndata: #{Legion::JSON.dump(delta_event)}\n\n"
  end

  routing = pipeline_response.routing || {}
  tokens = pipeline_response.tokens || {}
  resolved_model = (routing[:model] || routing['model'] || model).to_s
  input_tokens = extract_token(tokens, :input)
  output_tokens = extract_token(tokens, :output)

  out << "event: response.output_text.done\ndata: #{Legion::JSON.dump({ content_index: 0, text: full_text })}\n\n"
  done_item = {
    output_index: 0,
    item: { type: 'message', id: msg_id, role: 'assistant',
            content: [{ type: 'output_text', text: full_text }], status: 'completed' }
  }
  out << "event: response.output_item.done\ndata: #{Legion::JSON.dump(done_item)}\n\n"

  # The message occupies output_index 0, so tool calls are numbered from 1.
  build_output_tool_calls(pipeline_response).each.with_index(1) do |call, output_index|
    pending_call = call.merge(arguments: '', status: 'in_progress')
    out << "event: response.output_item.added\ndata: #{Legion::JSON.dump({ output_index: output_index, item: pending_call })}\n\n"
    out << "event: response.output_item.done\ndata: #{Legion::JSON.dump({ output_index: output_index, item: call })}\n\n"
  end

  done_data = {
    id: request_id,
    object: 'response',
    model: resolved_model,
    status: 'completed',
    usage: {
      input_tokens: input_tokens,
      output_tokens: output_tokens,
      total_tokens: input_tokens.to_i + output_tokens.to_i
    }
  }
  out << "event: response.completed\ndata: #{Legion::JSON.dump(done_data)}\n\n"

  log.info("[llm][api][openai][responses] action=stream_complete request_id=#{request_id} model=#{resolved_model}")
end
|
|
236
|
+
|
|
237
|
+
# Converts the pipeline's tool calls into Responses `function_call` output items.
#
# Each entry may be an object exposing #name / #arguments / #id or a Hash keyed by
# symbols or strings. Entries without a name are skipped.
#
# The `{}` arguments default and the generated `call_…` id are applied to every
# entry. Previously only Hash entries received them, so an object whose #id or
# #arguments was nil produced `call_id: nil` and `"null"` arguments.
#
# @param pipeline_response [Object] executor result; consulted only if it responds to #tools
# @return [Array<Hash>] function_call items (empty when there are no tool calls)
def self.build_output_tool_calls(pipeline_response)
  tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
  return [] unless tools_data.is_a?(Array) && !tools_data.empty?

  tools_data.filter_map do |tc|
    name, args, tc_id = %i[name arguments id].map do |field|
      tc.respond_to?(field) ? tc.public_send(field) : (tc[field] || tc[field.to_s])
    end
    next unless name

    args ||= {}
    tc_id ||= "call_#{SecureRandom.hex(8)}"

    {
      type: 'function_call',
      id: "fc_#{SecureRandom.hex(12)}",
      call_id: tc_id,
      name: name.to_s,
      # String arguments are assumed to be serialized already and are passed through.
      arguments: args.is_a?(String) ? args : Legion::JSON.dump(args),
      status: 'completed'
    }
  end
end
|
|
257
|
+
|
|
258
|
+
# Reads one token count from a usage container.
#
# Hash containers are searched under the short key (:input / 'input') first and then
# under the long name (:input_tokens / 'input_tokens'); the first truthy value wins.
# The long names are the ones already used for non-Hash containers, so a Hash shaped
# like such an object no longer reports 0.
#
# @param tokens [Hash, Object, nil] usage data
# @param key [Symbol] :input or :output (other keys only match a Hash entry of that name)
# @return [Integer] the count, or 0 when it cannot be found
def self.extract_token(tokens, key)
  return 0 if tokens.nil?

  long_name = { input: :input_tokens, output: :output_tokens }[key]

  if tokens.is_a?(Hash)
    lookup_keys = [key, key.to_s]
    lookup_keys.push(long_name, long_name.to_s) if long_name
    value = lookup_keys.map { |k| tokens[k] }.find(&:itself)
    return (value || 0).to_i
  end

  return tokens.public_send(long_name).to_i if long_name && tokens.respond_to?(long_name)

  0
end
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
end
|
data/lib/legion/llm/api.rb
CHANGED
|
@@ -15,6 +15,7 @@ require_relative 'api/translators/openai_response'
|
|
|
15
15
|
require_relative 'api/openai/chat_completions'
|
|
16
16
|
require_relative 'api/openai/models'
|
|
17
17
|
require_relative 'api/openai/embeddings'
|
|
18
|
+
require_relative 'api/openai/responses'
|
|
18
19
|
require_relative 'api/translators/anthropic_request'
|
|
19
20
|
require_relative 'api/translators/anthropic_response'
|
|
20
21
|
require_relative 'api/anthropic/messages'
|
|
@@ -41,6 +42,7 @@ module Legion
|
|
|
41
42
|
OpenAI::ChatCompletions.registered(app)
|
|
42
43
|
OpenAI::Models.registered(app)
|
|
43
44
|
OpenAI::Embeddings.registered(app)
|
|
45
|
+
OpenAI::Responses.registered(app)
|
|
44
46
|
Anthropic::Messages.registered(app)
|
|
45
47
|
log.debug('[llm][api] all routes registered')
|
|
46
48
|
end
|
data/lib/legion/llm/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.9.32
|
|
4
|
+
version: 0.9.33
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -204,6 +204,7 @@ files:
|
|
|
204
204
|
- lib/legion/llm/api/openai/chat_completions.rb
|
|
205
205
|
- lib/legion/llm/api/openai/embeddings.rb
|
|
206
206
|
- lib/legion/llm/api/openai/models.rb
|
|
207
|
+
- lib/legion/llm/api/openai/responses.rb
|
|
207
208
|
- lib/legion/llm/api/translators/anthropic_request.rb
|
|
208
209
|
- lib/legion/llm/api/translators/anthropic_response.rb
|
|
209
210
|
- lib/legion/llm/api/translators/openai_request.rb
|