ruby_llm-responses_api 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +28 -0
- data/LICENSE.txt +21 -0
- data/README.md +108 -0
- data/lib/ruby_llm/providers/openai_responses/active_record_extension.rb +76 -0
- data/lib/ruby_llm/providers/openai_responses/background.rb +98 -0
- data/lib/ruby_llm/providers/openai_responses/base.rb +14 -0
- data/lib/ruby_llm/providers/openai_responses/built_in_tools.rb +184 -0
- data/lib/ruby_llm/providers/openai_responses/capabilities.rb +226 -0
- data/lib/ruby_llm/providers/openai_responses/chat.rb +265 -0
- data/lib/ruby_llm/providers/openai_responses/media.rb +114 -0
- data/lib/ruby_llm/providers/openai_responses/message_extension.rb +32 -0
- data/lib/ruby_llm/providers/openai_responses/model_registry.rb +257 -0
- data/lib/ruby_llm/providers/openai_responses/models.rb +48 -0
- data/lib/ruby_llm/providers/openai_responses/state.rb +56 -0
- data/lib/ruby_llm/providers/openai_responses/streaming.rb +128 -0
- data/lib/ruby_llm/providers/openai_responses/tools.rb +193 -0
- data/lib/ruby_llm/providers/openai_responses.rb +94 -0
- data/lib/ruby_llm-responses_api.rb +4 -0
- data/lib/rubyllm_responses_api.rb +44 -0
- metadata +177 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyLLM
  module Providers
    class OpenAIResponses
      # Model capabilities for OpenAI Responses API models.
      # Defines which models support which features, plus per-model context
      # windows, output limits, and pricing used for cost estimation.
      module Capabilities
        module_function

        # Models that support the Responses API
        RESPONSES_API_MODELS = %w[
          gpt-4o gpt-4o-mini gpt-4o-2024-05-13 gpt-4o-2024-08-06 gpt-4o-2024-11-20
          gpt-4o-mini-2024-07-18
          gpt-4.1 gpt-4.1-mini gpt-4.1-nano
          gpt-4-turbo gpt-4-turbo-2024-04-09 gpt-4-turbo-preview
          o1 o1-mini o1-preview o1-2024-12-17
          o3 o3-mini o4-mini
          chatgpt-4o-latest
        ].freeze

        # Models with vision capabilities
        VISION_MODELS = %w[
          gpt-4o gpt-4o-mini gpt-4o-2024-05-13 gpt-4o-2024-08-06 gpt-4o-2024-11-20
          gpt-4o-mini-2024-07-18
          gpt-4.1 gpt-4.1-mini gpt-4.1-nano
          gpt-4-turbo gpt-4-turbo-2024-04-09
          o1 o3 o4-mini
          chatgpt-4o-latest
        ].freeze

        # Reasoning models (o-series)
        REASONING_MODELS = %w[o1 o1-mini o1-preview o1-2024-12-17 o3 o3-mini o4-mini].freeze

        # Models that support web search
        WEB_SEARCH_MODELS = %w[
          gpt-4o gpt-4o-mini gpt-4.1 gpt-4.1-mini gpt-4.1-nano
          o1 o3 o3-mini o4-mini
        ].freeze

        # Models that support code interpreter
        CODE_INTERPRETER_MODELS = %w[
          gpt-4o gpt-4o-mini gpt-4.1 gpt-4.1-mini gpt-4.1-nano
          o1 o3 o3-mini o4-mini
        ].freeze

        # Context windows by model (in tokens)
        CONTEXT_WINDOWS = {
          'gpt-4o' => 128_000,
          'gpt-4o-mini' => 128_000,
          'gpt-4o-2024-05-13' => 128_000,
          'gpt-4o-2024-08-06' => 128_000,
          'gpt-4o-2024-11-20' => 128_000,
          'gpt-4o-mini-2024-07-18' => 128_000,
          'gpt-4.1' => 1_000_000,
          'gpt-4.1-mini' => 1_000_000,
          'gpt-4.1-nano' => 1_000_000,
          'gpt-4-turbo' => 128_000,
          'gpt-4-turbo-2024-04-09' => 128_000,
          'o1' => 200_000,
          'o1-mini' => 128_000,
          'o1-preview' => 128_000,
          'o3' => 200_000,
          'o3-mini' => 200_000,
          'o4-mini' => 200_000
        }.freeze

        # Max output tokens by model
        MAX_OUTPUT_TOKENS = {
          'gpt-4o' => 16_384,
          'gpt-4o-mini' => 16_384,
          'gpt-4o-2024-05-13' => 4_096,
          'gpt-4o-2024-08-06' => 16_384,
          'gpt-4o-2024-11-20' => 16_384,
          'gpt-4o-mini-2024-07-18' => 16_384,
          'gpt-4.1' => 32_768,
          'gpt-4.1-mini' => 32_768,
          'gpt-4.1-nano' => 32_768,
          'gpt-4-turbo' => 4_096,
          'o1' => 100_000,
          'o1-mini' => 65_536,
          'o3' => 100_000,
          'o3-mini' => 100_000,
          'o4-mini' => 100_000
        }.freeze

        # Pricing per million tokens (as of late 2024)
        PRICING = {
          'gpt-4o' => { input: 2.50, output: 10.00, cached_input: 1.25 },
          'gpt-4o-mini' => { input: 0.15, output: 0.60, cached_input: 0.075 },
          'gpt-4.1' => { input: 2.00, output: 8.00, cached_input: 0.50 },
          'gpt-4.1-mini' => { input: 0.40, output: 1.60, cached_input: 0.10 },
          'gpt-4.1-nano' => { input: 0.10, output: 0.40, cached_input: 0.025 },
          'o1' => { input: 15.00, output: 60.00, cached_input: 7.50 },
          'o1-mini' => { input: 1.10, output: 4.40, cached_input: 0.55 },
          'o3' => { input: 10.00, output: 40.00, cached_input: 2.50 },
          'o3-mini' => { input: 1.10, output: 4.40, cached_input: 0.275 },
          'o4-mini' => { input: 1.10, output: 4.40, cached_input: 0.275 }
        }.freeze

        # @return [Boolean] whether the model can be used with the Responses API
        def supports_responses_api?(model_id)
          model_matches?(model_id, RESPONSES_API_MODELS)
        end

        # @return [Boolean] whether the model accepts image input
        def supports_vision?(model_id)
          model_matches?(model_id, VISION_MODELS)
        end

        # Function calling is available on every Responses API model.
        def supports_functions?(model_id)
          supports_responses_api?(model_id)
        end

        # Structured output is available on every Responses API model.
        def supports_structured_output?(model_id)
          supports_responses_api?(model_id)
        end

        # @return [Boolean] whether the built-in web_search tool is available
        def supports_web_search?(model_id)
          model_matches?(model_id, WEB_SEARCH_MODELS)
        end

        # @return [Boolean] whether the built-in code_interpreter tool is available
        def supports_code_interpreter?(model_id)
          model_matches?(model_id, CODE_INTERPRETER_MODELS)
        end

        # @return [Boolean] true for o-series reasoning models
        def reasoning_model?(model_id)
          model_matches?(model_id, REASONING_MODELS)
        end

        # Context window in tokens; unknown models default to 128k.
        def context_window_for(model_id)
          find_capability(model_id, CONTEXT_WINDOWS) || 128_000
        end

        # Max output tokens; unknown models default to 16_384.
        def max_tokens_for(model_id)
          find_capability(model_id, MAX_OUTPUT_TOKENS) || 16_384
        end

        # Input price per million tokens (0.0 when the model is unpriced).
        def input_price_for(model_id)
          (find_capability(model_id, PRICING) || {}).fetch(:input, 0.0)
        end

        # Output price per million tokens (0.0 when the model is unpriced).
        def output_price_for(model_id)
          (find_capability(model_id, PRICING) || {}).fetch(:output, 0.0)
        end

        # Full pricing structure for the model. When no cached-input price is
        # listed, it falls back to half the input price.
        def pricing_for(model_id)
          rates = find_capability(model_id, PRICING) || { input: 0.0, output: 0.0 }
          cached = rates.fetch(:cached_input) { rates[:input] / 2 }

          {
            text_tokens: {
              standard: {
                input_per_million: rates[:input],
                output_per_million: rates[:output],
                cached_input_per_million: cached
              }
            }
          }
        end

        # Input/output modalities supported by the model.
        def modalities_for(model_id)
          inputs = supports_vision?(model_id) ? %w[text image] : %w[text]
          { input: inputs, output: ['text'] }
        end

        # Capability tags advertised for the model.
        def capabilities_for(model_id)
          %w[streaming function_calling structured_output].tap do |caps|
            caps << 'vision' if supports_vision?(model_id)
            caps << 'web_search' if supports_web_search?(model_id)
            caps << 'code_interpreter' if supports_code_interpreter?(model_id)
            caps << 'reasoning' if reasoning_model?(model_id)
          end
        end

        # Coarse model family used for grouping. Order matters: the
        # gpt-4o-mini prefix must be tested before the broader gpt-4o prefix.
        def model_family(model_id)
          case model_id
          when /^gpt-4\.1/ then 'gpt-4.1'
          when /^gpt-4o-mini/ then 'gpt-4o-mini'
          when /^gpt-4o/ then 'gpt-4o'
          when /^gpt-4-turbo/ then 'gpt-4-turbo'
          when /^o1/ then 'o1'
          when /^o3/ then 'o3'
          when /^o4/ then 'o4'
          else 'other'
          end
        end

        # Human-readable name, e.g. "gpt-4o-mini" => "Gpt 4o Mini".
        def format_display_name(model_id)
          model_id.tr('-_', ' ').split.map(&:capitalize).join(' ')
        end

        # Temperature is not supported for reasoning models
        def normalize_temperature(temperature, model_id)
          reasoning_model?(model_id) ? nil : temperature
        end

        # Looks up model_id in mapping, falling back to the base name with any
        # trailing -YYYY-MM-DD date suffix stripped. Returns nil when neither
        # form is present.
        private_class_method def find_capability(model_id, mapping)
          mapping.fetch(model_id) do
            dateless = model_id.sub(/-\d{4}-\d{2}-\d{2}$/, '')
            mapping[dateless]
          end
        end

        # True when model_id equals a listed model or extends it with a
        # "-suffix" (so "gpt-4o-2024-08-06" matches the "gpt-4o" entry).
        private_class_method def model_matches?(model_id, model_list)
          model_list.any? do |candidate|
            model_id == candidate || model_id.start_with?("#{candidate}-")
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyLLM
  module Providers
    class OpenAIResponses
      # Chat completion methods for the OpenAI Responses API.
      # Handles converting RubyLLM messages to Responses API format and parsing responses.
      module Chat
        # Relative endpoint used for chat completions on the Responses API.
        def completion_url
          'responses'
        end

        module_function

        # Builds the request payload for POST /responses.
        #
        # System messages are lifted out of the conversation and sent as the
        # top-level `instructions` string; everything else becomes `input` items.
        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
          system_messages, conversation = messages.partition { |m| m.role == :system }
          instructions = system_messages.map { |m| extract_text_content(m.content) }.join("\n\n")

          payload = {
            model: model.id,
            input: format_input(conversation),
            stream: stream
          }

          payload[:instructions] = instructions unless instructions.empty?
          payload[:temperature] = temperature unless temperature.nil?
          payload[:tools] = tools.values.map { |tool| tool_for(tool) } if tools.any?
          payload[:text] = structured_output_format(schema) if schema

          # Auto-chain conversations: find the last response_id from assistant messages
          # This enables automatic stateful conversations without manual tracking
          last_response_id = extract_last_response_id(messages)
          payload[:previous_response_id] = last_response_id if last_response_id

          payload
        end

        # Builds the `text.format` block requesting JSON-schema structured output.
        # Strict mode is on unless the schema explicitly sets strict: false.
        def structured_output_format(schema)
          {
            format: {
              type: 'json_schema',
              name: 'response',
              schema: schema,
              strict: schema[:strict] != false
            }
          }
        end

        # Returns the most recent non-nil response_id carried by an assistant
        # message, or nil when the conversation has none.
        def extract_last_response_id(messages)
          messages.reverse_each do |msg|
            next unless msg.role == :assistant && msg.respond_to?(:response_id)

            id = msg.response_id
            return id if id
          end
          nil
        end

        # Parses a non-streaming /responses body into a RubyLLM::Message.
        # Raises RubyLLM::Error when the body carries an error message.
        def parse_completion_response(response)
          body = response.body
          return if body.nil? || body.empty?

          payload = body.is_a?(String) ? JSON.parse(body) : body

          error_message = payload.dig('error', 'message')
          raise RubyLLM::Error.new(response, error_message) if error_message

          output_items = payload['output'] || []
          usage = payload['usage'] || {}

          Message.new(
            role: :assistant,
            content: extract_output_text(output_items),
            tool_calls: extract_tool_calls(output_items),
            input_tokens: usage['input_tokens'],
            output_tokens: usage['output_tokens'],
            cached_tokens: usage.dig('input_tokens_details', 'cached_tokens'),
            cache_creation_tokens: 0,
            model_id: payload['model'],
            response_id: payload['id'],
            raw: response
          )
        end

        # Converts non-system messages into Responses API input items.
        def format_input(messages)
          messages.flat_map { |msg| input_items_for(msg) }
        end

        # Maps a single message to its input item(s).
        def input_items_for(msg)
          # Tool result message - function_call_output type
          return [tool_result_item(msg)] if msg.tool_call_id
          # Assistant message carrying tool calls expands to several items
          return assistant_tool_call_items(msg) if msg.tool_calls&.any?

          # Regular message
          [{
            type: 'message',
            role: format_role(msg.role),
            content: format_message_content(msg.content, nil)
          }]
        end

        # Item feeding a tool's output back to the model.
        def tool_result_item(msg)
          {
            type: 'function_call_output',
            call_id: msg.tool_call_id,
            output: extract_text_content(msg.content)
          }
        end

        # An assistant turn with tool calls: optional text message first,
        # then one function_call item per tool call.
        def assistant_tool_call_items(msg)
          items = []

          text = extract_text_content(msg.content)
          items << { type: 'message', role: 'assistant', content: text } if text && !text.empty?

          msg.tool_calls.each_value do |call|
            items << {
              type: 'function_call',
              call_id: call.id,
              name: call.name,
              arguments: serialize_arguments(call.arguments)
            }
          end

          items
        end

        # Tool-call arguments must be a JSON string on the wire.
        def serialize_arguments(args)
          args.is_a?(String) ? args : JSON.generate(args)
        end

        # Renders message content as either a plain string (single text part)
        # or an array of content parts (text, attachments, tool calls).
        # NOTE(review): parts always use type 'input_text' even for assistant
        # messages — the Responses API expects 'output_text' there; the plain-
        # string shortcut below masks this for text-only content. Verify with
        # assistant messages that carry attachments.
        def format_message_content(content, tool_calls = nil)
          segments = []

          text = extract_text_content(content)
          segments << { type: 'input_text', text: text } if text && !text.empty?

          content.attachments.each { |att| segments << format_attachment(att) } if content.is_a?(RubyLLM::Content)

          tool_calls&.each_value do |call|
            segments << {
              type: 'function_call',
              call_id: call.id,
              name: call.name,
              arguments: serialize_arguments(call.arguments)
            }
          end

          # Return simple text for single text content
          return segments.first[:text] if segments.length == 1 && segments.first[:type] == 'input_text'

          segments
        end

        # Converts one attachment into a Responses API content part.
        def format_attachment(attachment)
          case attachment.type
          when :image
            { type: 'input_image', image_url: attachment.url? ? attachment.source : attachment.for_llm }
          when :pdf
            {
              type: 'input_file',
              filename: File.basename(attachment.source.to_s),
              file_data: attachment.for_llm
            }
          when :audio
            {
              type: 'input_audio',
              data: attachment.for_llm,
              format: detect_audio_format(attachment.source)
            }
          else
            { type: 'input_text', text: "[Unsupported attachment: #{attachment.type}]" }
          end
        end

        # Guesses the audio container from the file extension; defaults to mp3.
        def detect_audio_format(source)
          {
            '.mp3' => 'mp3', '.wav' => 'wav', '.webm' => 'webm',
            '.ogg' => 'ogg', '.flac' => 'flac'
          }.fetch(File.extname(source.to_s).downcase, 'mp3')
        end

        # Pulls plain text out of the various content representations.
        def extract_text_content(content)
          return content if content.is_a?(String)
          return content.text if content.is_a?(RubyLLM::Content)
          return content[:text] || content['text'] if content.is_a?(Hash)

          content.to_s
        end

        # Maps RubyLLM roles onto Responses API roles.
        def format_role(role)
          {
            system: 'developer',
            assistant: 'assistant',
            tool: 'user' # Tool results come from user perspective
          }.fetch(role) { role.to_s }
        end

        # Concatenates every output_text part from the response's message items.
        def extract_output_text(output)
          texts = []
          output.each do |item|
            next unless item['type'] == 'message'

            (item['content'] || []).each do |piece|
              texts << piece['text'] if piece['type'] == 'output_text'
            end
          end
          texts.join
        end

        # Builds a {call_id => ToolCall} map from function_call output items,
        # or nil when the response made no tool calls.
        def extract_tool_calls(output)
          calls = output.select { |item| item['type'] == 'function_call' }
          return nil if calls.empty?

          calls.each_with_object({}) do |fc, acc|
            acc[fc['call_id']] = ToolCall.new(
              id: fc['call_id'],
              name: fc['name'],
              arguments: parse_arguments(fc['arguments'])
            )
          end
        end

        # Decodes a tool call's argument payload; malformed JSON is preserved
        # under a :raw key instead of raising.
        def parse_arguments(arguments)
          return {} if arguments.nil? || arguments.empty?
          return arguments if arguments.is_a?(Hash)

          JSON.parse(arguments)
        rescue JSON::ParserError
          { raw: arguments }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyLLM
  module Providers
    class OpenAIResponses
      # Media handling methods for the OpenAI Responses API.
      # Handles images, audio, PDFs, and other file types.
      module Media
        module_function

        # Renders content as either a plain string (text-only), an array of
        # Responses API content parts, or the content itself for raw/plain values.
        def format_content(content)
          return content if content.is_a?(RubyLLM::Content::Raw)
          return content unless content.is_a?(RubyLLM::Content)

          parts = []
          text = content.text
          parts << format_text(text) if text && !text.empty?
          content.attachments.each { |attachment| parts << format_attachment(attachment) }

          # Text-only content collapses back to a simple string
          return content.text if parts.length == 1 && parts.first[:type] == 'input_text'

          parts
        end

        # Wraps a string as an input_text part.
        def format_text(text)
          { type: 'input_text', text: text }
        end

        # Dispatches an attachment to its type-specific formatter.
        def format_attachment(attachment)
          case attachment.type
          when :image then format_image(attachment)
          when :pdf then format_pdf(attachment)
          when :audio then format_audio(attachment)
          else format_unknown(attachment)
          end
        end

        # Remote images are passed by URL; local ones as encoded data.
        def format_image(image)
          { type: 'input_image', image_url: image.url? ? image.source : image.for_llm }
        end

        # PDFs are sent inline as an input_file with a best-effort filename.
        def format_pdf(pdf)
          {
            type: 'input_file',
            filename: extract_filename(pdf.source),
            file_data: pdf.for_llm
          }
        end

        # Audio is sent inline with its container format.
        def format_audio(audio)
          {
            type: 'input_audio',
            data: audio.for_llm,
            format: detect_audio_format(audio.source)
          }
        end

        # Unsupported attachment types degrade to a text placeholder.
        def format_unknown(attachment)
          { type: 'input_text', text: "[Attachment: #{attachment.type}]" }
        end

        # Best-effort filename from a path-like or IO-like source.
        def extract_filename(source)
          return 'file' unless source

          File.basename(source_path(source))
        end

        # Guesses the audio container from the source's extension; mp3 default.
        def detect_audio_format(source)
          return 'mp3' unless source

          {
            '.mp3' => 'mp3', '.wav' => 'wav', '.webm' => 'webm',
            '.ogg' => 'ogg', '.flac' => 'flac', '.m4a' => 'm4a'
          }.fetch(File.extname(source_path(source)).downcase, 'mp3')
        end

        # IO-like sources expose #path; everything else is stringified.
        def source_path(source)
          source.respond_to?(:path) ? source.path : source.to_s
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RubyLLM
  module Providers
    class OpenAIResponses
      # Extends RubyLLM::Message to support response_id for stateful conversations.
      #
      # The Responses API returns an id for every response; storing it on the
      # assistant Message lets later requests chain via previous_response_id.
      module MessageExtension
        attr_accessor :response_id

        # Wraps Message#initialize and Message#to_h via alias chains so the
        # extension works with a plain `include` (no prepend required).
        def self.included(message_class)
          message_class.class_eval do
            alias_method :original_initialize, :initialize

            # Delegate to the original constructor, then capture :response_id
            # from the options hash.
            # NOTE(review): `options` is forwarded as a single positional hash —
            # this assumes the original #initialize takes an options hash, not
            # keyword arguments; verify under Ruby 3 keyword semantics.
            define_method(:initialize) do |options = {}|
              original_initialize(options)
              @response_id = options[:response_id]
            end

            alias_method :original_to_h, :to_h

            # Serialize response_id alongside the original hash; #compact
            # drops the key when no response id was recorded.
            define_method(:to_h) do
              original_to_h.merge(response_id: response_id).compact
            end
          end
        end
      end
    end
  end
end

# Apply the extension to RubyLLM::Message
RubyLLM::Message.include(RubyLLM::Providers::OpenAIResponses::MessageExtension)
|