ruby_llm-responses_api 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenAIResponses
+       # Model capabilities for OpenAI Responses API models.
+       # Defines which models support which features.
+       module Capabilities
+         module_function
+
+         # Models that support the Responses API
+         RESPONSES_API_MODELS = %w[
+           gpt-4o gpt-4o-mini gpt-4o-2024-05-13 gpt-4o-2024-08-06 gpt-4o-2024-11-20
+           gpt-4o-mini-2024-07-18
+           gpt-4.1 gpt-4.1-mini gpt-4.1-nano
+           gpt-4-turbo gpt-4-turbo-2024-04-09 gpt-4-turbo-preview
+           o1 o1-mini o1-preview o1-2024-12-17
+           o3 o3-mini o4-mini
+           chatgpt-4o-latest
+         ].freeze
+
+         # Models with vision capabilities
+         VISION_MODELS = %w[
+           gpt-4o gpt-4o-mini gpt-4o-2024-05-13 gpt-4o-2024-08-06 gpt-4o-2024-11-20
+           gpt-4o-mini-2024-07-18
+           gpt-4.1 gpt-4.1-mini gpt-4.1-nano
+           gpt-4-turbo gpt-4-turbo-2024-04-09
+           o1 o3 o4-mini
+           chatgpt-4o-latest
+         ].freeze
+
+         # Reasoning models (o-series)
+         REASONING_MODELS = %w[o1 o1-mini o1-preview o1-2024-12-17 o3 o3-mini o4-mini].freeze
+
+         # Models that support web search
+         WEB_SEARCH_MODELS = %w[
+           gpt-4o gpt-4o-mini gpt-4.1 gpt-4.1-mini gpt-4.1-nano
+           o1 o3 o3-mini o4-mini
+         ].freeze
+
+         # Models that support code interpreter
+         CODE_INTERPRETER_MODELS = %w[
+           gpt-4o gpt-4o-mini gpt-4.1 gpt-4.1-mini gpt-4.1-nano
+           o1 o3 o3-mini o4-mini
+         ].freeze
+
+         # Context windows by model
+         CONTEXT_WINDOWS = {
+           'gpt-4o' => 128_000,
+           'gpt-4o-mini' => 128_000,
+           'gpt-4o-2024-05-13' => 128_000,
+           'gpt-4o-2024-08-06' => 128_000,
+           'gpt-4o-2024-11-20' => 128_000,
+           'gpt-4o-mini-2024-07-18' => 128_000,
+           'gpt-4.1' => 1_000_000,
+           'gpt-4.1-mini' => 1_000_000,
+           'gpt-4.1-nano' => 1_000_000,
+           'gpt-4-turbo' => 128_000,
+           'gpt-4-turbo-2024-04-09' => 128_000,
+           'o1' => 200_000,
+           'o1-mini' => 128_000,
+           'o1-preview' => 128_000,
+           'o3' => 200_000,
+           'o3-mini' => 200_000,
+           'o4-mini' => 200_000
+         }.freeze
+
+         # Max output tokens by model
+         MAX_OUTPUT_TOKENS = {
+           'gpt-4o' => 16_384,
+           'gpt-4o-mini' => 16_384,
+           'gpt-4o-2024-05-13' => 4_096,
+           'gpt-4o-2024-08-06' => 16_384,
+           'gpt-4o-2024-11-20' => 16_384,
+           'gpt-4o-mini-2024-07-18' => 16_384,
+           'gpt-4.1' => 32_768,
+           'gpt-4.1-mini' => 32_768,
+           'gpt-4.1-nano' => 32_768,
+           'gpt-4-turbo' => 4_096,
+           'o1' => 100_000,
+           'o1-mini' => 65_536,
+           'o3' => 100_000,
+           'o3-mini' => 100_000,
+           'o4-mini' => 100_000
+         }.freeze
+
+         # Pricing in USD per million tokens (snapshot at gem release; verify against current OpenAI rates)
+         PRICING = {
+           'gpt-4o' => { input: 2.50, output: 10.00, cached_input: 1.25 },
+           'gpt-4o-mini' => { input: 0.15, output: 0.60, cached_input: 0.075 },
+           'gpt-4.1' => { input: 2.00, output: 8.00, cached_input: 0.50 },
+           'gpt-4.1-mini' => { input: 0.40, output: 1.60, cached_input: 0.10 },
+           'gpt-4.1-nano' => { input: 0.10, output: 0.40, cached_input: 0.025 },
+           'o1' => { input: 15.00, output: 60.00, cached_input: 7.50 },
+           'o1-mini' => { input: 1.10, output: 4.40, cached_input: 0.55 },
+           'o3' => { input: 10.00, output: 40.00, cached_input: 2.50 },
+           'o3-mini' => { input: 1.10, output: 4.40, cached_input: 0.275 },
+           'o4-mini' => { input: 1.10, output: 4.40, cached_input: 0.275 }
+         }.freeze
+
+         def supports_responses_api?(model_id)
+           model_matches?(model_id, RESPONSES_API_MODELS)
+         end
+
+         def supports_vision?(model_id)
+           model_matches?(model_id, VISION_MODELS)
+         end
+
+         def supports_functions?(model_id)
+           supports_responses_api?(model_id)
+         end
+
+         def supports_structured_output?(model_id)
+           supports_responses_api?(model_id)
+         end
+
+         def supports_web_search?(model_id)
+           model_matches?(model_id, WEB_SEARCH_MODELS)
+         end
+
+         def supports_code_interpreter?(model_id)
+           model_matches?(model_id, CODE_INTERPRETER_MODELS)
+         end
+
+         def reasoning_model?(model_id)
+           model_matches?(model_id, REASONING_MODELS)
+         end
+
+         def context_window_for(model_id)
+           find_capability(model_id, CONTEXT_WINDOWS) || 128_000
+         end
+
+         def max_tokens_for(model_id)
+           find_capability(model_id, MAX_OUTPUT_TOKENS) || 16_384
+         end
+
+         def input_price_for(model_id)
+           pricing = find_capability(model_id, PRICING)
+           pricing ? pricing[:input] : 0.0
+         end
+
+         def output_price_for(model_id)
+           pricing = find_capability(model_id, PRICING)
+           pricing ? pricing[:output] : 0.0
+         end
+
+         def pricing_for(model_id)
+           pricing = find_capability(model_id, PRICING) || { input: 0.0, output: 0.0 }
+           {
+             text_tokens: {
+               standard: {
+                 input_per_million: pricing[:input],
+                 output_per_million: pricing[:output],
+                 cached_input_per_million: pricing[:cached_input] || (pricing[:input] / 2)
+               }
+             }
+           }
+         end
+
+         def modalities_for(model_id)
+           input = ['text']
+           input << 'image' if supports_vision?(model_id)
+
+           {
+             input: input,
+             output: ['text']
+           }
+         end
+
+         def capabilities_for(model_id)
+           caps = %w[streaming function_calling structured_output]
+           caps << 'vision' if supports_vision?(model_id)
+           caps << 'web_search' if supports_web_search?(model_id)
+           caps << 'code_interpreter' if supports_code_interpreter?(model_id)
+           caps << 'reasoning' if reasoning_model?(model_id)
+           caps
+         end
+
+         def model_family(model_id)
+           case model_id
+           when /^gpt-4\.1/ then 'gpt-4.1'
+           when /^gpt-4o-mini/ then 'gpt-4o-mini'
+           when /^gpt-4o/ then 'gpt-4o'
+           when /^gpt-4-turbo/ then 'gpt-4-turbo'
+           when /^o1/ then 'o1'
+           when /^o3/ then 'o3'
+           when /^o4/ then 'o4'
+           else 'other'
+           end
+         end
+
+         def format_display_name(model_id)
+           model_id
+             .gsub(/[-_]/, ' ')
+             .split
+             .map(&:capitalize)
+             .join(' ')
+         end
+
+         # Temperature is not supported for reasoning models
+         def normalize_temperature(temperature, model_id)
+           return nil if reasoning_model?(model_id)
+
+           temperature
+         end
+
+         private_class_method def find_capability(model_id, mapping)
+           # Direct match
+           return mapping[model_id] if mapping.key?(model_id)
+
+           # Try base model name (without date suffix)
+           base_model = model_id.gsub(/-\d{4}-\d{2}-\d{2}$/, '')
+           return mapping[base_model] if mapping.key?(base_model)
+
+           nil
+         end
+
+         private_class_method def model_matches?(model_id, model_list)
+           model_list.any? do |pattern|
+             model_id == pattern || model_id.start_with?("#{pattern}-")
+           end
+         end
+       end
+     end
+   end
+ end
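
For orientation, a short sketch of how these capability helpers compose. This is a hypothetical consumer, not code from the gem; the model IDs are entries from the tables above:

    caps = RubyLLM::Providers::OpenAIResponses::Capabilities

    caps.supports_vision?('gpt-4o-2024-11-20')   # => true (exact entry in VISION_MODELS)
    caps.supports_web_search?('o3-mini')         # => true
    caps.context_window_for('gpt-4o-2024-08-06') # => 128_000 (direct key; find_capability also strips date suffixes)
    caps.max_tokens_for('unknown-model')         # => 16_384 (fallback default)
    caps.normalize_temperature(0.7, 'o1')        # => nil (temperature is dropped for reasoning models)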
@@ -0,0 +1,265 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenAIResponses
+       # Chat completion methods for the OpenAI Responses API.
+       # Handles converting RubyLLM messages to Responses API format and parsing responses.
+       module Chat
+         def completion_url
+           'responses'
+         end
+
+         module_function
+
+         def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+           # Extract system messages for instructions
+           system_messages = messages.select { |m| m.role == :system }
+           non_system_messages = messages.reject { |m| m.role == :system }
+
+           instructions = system_messages.map { |m| extract_text_content(m.content) }.join("\n\n")
+
+           payload = {
+             model: model.id,
+             input: format_input(non_system_messages),
+             stream: stream
+           }
+
+           payload[:instructions] = instructions unless instructions.empty?
+           payload[:temperature] = temperature unless temperature.nil?
+
+           payload[:tools] = tools.map { |_, tool| tool_for(tool) } if tools.any?
+
+           if schema
+             payload[:text] = {
+               format: {
+                 type: 'json_schema',
+                 name: 'response',
+                 schema: schema,
+                 strict: schema[:strict] != false
+               }
+             }
+           end
+
+           # Auto-chain conversations: find the last response_id from assistant messages.
+           # This enables automatic stateful conversations without manual tracking.
+           last_response_id = extract_last_response_id(messages)
+           payload[:previous_response_id] = last_response_id if last_response_id
+
+           payload
+         end
+
+         def extract_last_response_id(messages)
+           messages
+             .select { |m| m.role == :assistant && m.respond_to?(:response_id) }
+             .map(&:response_id)
+             .compact
+             .last
+         end
+
+         def parse_completion_response(response)
+           data = response.body
+           return if data.nil? || data.empty?
+
+           data = JSON.parse(data) if data.is_a?(String)
+
+           raise RubyLLM::Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
+
+           output = data['output'] || []
+
+           # Extract text content from output
+           content = extract_output_text(output)
+
+           # Extract tool calls from function_call outputs
+           tool_calls = extract_tool_calls(output)
+
+           usage = data['usage'] || {}
+           cached_tokens = usage.dig('input_tokens_details', 'cached_tokens')
+
+           Message.new(
+             role: :assistant,
+             content: content,
+             tool_calls: tool_calls,
+             input_tokens: usage['input_tokens'],
+             output_tokens: usage['output_tokens'],
+             cached_tokens: cached_tokens,
+             cache_creation_tokens: 0,
+             model_id: data['model'],
+             response_id: data['id'],
+             raw: response
+           )
+         end
+
+         def format_input(messages) # rubocop:disable Metrics/MethodLength
+           result = []
+
+           messages.each do |msg|
+             if msg.tool_call_id
+               # Tool result message - function_call_output type
+               result << {
+                 type: 'function_call_output',
+                 call_id: msg.tool_call_id,
+                 output: extract_text_content(msg.content)
+               }
+             elsif msg.tool_calls&.any?
+               # Assistant message with tool calls.
+               # First add any text content as a message
+               text = extract_text_content(msg.content)
+               if text && !text.empty?
+                 result << {
+                   type: 'message',
+                   role: 'assistant',
+                   content: text
+                 }
+               end
+
+               # Then add each function call as a separate item
+               msg.tool_calls.each_value do |tc|
+                 result << {
+                   type: 'function_call',
+                   call_id: tc.id,
+                   name: tc.name,
+                   arguments: tc.arguments.is_a?(String) ? tc.arguments : JSON.generate(tc.arguments)
+                 }
+               end
+             else
+               # Regular message
+               result << {
+                 type: 'message',
+                 role: format_role(msg.role),
+                 content: format_message_content(msg.content, nil)
+               }
+             end
+           end
+
+           result
+         end
+
+         def format_message_content(content, tool_calls = nil)
+           parts = []
+
+           # Add text content
+           text = extract_text_content(content)
+           parts << { type: 'input_text', text: text } if text && !text.empty?
+
+           # Add attachments if present
+           if content.is_a?(RubyLLM::Content)
+             content.attachments.each do |attachment|
+               parts << format_attachment(attachment)
+             end
+           end
+
+           # Add tool calls if present (for assistant messages)
+           if tool_calls&.any?
+             tool_calls.each_value do |tc|
+               parts << {
+                 type: 'function_call',
+                 call_id: tc.id,
+                 name: tc.name,
+                 arguments: tc.arguments.is_a?(String) ? tc.arguments : JSON.generate(tc.arguments)
+               }
+             end
+           end
+
+           # Return simple text for single text content
+           return parts.first[:text] if parts.length == 1 && parts.first[:type] == 'input_text'
+
+           parts
+         end
+
+         def format_attachment(attachment)
+           case attachment.type
+           when :image
+             if attachment.url?
+               { type: 'input_image', image_url: attachment.source }
+             else
+               { type: 'input_image', image_url: attachment.for_llm }
+             end
+           when :pdf
+             {
+               type: 'input_file',
+               filename: File.basename(attachment.source.to_s),
+               file_data: attachment.for_llm
+             }
+           when :audio
+             {
+               type: 'input_audio',
+               data: attachment.for_llm,
+               format: detect_audio_format(attachment.source)
+             }
+           else
+             { type: 'input_text', text: "[Unsupported attachment: #{attachment.type}]" }
+           end
+         end
+
+         def detect_audio_format(source)
+           ext = File.extname(source.to_s).downcase
+           case ext
+           when '.mp3' then 'mp3'
+           when '.wav' then 'wav'
+           when '.webm' then 'webm'
+           when '.ogg' then 'ogg'
+           when '.flac' then 'flac'
+           else 'mp3'
+           end
+         end
+
+         def extract_text_content(content)
+           case content
+           when String
+             content
+           when RubyLLM::Content
+             content.text
+           when Hash
+             content[:text] || content['text']
+           else
+             content.to_s
+           end
+         end
+
+         def format_role(role)
+           case role
+           when :system then 'developer'
+           when :assistant then 'assistant'
+           when :tool then 'user' # Tool results come from the user's perspective
+           else role.to_s
+           end
+         end
+
+         def extract_output_text(output)
+           output
+             .select { |item| item['type'] == 'message' }
+             .flat_map { |item| item['content'] || [] }
+             .select { |c| c['type'] == 'output_text' }
+             .map { |c| c['text'] }
+             .join
+         end
+
+         def extract_tool_calls(output)
+           function_calls = output.select { |item| item['type'] == 'function_call' }
+           return nil if function_calls.empty?
+
+           function_calls.to_h do |fc|
+             [
+               fc['call_id'],
+               ToolCall.new(
+                 id: fc['call_id'],
+                 name: fc['name'],
+                 arguments: parse_arguments(fc['arguments'])
+               )
+             ]
+           end
+         end
+
+         def parse_arguments(arguments)
+           return {} if arguments.nil? || arguments.empty?
+           return arguments if arguments.is_a?(Hash)
+
+           JSON.parse(arguments)
+         rescue JSON::ParserError
+           { raw: arguments }
+         end
+       end
+     end
+   end
+ end
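
The auto-chaining in render_payload is the notable design choice here: the payload carries previous_response_id so the server can resume its stored conversation state instead of relying solely on the resent transcript. A minimal usage sketch, assuming the gem is installed and registers this provider for a Responses-capable model (the id value is illustrative):

    require 'ruby_llm'

    chat  = RubyLLM.chat(model: 'gpt-4o')
    first = chat.ask('Pick a secret number and remember it.')

    # parse_completion_response copied the server's response id onto the message:
    first.response_id # => "resp_..." (illustrative)

    # On the next turn, render_payload finds that id via extract_last_response_id
    # and sends it as previous_response_id, chaining the conversation server-side.
    chat.ask('What was the secret number?')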
@@ -0,0 +1,114 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenAIResponses
+       # Media handling methods for the OpenAI Responses API.
+       # Handles images, audio, PDFs, and other file types.
+       module Media
+         module_function
+
+         def format_content(content)
+           return content if content.is_a?(RubyLLM::Content::Raw)
+           return content unless content.is_a?(RubyLLM::Content)
+
+           parts = []
+           parts << format_text(content.text) if content.text && !content.text.empty?
+
+           content.attachments.each do |attachment|
+             parts << format_attachment(attachment)
+           end
+
+           # Return a simple string for text-only content
+           return content.text if parts.length == 1 && parts.first[:type] == 'input_text'
+
+           parts
+         end
+
+         def format_text(text)
+           { type: 'input_text', text: text }
+         end
+
+         def format_attachment(attachment)
+           case attachment.type
+           when :image
+             format_image(attachment)
+           when :pdf
+             format_pdf(attachment)
+           when :audio
+             format_audio(attachment)
+           else
+             format_unknown(attachment)
+           end
+         end
+
+         def format_image(image)
+           if image.url?
+             {
+               type: 'input_image',
+               image_url: image.source
+             }
+           else
+             {
+               type: 'input_image',
+               image_url: image.for_llm
+             }
+           end
+         end
+
+         def format_pdf(pdf)
+           {
+             type: 'input_file',
+             filename: extract_filename(pdf.source),
+             file_data: pdf.for_llm
+           }
+         end
+
+         def format_audio(audio)
+           {
+             type: 'input_audio',
+             data: audio.for_llm,
+             format: detect_audio_format(audio.source)
+           }
+         end
+
+         def format_unknown(attachment)
+           {
+             type: 'input_text',
+             text: "[Attachment: #{attachment.type}]"
+           }
+         end
+
+         def extract_filename(source)
+           return 'file' unless source
+
+           if source.respond_to?(:path)
+             File.basename(source.path)
+           else
+             File.basename(source.to_s)
+           end
+         end
+
+         def detect_audio_format(source)
+           return 'mp3' unless source
+
+           ext = if source.respond_to?(:path)
+                   File.extname(source.path)
+                 else
+                   File.extname(source.to_s)
+                 end
+
+           case ext.downcase
+           when '.mp3' then 'mp3'
+           when '.wav' then 'wav'
+           when '.webm' then 'webm'
+           when '.ogg' then 'ogg'
+           when '.flac' then 'flac'
+           when '.m4a' then 'm4a'
+           else 'mp3'
+           end
+         end
+       end
+     end
+   end
+ end
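
To make the shapes concrete, a sketch of what format_content yields in the common cases (derived from the branches above; the attachment values are hypothetical):

    media = RubyLLM::Providers::OpenAIResponses::Media

    # Plain strings and Content::Raw pass through untouched:
    media.format_content('hello') # => 'hello'

    # A Content with text plus an image URL becomes a parts array, roughly:
    # [
    #   { type: 'input_text',  text: 'Describe this image' },
    #   { type: 'input_image', image_url: 'https://example.com/cat.png' }
    # ]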
@@ -0,0 +1,32 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenAIResponses
+       # Extends RubyLLM::Message to support response_id for stateful conversations
+       module MessageExtension
+         attr_accessor :response_id
+
+         def self.included(base)
+           base.class_eval do
+             alias_method :original_initialize, :initialize
+
+             define_method(:initialize) do |options = {}|
+               original_initialize(options)
+               @response_id = options[:response_id]
+             end
+
+             alias_method :original_to_h, :to_h
+
+             define_method(:to_h) do
+               original_to_h.merge(response_id: response_id).compact
+             end
+           end
+         end
+       end
+     end
+   end
+ end
+
+ # Apply the extension to RubyLLM::Message
+ RubyLLM::Message.include(RubyLLM::Providers::OpenAIResponses::MessageExtension)
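
A quick sketch of the extension's effect once included (the id is hypothetical; note that the redefined initialize collects its arguments as an options hash, as defined above):

    msg = RubyLLM::Message.new(role: :assistant, content: 'Hi', response_id: 'resp_abc123')

    msg.response_id        # => 'resp_abc123'
    msg.to_h[:response_id] # => 'resp_abc123' (compact drops the key when the id is nil)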