ruby_llm 1.14.1 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -3
- data/lib/generators/ruby_llm/generator_helpers.rb +8 -0
- data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +1 -1
- data/lib/ruby_llm/active_record/acts_as.rb +3 -0
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +52 -25
- data/lib/ruby_llm/active_record/chat_methods.rb +39 -22
- data/lib/ruby_llm/active_record/message_methods.rb +17 -1
- data/lib/ruby_llm/active_record/model_methods.rb +7 -9
- data/lib/ruby_llm/active_record/payload_helpers.rb +3 -0
- data/lib/ruby_llm/active_record/tool_call_methods.rb +3 -0
- data/lib/ruby_llm/agent.rb +3 -2
- data/lib/ruby_llm/aliases.json +34 -4
- data/lib/ruby_llm/attachment.rb +11 -27
- data/lib/ruby_llm/chat.rb +62 -21
- data/lib/ruby_llm/cost.rb +224 -0
- data/lib/ruby_llm/image.rb +37 -4
- data/lib/ruby_llm/message.rb +20 -0
- data/lib/ruby_llm/model/info.rb +17 -0
- data/lib/ruby_llm/model/pricing_category.rb +13 -2
- data/lib/ruby_llm/models.json +25168 -20374
- data/lib/ruby_llm/models.rb +2 -1
- data/lib/ruby_llm/models_schema.json +3 -0
- data/lib/ruby_llm/provider.rb +10 -3
- data/lib/ruby_llm/providers/anthropic/tools.rb +4 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +24 -13
- data/lib/ruby_llm/providers/bedrock/streaming.rb +4 -1
- data/lib/ruby_llm/providers/gemini/chat.rb +8 -1
- data/lib/ruby_llm/providers/gemini/images.rb +2 -2
- data/lib/ruby_llm/providers/gemini/streaming.rb +4 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +3 -1
- data/lib/ruby_llm/providers/mistral/capabilities.rb +6 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +55 -4
- data/lib/ruby_llm/providers/openai/capabilities.rb +82 -12
- data/lib/ruby_llm/providers/openai/chat.rb +45 -6
- data/lib/ruby_llm/providers/openai/images.rb +58 -6
- data/lib/ruby_llm/providers/openai/streaming.rb +5 -6
- data/lib/ruby_llm/providers/openrouter/chat.rb +30 -6
- data/lib/ruby_llm/providers/openrouter/images.rb +2 -2
- data/lib/ruby_llm/providers/openrouter/models.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/streaming.rb +5 -6
- data/lib/ruby_llm/railtie.rb +6 -0
- data/lib/ruby_llm/tokens.rb +8 -0
- data/lib/ruby_llm/tool.rb +24 -7
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/ruby_llm.rb +2 -4
- data/lib/tasks/models.rake +13 -12
- metadata +19 -4
data/lib/ruby_llm/models.rb
CHANGED
@@ -356,7 +356,8 @@ module RubyLLM
         text_standard = {
           input_per_million: cost[:input],
           output_per_million: cost[:output],
-
+          cache_read_input_per_million: cost[:cache_read],
+          cache_write_input_per_million: cost[:cache_write],
           reasoning_output_per_million: cost[:reasoning]
         }.compact

data/lib/ruby_llm/models_schema.json
CHANGED
@@ -87,7 +87,10 @@
         "type": "object",
         "properties": {
           "input_per_million": {"type": "number", "minimum": 0},
+          "cache_read_input_per_million": {"type": "number", "minimum": 0},
+          "cache_write_input_per_million": {"type": "number", "minimum": 0},
           "cached_input_per_million": {"type": "number", "minimum": 0},
+          "cache_creation_input_per_million": {"type": "number", "minimum": 0},
           "output_per_million": {"type": "number", "minimum": 0},
           "reasoning_output_per_million": {"type": "number", "minimum": 0}
         }
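The new `cache_read_input_per_million` and `cache_write_input_per_million` fields let pricing data price cached prompt traffic separately from fresh input. A minimal sketch of how such per-million rates combine into a request cost, with made-up rates and a hypothetical `cost` helper (not code from the gem):

```ruby
pricing = {
  input_per_million: 3.0,              # uncached input tokens
  cache_read_input_per_million: 0.3,   # tokens served from the prompt cache
  cache_write_input_per_million: 3.75, # tokens written to the prompt cache
  output_per_million: 15.0
}

def cost(tokens, rate_per_million)
  return 0.0 unless tokens && rate_per_million

  tokens / 1_000_000.0 * rate_per_million
end

total = cost(200,   pricing[:input_per_million]) +
        cost(1_000, pricing[:cache_read_input_per_million]) +
        cost(300,   pricing[:cache_write_input_per_million]) +
        cost(350,   pricing[:output_per_million])
# => 0.007275 dollars for this hypothetical request
```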
data/lib/ruby_llm/provider.rb
CHANGED
@@ -81,9 +81,10 @@ module RubyLLM
       parse_moderation_response(response, model:)
     end

-    def paint(prompt, model:, size:)
-
-
+    def paint(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
+      validate_paint_inputs!(with:, mask:)
+      payload = render_image_payload(prompt, model:, size:, with:, mask:, params:)
+      response = @connection.post images_url(with:, mask:), payload
       parse_image_response(response, model:)
     end

@@ -225,6 +226,12 @@ module RubyLLM

     private

+    def validate_paint_inputs!(with:, mask:)
+      return if with.nil? && mask.nil?
+
+      raise UnsupportedAttachmentError, "#{name} does not support image references in paint"
+    end
+
     def build_audio_file_part(file_path)
       expanded_path = File.expand_path(file_path)
       mime_type = Marcel::MimeType.for(Pathname.new(expanded_path))
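`Provider#paint` now accepts reference images (`with:`), an optional `mask:`, and provider passthrough `params:`; providers that do not implement image editing raise `UnsupportedAttachmentError` through the default `validate_paint_inputs!`. A usage sketch, assuming the top-level `RubyLLM.paint` forwards these keywords (file names and the `quality` option are placeholders):

```ruby
# Plain generation, as before:
image = RubyLLM.paint('a watercolor lighthouse at dusk')

# Editing an existing image; with:/mask:/params: are the new keywords.
edited = RubyLLM.paint(
  'replace the sky with a purple nebula',
  with: 'photo.png',          # reference image(s) to edit
  mask: 'sky_mask.png',       # optional mask limiting the edited region
  params: { quality: 'high' } # merged into the provider payload as-is
)
```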
data/lib/ruby_llm/providers/anthropic/tools.rb
CHANGED
@@ -45,10 +45,13 @@ module RubyLLM
        end

        def format_tool_result_block(msg)
+          content = msg.content
+          content = '(no output)' if content.nil? || (content.respond_to?(:empty?) && content.empty?)
+
          {
            type: 'tool_result',
            tool_use_id: msg.tool_call_id,
-            content: Media.format_content(
+            content: Media.format_content(content)
          }
        end

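The guard normalizes nil or empty tool output to a `'(no output)'` placeholder before formatting, so the model never receives an empty `tool_result` block. A quick illustration of which values the check catches (illustrative, not gem code):

```ruby
[nil, '', [], {}, false, 0, 'ok'].map do |content|
  if content.nil? || (content.respond_to?(:empty?) && content.empty?)
    '(no output)' # nil and empty strings/arrays/hashes are replaced
  else
    content       # false and 0 lack #empty?, so they pass through
  end
end
# => ["(no output)", "(no output)", "(no output)", "(no output)", false, 0, "ok"]
```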
data/lib/ruby_llm/providers/bedrock/chat.rb
CHANGED
@@ -56,7 +56,7 @@ module RubyLLM
            content: parse_text_content(content_blocks),
            thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
            tool_calls: parse_tool_calls(content_blocks),
-            input_tokens: usage
+            input_tokens: input_tokens(usage),
            output_tokens: usage['outputTokens'],
            cached_tokens: usage['cacheReadInputTokens'],
            cache_creation_tokens: usage['cacheWriteInputTokens'],
@@ -66,6 +66,13 @@ module RubyLLM
          )
        end

+        def input_tokens(usage)
+          input_tokens = usage['inputTokens']
+          return unless input_tokens
+
+          [input_tokens.to_i - usage['cacheReadInputTokens'].to_i - usage['cacheWriteInputTokens'].to_i, 0].max
+        end
+
        def render_messages(messages)
          rendered = []
          tool_result_blocks = []
@@ -154,19 +161,23 @@ module RubyLLM

        def render_tool_result_content(content)
          return render_raw_tool_result_content(content.value) if content.is_a?(RubyLLM::Content::Raw)
+          return [{ json: content }] if content.is_a?(Hash) || content.is_a?(Array)
+          return render_content_tool_result_content(content) if content.is_a?(RubyLLM::Content)

-
-
-
-
-
-
-
-
-
-
-
-
+          [text_tool_result_block(content)]
+        end
+
+        def render_content_tool_result_content(content)
+          blocks = []
+          blocks << text_tool_result_block(content.text) unless content.text.to_s.empty?
+          content.attachments.each { |attachment| blocks << text_tool_result_block(attachment.for_llm) }
+          blocks.empty? ? [text_tool_result_block(nil)] : blocks
+        end
+
+        def text_tool_result_block(text)
+          text = text.to_s
+          text = '(no output)' if text.empty?
+          { text: text }
        end

        def render_raw_tool_result_content(raw_value)
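The new `input_tokens` helper subtracts cache reads and writes from Bedrock's `inputTokens` figure (clamping at zero), so the reported count covers only uncached input. A worked example with made-up numbers:

```ruby
usage = {
  'inputTokens' => 1_500,           # total prompt tokens reported by Bedrock
  'cacheReadInputTokens' => 1_000,  # served from the prompt cache
  'cacheWriteInputTokens' => 300    # newly written to the prompt cache
}

[usage['inputTokens'].to_i -
   usage['cacheReadInputTokens'].to_i -
   usage['cacheWriteInputTokens'].to_i, 0].max
# => 200 uncached input tokens
```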
data/lib/ruby_llm/providers/bedrock/streaming.rb
CHANGED
@@ -158,7 +158,10 @@ module RubyLLM
        end

        def extract_input_tokens(metadata_usage, usage, message_usage)
-          metadata_usage['inputTokens']
+          bedrock_usage = metadata_usage['inputTokens'] ? metadata_usage : usage
+          return Bedrock::Chat.input_tokens(bedrock_usage) if bedrock_usage['inputTokens']
+
+          message_usage['input_tokens']
        end

        def extract_output_tokens(metadata_usage, usage)
data/lib/ruby_llm/providers/gemini/chat.rb
CHANGED
@@ -118,7 +118,7 @@ module RubyLLM
              signature: extract_thought_signature(parts)
            ),
            tool_calls: tool_calls,
-            input_tokens: data
+            input_tokens: input_tokens(data),
            output_tokens: calculate_output_tokens(data),
            cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
@@ -127,6 +127,13 @@ module RubyLLM
          )
        end

+        def input_tokens(data)
+          prompt_tokens = data.dig('usageMetadata', 'promptTokenCount')
+          return unless prompt_tokens
+
+          [prompt_tokens.to_i - data.dig('usageMetadata', 'cachedContentTokenCount').to_i, 0].max
+        end
+
        def convert_schema_to_gemini(schema)
          return nil unless schema

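Same normalization idea as the Bedrock change: the helper subtracts `cachedContentTokenCount` from Gemini's `promptTokenCount` and clamps at zero, leaving only uncached input. With made-up numbers:

```ruby
data = {
  'usageMetadata' => {
    'promptTokenCount' => 2_048,        # total prompt tokens
    'cachedContentTokenCount' => 1_536  # tokens served from cached content
  }
}

prompt_tokens = data.dig('usageMetadata', 'promptTokenCount')
[prompt_tokens.to_i - data.dig('usageMetadata', 'cachedContentTokenCount').to_i, 0].max
# => 512 uncached input tokens
```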
data/lib/ruby_llm/providers/gemini/images.rb
CHANGED
@@ -5,11 +5,11 @@ module RubyLLM
    class Gemini
      # Image generation methods for the Gemini API implementation
      module Images
-        def images_url
+        def images_url(with: nil, mask: nil) # rubocop:disable Lint/UnusedMethodArgument
          "models/#{@model}:predict"
        end

-        def render_image_payload(prompt, model:, size:)
+        def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
          RubyLLM.logger.debug { "Ignoring size #{size}. Gemini does not support image size customization." }
          @model = model
          {
data/lib/ruby_llm/providers/gemini/streaming.rb
CHANGED
@@ -70,7 +70,10 @@ module RubyLLM
        end

        def extract_input_tokens(data)
-          data.dig('usageMetadata', 'promptTokenCount')
+          prompt_tokens = data.dig('usageMetadata', 'promptTokenCount')
+          return unless prompt_tokens
+
+          [prompt_tokens.to_i - data.dig('usageMetadata', 'cachedContentTokenCount').to_i, 0].max
        end

        def extract_output_tokens(data)
data/lib/ruby_llm/providers/gemini/tools.rb
CHANGED
@@ -46,13 +46,15 @@ module RubyLLM

        def format_tool_result(msg, function_name = nil)
          function_name ||= msg.tool_call_id
+          content = msg.content
+          content = '(no output)' if content.nil? || (content.respond_to?(:empty?) && content.empty?)

          [{
            functionResponse: {
              name: function_name,
              response: {
                name: function_name,
-                content: Media.format_content(
+                content: Media.format_content(content)
              }
            }
          }]
data/lib/ruby_llm/providers/mistral/capabilities.rb
CHANGED
@@ -31,6 +31,11 @@ module RubyLLM
          !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions/) && supports_tools?(model_id)
        end

+        def supports_reasoning?(model_id)
+          model_id.match?(/magistral/) ||
+            model_id.match?(/\Amistral-(?:small-latest|medium-(?:3(?:[.-]5)?|latest))\z/)
+        end
+
        def format_display_name(model_id)
          case model_id
          when /mistral-large/ then 'Mistral Large'
@@ -101,7 +106,7 @@ module RubyLLM
          capabilities << 'structured_output' if supports_json_mode?(model_id)
          capabilities << 'vision' if supports_vision?(model_id)

-          capabilities << 'reasoning' if
+          capabilities << 'reasoning' if supports_reasoning?(model_id)
          capabilities << 'batch' unless model_id.match?(/voxtral|ocr|embed|moderation/)
          capabilities << 'fine_tuning' if model_id.match?(/mistral-(small|medium|large)|devstral/)
          capabilities << 'distillation' if model_id.match?(/ministral/)
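The second pattern is anchored, so adjustable reasoning is limited to a few exact model ids; an illustration of what it accepts and rejects:

```ruby
pattern = /\Amistral-(?:small-latest|medium-(?:3(?:[.-]5)?|latest))\z/

%w[mistral-small-latest mistral-medium-latest
   mistral-medium-3 mistral-medium-3.5 mistral-medium-3-5]
  .all? { |id| id.match?(pattern) }           # => true

'mistral-small-2409'.match?(pattern)          # => false (anchored, exact ids only)
'magistral-medium-latest'.match?(/magistral/) # => true via the first branch
```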
data/lib/ruby_llm/providers/mistral/chat.rb
CHANGED
@@ -27,12 +27,30 @@ module RubyLLM
                           schema: nil, thinking: nil, tool_prefs: nil)
          payload = super
          payload.delete(:stream_options)
-          payload
-
+          configure_thinking_payload(payload, model, thinking)
+          normalize_required_tool_choice(payload)
          payload
        end
        # rubocop:enable Metrics/ParameterLists

+        def build_tool_choice(tool_choice)
+          return 'any' if tool_choice == :required
+
+          OpenAI::Tools.build_tool_choice(tool_choice)
+        end
+
+        def normalize_required_tool_choice(payload)
+          return unless payload[:tool_choice] == 'any' && Array(payload[:tools]).one?
+
+          function_name = payload.dig(:tools, 0, :function, :name)
+          return unless function_name
+
+          payload[:tool_choice] = {
+            type: 'function',
+            function: { name: function_name }
+          }
+        end
+
        def format_content_with_thinking(msg)
          formatted_content = OpenAI::Media.format_content(msg.content)
          return formatted_content unless msg.role == :assistant && msg.thinking
@@ -45,14 +63,47 @@ module RubyLLM

        def warn_on_unsupported_thinking(model, thinking)
          return unless thinking&.enabled?
-          return if model.id
+          return if native_reasoning_model?(model.id) || adjustable_reasoning_model?(model.id)

          RubyLLM.logger.warn(
-            'Mistral thinking is only supported on Magistral models. ' \
+            'Mistral thinking is only supported on Magistral and adjustable-reasoning models. ' \
            "Ignoring thinking settings for #{model.id}."
          )
        end

+        def configure_thinking_payload(payload, model, thinking)
+          return unless thinking&.enabled?
+
+          if native_reasoning_model?(model.id)
+            configure_native_reasoning_payload(payload, thinking)
+          elsif adjustable_reasoning_model?(model.id)
+            payload[:reasoning_effort] = reasoning_effort_for(thinking)
+          else
+            payload.delete(:reasoning_effort)
+            warn_on_unsupported_thinking(model, thinking)
+          end
+        end
+
+        def configure_native_reasoning_payload(payload, thinking)
+          payload.delete(:reasoning_effort)
+          payload[:prompt_mode] = thinking.effort == 'none' ? nil : 'reasoning'
+        end
+
+        def reasoning_effort_for(thinking)
+          effort = thinking.respond_to?(:effort) ? thinking.effort : nil
+          return effort if %w[high none].include?(effort)
+
+          'high'
+        end
+
+        def native_reasoning_model?(model_id)
+          model_id.to_s.include?('magistral')
+        end
+
+        def adjustable_reasoning_model?(model_id)
+          model_id.to_s.match?(/\Amistral-(?:small-latest|medium-(?:3(?:[.-]5)?|latest))\z/)
+        end
+
        def build_thinking_blocks(thinking)
          return [] unless thinking

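Mistral expects `'any'` for a required tool choice, but when exactly one tool is defined, `normalize_required_tool_choice` rewrites the payload to name that function directly, pinning the call. The effect on a single-tool payload (illustrative tool name):

```ruby
payload = {
  tool_choice: 'any',
  tools: [{ type: 'function', function: { name: 'get_weather', parameters: {} } }]
}

# After normalize_required_tool_choice(payload):
payload[:tool_choice]
# => { type: 'function', function: { name: 'get_weather' } }
```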
data/lib/ruby_llm/providers/openai/capabilities.rb
CHANGED
@@ -8,6 +8,9 @@ module RubyLLM
        module_function

        MODEL_PATTERNS = {
+          gpt_image15: /^gpt-image-1\.5/,
+          gpt_image_mini: /^gpt-image-1-mini/,
+          gpt_image: /^gpt-image-1(?:$|-)/,
          gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
          gpt41_mini: /^gpt-4\.1-mini/,
          gpt41_nano: /^gpt-4\.1-nano/,
@@ -43,6 +46,18 @@ module RubyLLM
        }.freeze

        PRICES = {
+          gpt_image: {
+            text: { input: 5.0, cached_input: 1.25 },
+            images: { input: 10.0, cached_input: 2.5, output: 40.0 }
+          },
+          gpt_image_mini: {
+            text: { input: 2.0, cached_input: 0.2 },
+            images: { input: 2.5, cached_input: 0.25, output: 8.0 }
+          },
+          gpt_image15: {
+            text: { input: 5.0, cached_input: 1.25, output: 10.0 },
+            images: { input: 8.0, cached_input: 2.0, output: 32.0 }
+          },
          gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
          gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
          gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
@@ -77,6 +92,20 @@ module RubyLLM
          moderation: { price: 0.0 }
        }.freeze

+        NIL_LIMIT_FAMILIES = %w[
+          gpt_image
+          gpt_image_mini
+          gpt_image15
+          gpt4o_mini_tts
+          tts1
+          tts1_hd
+          whisper
+          moderation
+          embedding3_large
+          embedding3_small
+          embedding_ada
+        ].freeze
+
        def supports_tool_choice?(_model_id)
          true
        end
@@ -86,7 +115,10 @@ module RubyLLM
        end

        def context_window_for(model_id)
-
+          family = model_family(model_id)
+          return nil if NIL_LIMIT_FAMILIES.include?(family)
+
+          case family
          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
          when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
            'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
@@ -95,14 +127,15 @@ module RubyLLM
          when 'gpt4o_mini_transcribe' then 16_000
          when 'o1', 'o1_pro', 'o3_mini' then 200_000
          when 'gpt35_turbo' then 16_385
-          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
-            'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
          else 4_096
          end
        end

        def max_tokens_for(model_id)
-
+          family = model_family(model_id)
+          return nil if NIL_LIMIT_FAMILIES.include?(family)
+
+          case family
          when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
          when 'gpt4' then 8_192
@@ -110,8 +143,6 @@ module RubyLLM
          when 'gpt4o_mini_transcribe' then 2_000
          when 'o1', 'o1_pro', 'o3_mini' then 100_000
          when 'o1_mini' then 65_536
-          when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
-            'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
          else 16_384
          end
        end
@@ -126,13 +157,15 @@ module RubyLLM
        end

        def pricing_for(model_id)
+          return image_pricing_for(model_id) if image_model?(model_id)
+
          standard_pricing = {
            input_per_million: input_price_for(model_id),
            output_per_million: output_price_for(model_id)
          }

          cached_price = cached_input_price_for(model_id)
-          standard_pricing[:
+          standard_pricing[:cache_read_input_per_million] = cached_price if cached_price

          { text_tokens: { standard: standard_pricing } }
        end
@@ -147,8 +180,9 @@ module RubyLLM

        def supports_vision?(model_id)
          case model_family(model_id)
-          when '
-            'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
+          when 'gpt_image', 'gpt_image_mini', 'gpt_image15', 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini',
+            'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
+            'gpt4o_search'
            true
          else
            false
@@ -176,27 +210,63 @@ module RubyLLM
        end

        def input_price_for(model_id)
+          return family_prices(model_id).dig(:text, :input) if image_model?(model_id)
+
          price_for(model_id, :input, 0.50)
        end

        def output_price_for(model_id)
+          return family_prices(model_id).dig(:text, :output) if image_model?(model_id)
+
          price_for(model_id, :output, 1.50)
        end

        def cached_input_price_for(model_id)
+          return family_prices(model_id).dig(:text, :cached_input) if image_model?(model_id)
+
          family = model_family(model_id).to_sym
          PRICES.fetch(family, {})[:cached_input]
        end

+        def image_model?(model_id)
+          %w[gpt_image gpt_image_mini gpt_image15].include?(model_family(model_id))
+        end
+
+        def image_pricing_for(model_id)
+          text_pricing = {
+            input_per_million: input_price_for(model_id)
+          }
+          cached_text_price = cached_input_price_for(model_id)
+          text_pricing[:cache_read_input_per_million] = cached_text_price if cached_text_price
+
+          image_pricing = {
+            input_per_million: family_prices(model_id).dig(:images, :input),
+            output_per_million: family_prices(model_id).dig(:images, :output)
+          }
+          cached_image_price = family_prices(model_id).dig(:images, :cached_input)
+          image_pricing[:cache_read_input_per_million] = cached_image_price if cached_image_price
+
+          {
+            text_tokens: { standard: text_pricing },
+            images: { standard: image_pricing }
+          }
+        end
+
        def price_for(model_id, key, fallback)
-
-          prices =
+          prices = family_prices(model_id)
+          prices = { key => fallback } if prices.empty?
          prices[key] || prices[:price] || fallback
        end

+        def family_prices(model_id)
+          family = model_family(model_id).to_sym
+          PRICES.fetch(family, {})
+        end
+
        module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
                        :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
-                        :input_price_for, :output_price_for, :cached_input_price_for, :
+                        :input_price_for, :output_price_for, :cached_input_price_for, :image_model?,
+                        :image_pricing_for, :price_for, :family_prices
      end
    end
  end
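For the new image families, `pricing_for` returns a two-part hash covering text tokens and image tokens. Following the PRICES entries above, a `gpt-image-1` id should work out as below (the module path is assumed from the gem's file layout):

```ruby
RubyLLM::Providers::OpenAI::Capabilities.pricing_for('gpt-image-1')
# => {
#      text_tokens: {
#        standard: { input_per_million: 5.0, cache_read_input_per_million: 1.25 }
#      },
#      images: {
#        standard: { input_per_million: 10.0, output_per_million: 40.0,
#                    cache_read_input_per_million: 2.5 }
#      }
#    }
```

Note that `text_tokens` carries no output price here, since the `gpt_image` PRICES entry defines none; only `gpt_image15` prices text output.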
data/lib/ruby_llm/providers/openai/chat.rb
CHANGED
@@ -61,8 +61,7 @@ module RubyLLM
          return unless message_data

          usage = data['usage'] || {}
-
-          thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+          thinking_tokens = thinking_tokens(usage)
          content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
          thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
          thinking_signature = extract_thinking_signature(message_data)
@@ -72,16 +71,56 @@ module RubyLLM
            content: content,
            thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
            tool_calls: parse_tool_calls(message_data['tool_calls']),
-            input_tokens: usage
-            output_tokens: usage
-            cached_tokens:
-            cache_creation_tokens:
+            input_tokens: input_tokens(usage),
+            output_tokens: output_tokens(usage),
+            cached_tokens: cache_read_tokens(usage),
+            cache_creation_tokens: cache_write_tokens(usage),
            thinking_tokens: thinking_tokens,
            model_id: data['model'],
            raw: response
          )
        end

+        def input_tokens(usage)
+          return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
+
+          prompt_tokens = usage['prompt_tokens']
+          return unless prompt_tokens
+
+          [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
+        end
+
+        def output_tokens(usage)
+          completion_tokens = usage['completion_tokens']
+          return unless completion_tokens
+
+          completion_tokens = completion_tokens.to_i
+          generated_tokens = generated_tokens_from_total(usage)
+          return completion_tokens unless generated_tokens && generated_tokens > completion_tokens
+
+          generated_tokens
+        end
+
+        def generated_tokens_from_total(usage)
+          prompt_tokens = usage['prompt_tokens']
+          total_tokens = usage['total_tokens']
+          return unless prompt_tokens && total_tokens
+
+          [total_tokens.to_i - prompt_tokens.to_i, 0].max
+        end
+
+        def cache_read_tokens(usage)
+          usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
+        end
+
+        def cache_write_tokens(usage)
+          usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
+        end
+
+        def thinking_tokens(usage)
+          usage.dig('completion_tokens_details', 'reasoning_tokens') || usage['reasoning_tokens']
+        end
+
        def format_messages(messages)
          messages.map do |msg|
            {
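The new helpers make token accounting uniform across OpenAI-compatible providers: cached (and any cache-write) tokens are carved out of `prompt_tokens`, `prompt_cache_miss_tokens` short-circuits when a provider reports it directly, and `output_tokens` falls back to `total_tokens - prompt_tokens` when that exceeds `completion_tokens`. A worked example with made-up numbers:

```ruby
usage = {
  'prompt_tokens' => 1_200,
  'completion_tokens' => 300,
  'total_tokens' => 1_600,
  'prompt_tokens_details' => { 'cached_tokens' => 1_000 },
  'completion_tokens_details' => { 'reasoning_tokens' => 64 }
}

cached = usage.dig('prompt_tokens_details', 'cached_tokens').to_i # => 1000
input  = [usage['prompt_tokens'] - cached, 0].max                 # => 200

# total - prompt is 400, which exceeds completion_tokens (300), so the
# larger figure wins and captures tokens some providers leave out:
output = [usage['total_tokens'] - usage['prompt_tokens'], 0].max  # => 400

usage.dig('completion_tokens_details', 'reasoning_tokens')        # => 64
```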
data/lib/ruby_llm/providers/openai/images.rb
CHANGED
@@ -7,31 +7,83 @@ module RubyLLM
      module Images
        module_function

-        def images_url
-          'images/generations'
+        def images_url(with: nil, mask: nil)
+          editing?(with, mask) ? 'images/edits' : 'images/generations'
        end

-        def render_image_payload(prompt, model:, size:)
+        def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
+          return render_edit_payload(prompt, model:, with:, mask:, params:) if editing?(with, mask)
+
          {
            model: model,
            prompt: prompt,
            n: 1,
            size: size
-          }
+          }.merge(params)
        end

        def parse_image_response(response, model:)
          data = response.body
-          image_data = data['data'].first
+          image_data = Array(data['data']).first
+
+          raise Error.new(nil, 'Unexpected response format from OpenAI image API') unless image_data

          Image.new(
            url: image_data['url'],
            mime_type: 'image/png', # DALL-E typically returns PNGs
            revised_prompt: image_data['revised_prompt'],
            model_id: model,
-            data: image_data['b64_json']
+            data: image_data['b64_json'],
+            usage: data['usage'] || {}
          )
        end
+
+        def validate_paint_inputs!(with:, mask:)
+          return unless editing?(with, mask)
+
+          raise ArgumentError, 'with: is required when mask: is provided' if mask && !attachments?(with)
+        end
+
+        def render_edit_payload(prompt, model:, with:, mask:, params:)
+          payload = params.merge(
+            model: model,
+            prompt: prompt,
+            image: build_upload_parts(with, label: 'images'),
+            n: 1
+          )
+          payload[:mask] = build_upload_part(mask, label: 'mask') if mask
+          payload
+        end
+
+        def build_upload_parts(sources, label:)
+          Array(sources).filter_map do |source|
+            next if blank_attachment?(source)
+
+            build_upload_part(source, label:)
+          end
+        end
+
+        def build_upload_part(source, label:)
+          attachment = Attachment.new(source)
+          unless attachment.image?
+            raise UnsupportedAttachmentError,
+                  "OpenAI image editing only supports image attachments for #{label}"
+          end
+
+          Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type, attachment.filename)
+        end
+
+        def editing?(with, mask)
+          attachments?(with) || !mask.nil?
+        end
+
+        def attachments?(value)
+          Array(value).any? { |item| !blank_attachment?(item) }
+        end
+
+        def blank_attachment?(value)
+          value.nil? || (value.is_a?(String) && value.strip.empty?)
+        end
      end
    end
  end
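`editing?` is what flips the provider from `images/generations` to `images/edits`: blank strings and nils in `with:` are ignored, and a mask alone selects edit mode, though `validate_paint_inputs!` then raises because `with:` is required with a mask. An illustration of the predicate's behavior (file names are placeholders):

```ruby
blank   = ->(v) { v.nil? || (v.is_a?(String) && v.strip.empty?) }
editing = ->(with, mask) { Array(with).any? { |v| !blank.call(v) } || !mask.nil? }

editing.call(nil, nil)          # => false -> POST images/generations
editing.call('photo.png', nil)  # => true  -> POST images/edits
editing.call(['', nil], nil)    # => false, blank attachments are ignored
editing.call(nil, 'mask.png')   # => true, but validate_paint_inputs! raises
```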