ruby_llm 1.14.1 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -3
  3. data/lib/generators/ruby_llm/generator_helpers.rb +8 -0
  4. data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +1 -1
  5. data/lib/ruby_llm/active_record/acts_as.rb +3 -0
  6. data/lib/ruby_llm/active_record/acts_as_legacy.rb +52 -25
  7. data/lib/ruby_llm/active_record/chat_methods.rb +39 -22
  8. data/lib/ruby_llm/active_record/message_methods.rb +17 -1
  9. data/lib/ruby_llm/active_record/model_methods.rb +7 -9
  10. data/lib/ruby_llm/active_record/payload_helpers.rb +3 -0
  11. data/lib/ruby_llm/active_record/tool_call_methods.rb +3 -0
  12. data/lib/ruby_llm/agent.rb +3 -2
  13. data/lib/ruby_llm/aliases.json +34 -4
  14. data/lib/ruby_llm/attachment.rb +11 -27
  15. data/lib/ruby_llm/chat.rb +62 -21
  16. data/lib/ruby_llm/cost.rb +224 -0
  17. data/lib/ruby_llm/image.rb +37 -4
  18. data/lib/ruby_llm/message.rb +20 -0
  19. data/lib/ruby_llm/model/info.rb +17 -0
  20. data/lib/ruby_llm/model/pricing_category.rb +13 -2
  21. data/lib/ruby_llm/models.json +25168 -20374
  22. data/lib/ruby_llm/models.rb +2 -1
  23. data/lib/ruby_llm/models_schema.json +3 -0
  24. data/lib/ruby_llm/provider.rb +10 -3
  25. data/lib/ruby_llm/providers/anthropic/tools.rb +4 -1
  26. data/lib/ruby_llm/providers/bedrock/chat.rb +24 -13
  27. data/lib/ruby_llm/providers/bedrock/streaming.rb +4 -1
  28. data/lib/ruby_llm/providers/gemini/chat.rb +8 -1
  29. data/lib/ruby_llm/providers/gemini/images.rb +2 -2
  30. data/lib/ruby_llm/providers/gemini/streaming.rb +4 -1
  31. data/lib/ruby_llm/providers/gemini/tools.rb +3 -1
  32. data/lib/ruby_llm/providers/mistral/capabilities.rb +6 -1
  33. data/lib/ruby_llm/providers/mistral/chat.rb +55 -4
  34. data/lib/ruby_llm/providers/openai/capabilities.rb +82 -12
  35. data/lib/ruby_llm/providers/openai/chat.rb +45 -6
  36. data/lib/ruby_llm/providers/openai/images.rb +58 -6
  37. data/lib/ruby_llm/providers/openai/streaming.rb +5 -6
  38. data/lib/ruby_llm/providers/openrouter/chat.rb +30 -6
  39. data/lib/ruby_llm/providers/openrouter/images.rb +2 -2
  40. data/lib/ruby_llm/providers/openrouter/models.rb +1 -1
  41. data/lib/ruby_llm/providers/openrouter/streaming.rb +5 -6
  42. data/lib/ruby_llm/railtie.rb +6 -0
  43. data/lib/ruby_llm/tokens.rb +8 -0
  44. data/lib/ruby_llm/tool.rb +24 -7
  45. data/lib/ruby_llm/version.rb +1 -1
  46. data/lib/ruby_llm.rb +2 -4
  47. data/lib/tasks/models.rake +13 -12
  48. metadata +19 -4
@@ -356,7 +356,8 @@ module RubyLLM
356
356
  text_standard = {
357
357
  input_per_million: cost[:input],
358
358
  output_per_million: cost[:output],
359
- cached_input_per_million: cost[:cache_read],
359
+ cache_read_input_per_million: cost[:cache_read],
360
+ cache_write_input_per_million: cost[:cache_write],
360
361
  reasoning_output_per_million: cost[:reasoning]
361
362
  }.compact
362
363
 
@@ -87,7 +87,10 @@
87
87
  "type": "object",
88
88
  "properties": {
89
89
  "input_per_million": {"type": "number", "minimum": 0},
90
+ "cache_read_input_per_million": {"type": "number", "minimum": 0},
91
+ "cache_write_input_per_million": {"type": "number", "minimum": 0},
90
92
  "cached_input_per_million": {"type": "number", "minimum": 0},
93
+ "cache_creation_input_per_million": {"type": "number", "minimum": 0},
91
94
  "output_per_million": {"type": "number", "minimum": 0},
92
95
  "reasoning_output_per_million": {"type": "number", "minimum": 0}
93
96
  }
@@ -81,9 +81,10 @@ module RubyLLM
81
81
  parse_moderation_response(response, model:)
82
82
  end
83
83
 
84
- def paint(prompt, model:, size:)
85
- payload = render_image_payload(prompt, model:, size:)
86
- response = @connection.post images_url, payload
84
+ def paint(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
85
+ validate_paint_inputs!(with:, mask:)
86
+ payload = render_image_payload(prompt, model:, size:, with:, mask:, params:)
87
+ response = @connection.post images_url(with:, mask:), payload
87
88
  parse_image_response(response, model:)
88
89
  end
89
90
 
@@ -225,6 +226,12 @@ module RubyLLM
225
226
 
226
227
  private
227
228
 
229
+ def validate_paint_inputs!(with:, mask:)
230
+ return if with.nil? && mask.nil?
231
+
232
+ raise UnsupportedAttachmentError, "#{name} does not support image references in paint"
233
+ end
234
+
228
235
  def build_audio_file_part(file_path)
229
236
  expanded_path = File.expand_path(file_path)
230
237
  mime_type = Marcel::MimeType.for(Pathname.new(expanded_path))
@@ -45,10 +45,13 @@ module RubyLLM
45
45
  end
46
46
 
47
47
  def format_tool_result_block(msg)
48
+ content = msg.content
49
+ content = '(no output)' if content.nil? || (content.respond_to?(:empty?) && content.empty?)
50
+
48
51
  {
49
52
  type: 'tool_result',
50
53
  tool_use_id: msg.tool_call_id,
51
- content: Media.format_content(msg.content)
54
+ content: Media.format_content(content)
52
55
  }
53
56
  end
54
57
 
@@ -56,7 +56,7 @@ module RubyLLM
56
56
  content: parse_text_content(content_blocks),
57
57
  thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
58
58
  tool_calls: parse_tool_calls(content_blocks),
59
- input_tokens: usage['inputTokens'],
59
+ input_tokens: input_tokens(usage),
60
60
  output_tokens: usage['outputTokens'],
61
61
  cached_tokens: usage['cacheReadInputTokens'],
62
62
  cache_creation_tokens: usage['cacheWriteInputTokens'],
@@ -66,6 +66,13 @@ module RubyLLM
66
66
  )
67
67
  end
68
68
 
69
+ def input_tokens(usage)
70
+ input_tokens = usage['inputTokens']
71
+ return unless input_tokens
72
+
73
+ [input_tokens.to_i - usage['cacheReadInputTokens'].to_i - usage['cacheWriteInputTokens'].to_i, 0].max
74
+ end
75
+
69
76
  def render_messages(messages)
70
77
  rendered = []
71
78
  tool_result_blocks = []
@@ -154,19 +161,23 @@ module RubyLLM
154
161
 
155
162
  def render_tool_result_content(content)
156
163
  return render_raw_tool_result_content(content.value) if content.is_a?(RubyLLM::Content::Raw)
164
+ return [{ json: content }] if content.is_a?(Hash) || content.is_a?(Array)
165
+ return render_content_tool_result_content(content) if content.is_a?(RubyLLM::Content)
157
166
 
158
- if content.is_a?(Hash) || content.is_a?(Array)
159
- [{ json: content }]
160
- elsif content.is_a?(RubyLLM::Content)
161
- blocks = []
162
- blocks << { text: content.text } if content.text
163
- content.attachments.each do |attachment|
164
- blocks << { text: attachment.for_llm }
165
- end
166
- blocks
167
- else
168
- [{ text: content.to_s }]
169
- end
167
+ [text_tool_result_block(content)]
168
+ end
169
+
170
+ def render_content_tool_result_content(content)
171
+ blocks = []
172
+ blocks << text_tool_result_block(content.text) unless content.text.to_s.empty?
173
+ content.attachments.each { |attachment| blocks << text_tool_result_block(attachment.for_llm) }
174
+ blocks.empty? ? [text_tool_result_block(nil)] : blocks
175
+ end
176
+
177
+ def text_tool_result_block(text)
178
+ text = text.to_s
179
+ text = '(no output)' if text.empty?
180
+ { text: text }
170
181
  end
171
182
 
172
183
  def render_raw_tool_result_content(raw_value)
@@ -158,7 +158,10 @@ module RubyLLM
158
158
  end
159
159
 
160
160
  def extract_input_tokens(metadata_usage, usage, message_usage)
161
- metadata_usage['inputTokens'] || usage['inputTokens'] || message_usage['input_tokens']
161
+ bedrock_usage = metadata_usage['inputTokens'] ? metadata_usage : usage
162
+ return Bedrock::Chat.input_tokens(bedrock_usage) if bedrock_usage['inputTokens']
163
+
164
+ message_usage['input_tokens']
162
165
  end
163
166
 
164
167
  def extract_output_tokens(metadata_usage, usage)
@@ -118,7 +118,7 @@ module RubyLLM
118
118
  signature: extract_thought_signature(parts)
119
119
  ),
120
120
  tool_calls: tool_calls,
121
- input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
121
+ input_tokens: input_tokens(data),
122
122
  output_tokens: calculate_output_tokens(data),
123
123
  cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
124
124
  thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
@@ -127,6 +127,13 @@ module RubyLLM
127
127
  )
128
128
  end
129
129
 
130
+ def input_tokens(data)
131
+ prompt_tokens = data.dig('usageMetadata', 'promptTokenCount')
132
+ return unless prompt_tokens
133
+
134
+ [prompt_tokens.to_i - data.dig('usageMetadata', 'cachedContentTokenCount').to_i, 0].max
135
+ end
136
+
130
137
  def convert_schema_to_gemini(schema)
131
138
  return nil unless schema
132
139
 
@@ -5,11 +5,11 @@ module RubyLLM
5
5
  class Gemini
6
6
  # Image generation methods for the Gemini API implementation
7
7
  module Images
8
- def images_url
8
+ def images_url(with: nil, mask: nil) # rubocop:disable Lint/UnusedMethodArgument
9
9
  "models/#{@model}:predict"
10
10
  end
11
11
 
12
- def render_image_payload(prompt, model:, size:)
12
+ def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
13
13
  RubyLLM.logger.debug { "Ignoring size #{size}. Gemini does not support image size customization." }
14
14
  @model = model
15
15
  {
@@ -70,7 +70,10 @@ module RubyLLM
70
70
  end
71
71
 
72
72
  def extract_input_tokens(data)
73
- data.dig('usageMetadata', 'promptTokenCount')
73
+ prompt_tokens = data.dig('usageMetadata', 'promptTokenCount')
74
+ return unless prompt_tokens
75
+
76
+ [prompt_tokens.to_i - data.dig('usageMetadata', 'cachedContentTokenCount').to_i, 0].max
74
77
  end
75
78
 
76
79
  def extract_output_tokens(data)
@@ -46,13 +46,15 @@ module RubyLLM
46
46
 
47
47
  def format_tool_result(msg, function_name = nil)
48
48
  function_name ||= msg.tool_call_id
49
+ content = msg.content
50
+ content = '(no output)' if content.nil? || (content.respond_to?(:empty?) && content.empty?)
49
51
 
50
52
  [{
51
53
  functionResponse: {
52
54
  name: function_name,
53
55
  response: {
54
56
  name: function_name,
55
- content: Media.format_content(msg.content)
57
+ content: Media.format_content(content)
56
58
  }
57
59
  }
58
60
  }]
@@ -31,6 +31,11 @@ module RubyLLM
31
31
  !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions/) && supports_tools?(model_id)
32
32
  end
33
33
 
34
+ def supports_reasoning?(model_id)
35
+ model_id.match?(/magistral/) ||
36
+ model_id.match?(/\Amistral-(?:small-latest|medium-(?:3(?:[.-]5)?|latest))\z/)
37
+ end
38
+
34
39
  def format_display_name(model_id)
35
40
  case model_id
36
41
  when /mistral-large/ then 'Mistral Large'
@@ -101,7 +106,7 @@ module RubyLLM
101
106
  capabilities << 'structured_output' if supports_json_mode?(model_id)
102
107
  capabilities << 'vision' if supports_vision?(model_id)
103
108
 
104
- capabilities << 'reasoning' if model_id.match?(/magistral/)
109
+ capabilities << 'reasoning' if supports_reasoning?(model_id)
105
110
  capabilities << 'batch' unless model_id.match?(/voxtral|ocr|embed|moderation/)
106
111
  capabilities << 'fine_tuning' if model_id.match?(/mistral-(small|medium|large)|devstral/)
107
112
  capabilities << 'distillation' if model_id.match?(/ministral/)
@@ -27,12 +27,30 @@ module RubyLLM
27
27
  schema: nil, thinking: nil, tool_prefs: nil)
28
28
  payload = super
29
29
  payload.delete(:stream_options)
30
- payload.delete(:reasoning_effort)
31
- warn_on_unsupported_thinking(model, thinking)
30
+ configure_thinking_payload(payload, model, thinking)
31
+ normalize_required_tool_choice(payload)
32
32
  payload
33
33
  end
34
34
  # rubocop:enable Metrics/ParameterLists
35
35
 
36
+ def build_tool_choice(tool_choice)
37
+ return 'any' if tool_choice == :required
38
+
39
+ OpenAI::Tools.build_tool_choice(tool_choice)
40
+ end
41
+
42
+ def normalize_required_tool_choice(payload)
43
+ return unless payload[:tool_choice] == 'any' && Array(payload[:tools]).one?
44
+
45
+ function_name = payload.dig(:tools, 0, :function, :name)
46
+ return unless function_name
47
+
48
+ payload[:tool_choice] = {
49
+ type: 'function',
50
+ function: { name: function_name }
51
+ }
52
+ end
53
+
36
54
  def format_content_with_thinking(msg)
37
55
  formatted_content = OpenAI::Media.format_content(msg.content)
38
56
  return formatted_content unless msg.role == :assistant && msg.thinking
@@ -45,14 +63,47 @@ module RubyLLM
45
63
 
46
64
  def warn_on_unsupported_thinking(model, thinking)
47
65
  return unless thinking&.enabled?
48
- return if model.id.to_s.include?('magistral')
66
+ return if native_reasoning_model?(model.id) || adjustable_reasoning_model?(model.id)
49
67
 
50
68
  RubyLLM.logger.warn(
51
- 'Mistral thinking is only supported on Magistral models. ' \
69
+ 'Mistral thinking is only supported on Magistral and adjustable-reasoning models. ' \
52
70
  "Ignoring thinking settings for #{model.id}."
53
71
  )
54
72
  end
55
73
 
74
+ def configure_thinking_payload(payload, model, thinking)
75
+ return unless thinking&.enabled?
76
+
77
+ if native_reasoning_model?(model.id)
78
+ configure_native_reasoning_payload(payload, thinking)
79
+ elsif adjustable_reasoning_model?(model.id)
80
+ payload[:reasoning_effort] = reasoning_effort_for(thinking)
81
+ else
82
+ payload.delete(:reasoning_effort)
83
+ warn_on_unsupported_thinking(model, thinking)
84
+ end
85
+ end
86
+
87
+ def configure_native_reasoning_payload(payload, thinking)
88
+ payload.delete(:reasoning_effort)
89
+ payload[:prompt_mode] = thinking.effort == 'none' ? nil : 'reasoning'
90
+ end
91
+
92
+ def reasoning_effort_for(thinking)
93
+ effort = thinking.respond_to?(:effort) ? thinking.effort : nil
94
+ return effort if %w[high none].include?(effort)
95
+
96
+ 'high'
97
+ end
98
+
99
+ def native_reasoning_model?(model_id)
100
+ model_id.to_s.include?('magistral')
101
+ end
102
+
103
+ def adjustable_reasoning_model?(model_id)
104
+ model_id.to_s.match?(/\Amistral-(?:small-latest|medium-(?:3(?:[.-]5)?|latest))\z/)
105
+ end
106
+
56
107
  def build_thinking_blocks(thinking)
57
108
  return [] unless thinking
58
109
 
@@ -8,6 +8,9 @@ module RubyLLM
8
8
  module_function
9
9
 
10
10
  MODEL_PATTERNS = {
11
+ gpt_image15: /^gpt-image-1\.5/,
12
+ gpt_image_mini: /^gpt-image-1-mini/,
13
+ gpt_image: /^gpt-image-1(?:$|-)/,
11
14
  gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
12
15
  gpt41_mini: /^gpt-4\.1-mini/,
13
16
  gpt41_nano: /^gpt-4\.1-nano/,
@@ -43,6 +46,18 @@ module RubyLLM
43
46
  }.freeze
44
47
 
45
48
  PRICES = {
49
+ gpt_image: {
50
+ text: { input: 5.0, cached_input: 1.25 },
51
+ images: { input: 10.0, cached_input: 2.5, output: 40.0 }
52
+ },
53
+ gpt_image_mini: {
54
+ text: { input: 2.0, cached_input: 0.2 },
55
+ images: { input: 2.5, cached_input: 0.25, output: 8.0 }
56
+ },
57
+ gpt_image15: {
58
+ text: { input: 5.0, cached_input: 1.25, output: 10.0 },
59
+ images: { input: 8.0, cached_input: 2.0, output: 32.0 }
60
+ },
46
61
  gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
47
62
  gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
48
63
  gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
@@ -77,6 +92,20 @@ module RubyLLM
77
92
  moderation: { price: 0.0 }
78
93
  }.freeze
79
94
 
95
+ NIL_LIMIT_FAMILIES = %w[
96
+ gpt_image
97
+ gpt_image_mini
98
+ gpt_image15
99
+ gpt4o_mini_tts
100
+ tts1
101
+ tts1_hd
102
+ whisper
103
+ moderation
104
+ embedding3_large
105
+ embedding3_small
106
+ embedding_ada
107
+ ].freeze
108
+
80
109
  def supports_tool_choice?(_model_id)
81
110
  true
82
111
  end
@@ -86,7 +115,10 @@ module RubyLLM
86
115
  end
87
116
 
88
117
  def context_window_for(model_id)
89
- case model_family(model_id)
118
+ family = model_family(model_id)
119
+ return nil if NIL_LIMIT_FAMILIES.include?(family)
120
+
121
+ case family
90
122
  when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
91
123
  when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
92
124
  'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
@@ -95,14 +127,15 @@ module RubyLLM
95
127
  when 'gpt4o_mini_transcribe' then 16_000
96
128
  when 'o1', 'o1_pro', 'o3_mini' then 200_000
97
129
  when 'gpt35_turbo' then 16_385
98
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
99
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
100
130
  else 4_096
101
131
  end
102
132
  end
103
133
 
104
134
  def max_tokens_for(model_id)
105
- case model_family(model_id)
135
+ family = model_family(model_id)
136
+ return nil if NIL_LIMIT_FAMILIES.include?(family)
137
+
138
+ case family
106
139
  when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
107
140
  when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
108
141
  when 'gpt4' then 8_192
@@ -110,8 +143,6 @@ module RubyLLM
110
143
  when 'gpt4o_mini_transcribe' then 2_000
111
144
  when 'o1', 'o1_pro', 'o3_mini' then 100_000
112
145
  when 'o1_mini' then 65_536
113
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
114
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
115
146
  else 16_384
116
147
  end
117
148
  end
@@ -126,13 +157,15 @@ module RubyLLM
126
157
  end
127
158
 
128
159
  def pricing_for(model_id)
160
+ return image_pricing_for(model_id) if image_model?(model_id)
161
+
129
162
  standard_pricing = {
130
163
  input_per_million: input_price_for(model_id),
131
164
  output_per_million: output_price_for(model_id)
132
165
  }
133
166
 
134
167
  cached_price = cached_input_price_for(model_id)
135
- standard_pricing[:cached_input_per_million] = cached_price if cached_price
168
+ standard_pricing[:cache_read_input_per_million] = cached_price if cached_price
136
169
 
137
170
  { text_tokens: { standard: standard_pricing } }
138
171
  end
@@ -147,8 +180,9 @@ module RubyLLM
147
180
 
148
181
  def supports_vision?(model_id)
149
182
  case model_family(model_id)
150
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
151
- 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search'
183
+ when 'gpt_image', 'gpt_image_mini', 'gpt_image15', 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini',
184
+ 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
185
+ 'gpt4o_search'
152
186
  true
153
187
  else
154
188
  false
@@ -176,27 +210,63 @@ module RubyLLM
176
210
  end
177
211
 
178
212
  def input_price_for(model_id)
213
+ return family_prices(model_id).dig(:text, :input) if image_model?(model_id)
214
+
179
215
  price_for(model_id, :input, 0.50)
180
216
  end
181
217
 
182
218
  def output_price_for(model_id)
219
+ return family_prices(model_id).dig(:text, :output) if image_model?(model_id)
220
+
183
221
  price_for(model_id, :output, 1.50)
184
222
  end
185
223
 
186
224
  def cached_input_price_for(model_id)
225
+ return family_prices(model_id).dig(:text, :cached_input) if image_model?(model_id)
226
+
187
227
  family = model_family(model_id).to_sym
188
228
  PRICES.fetch(family, {})[:cached_input]
189
229
  end
190
230
 
231
+ def image_model?(model_id)
232
+ %w[gpt_image gpt_image_mini gpt_image15].include?(model_family(model_id))
233
+ end
234
+
235
+ def image_pricing_for(model_id)
236
+ text_pricing = {
237
+ input_per_million: input_price_for(model_id)
238
+ }
239
+ cached_text_price = cached_input_price_for(model_id)
240
+ text_pricing[:cache_read_input_per_million] = cached_text_price if cached_text_price
241
+
242
+ image_pricing = {
243
+ input_per_million: family_prices(model_id).dig(:images, :input),
244
+ output_per_million: family_prices(model_id).dig(:images, :output)
245
+ }
246
+ cached_image_price = family_prices(model_id).dig(:images, :cached_input)
247
+ image_pricing[:cache_read_input_per_million] = cached_image_price if cached_image_price
248
+
249
+ {
250
+ text_tokens: { standard: text_pricing },
251
+ images: { standard: image_pricing }
252
+ }
253
+ end
254
+
191
255
  def price_for(model_id, key, fallback)
192
- family = model_family(model_id).to_sym
193
- prices = PRICES.fetch(family, { key => fallback })
256
+ prices = family_prices(model_id)
257
+ prices = { key => fallback } if prices.empty?
194
258
  prices[key] || prices[:price] || fallback
195
259
  end
196
260
 
261
+ def family_prices(model_id)
262
+ family = model_family(model_id).to_sym
263
+ PRICES.fetch(family, {})
264
+ end
265
+
197
266
  module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
198
267
  :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
199
- :input_price_for, :output_price_for, :cached_input_price_for, :price_for
268
+ :input_price_for, :output_price_for, :cached_input_price_for, :image_model?,
269
+ :image_pricing_for, :price_for, :family_prices
200
270
  end
201
271
  end
202
272
  end
@@ -61,8 +61,7 @@ module RubyLLM
61
61
  return unless message_data
62
62
 
63
63
  usage = data['usage'] || {}
64
- cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
65
- thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
64
+ thinking_tokens = thinking_tokens(usage)
66
65
  content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
67
66
  thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
68
67
  thinking_signature = extract_thinking_signature(message_data)
@@ -72,16 +71,56 @@ module RubyLLM
72
71
  content: content,
73
72
  thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
74
73
  tool_calls: parse_tool_calls(message_data['tool_calls']),
75
- input_tokens: usage['prompt_tokens'],
76
- output_tokens: usage['completion_tokens'],
77
- cached_tokens: cached_tokens,
78
- cache_creation_tokens: 0,
74
+ input_tokens: input_tokens(usage),
75
+ output_tokens: output_tokens(usage),
76
+ cached_tokens: cache_read_tokens(usage),
77
+ cache_creation_tokens: cache_write_tokens(usage),
79
78
  thinking_tokens: thinking_tokens,
80
79
  model_id: data['model'],
81
80
  raw: response
82
81
  )
83
82
  end
84
83
 
84
+ def input_tokens(usage)
85
+ return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
86
+
87
+ prompt_tokens = usage['prompt_tokens']
88
+ return unless prompt_tokens
89
+
90
+ [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
91
+ end
92
+
93
+ def output_tokens(usage)
94
+ completion_tokens = usage['completion_tokens']
95
+ return unless completion_tokens
96
+
97
+ completion_tokens = completion_tokens.to_i
98
+ generated_tokens = generated_tokens_from_total(usage)
99
+ return completion_tokens unless generated_tokens && generated_tokens > completion_tokens
100
+
101
+ generated_tokens
102
+ end
103
+
104
+ def generated_tokens_from_total(usage)
105
+ prompt_tokens = usage['prompt_tokens']
106
+ total_tokens = usage['total_tokens']
107
+ return unless prompt_tokens && total_tokens
108
+
109
+ [total_tokens.to_i - prompt_tokens.to_i, 0].max
110
+ end
111
+
112
+ def cache_read_tokens(usage)
113
+ usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
114
+ end
115
+
116
+ def cache_write_tokens(usage)
117
+ usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
118
+ end
119
+
120
+ def thinking_tokens(usage)
121
+ usage.dig('completion_tokens_details', 'reasoning_tokens') || usage['reasoning_tokens']
122
+ end
123
+
85
124
  def format_messages(messages)
86
125
  messages.map do |msg|
87
126
  {
@@ -7,31 +7,83 @@ module RubyLLM
7
7
  module Images
8
8
  module_function
9
9
 
10
- def images_url
11
- 'images/generations'
10
+ def images_url(with: nil, mask: nil)
11
+ editing?(with, mask) ? 'images/edits' : 'images/generations'
12
12
  end
13
13
 
14
- def render_image_payload(prompt, model:, size:)
14
+ def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
15
+ return render_edit_payload(prompt, model:, with:, mask:, params:) if editing?(with, mask)
16
+
15
17
  {
16
18
  model: model,
17
19
  prompt: prompt,
18
20
  n: 1,
19
21
  size: size
20
- }
22
+ }.merge(params)
21
23
  end
22
24
 
23
25
  def parse_image_response(response, model:)
24
26
  data = response.body
25
- image_data = data['data'].first
27
+ image_data = Array(data['data']).first
28
+
29
+ raise Error.new(nil, 'Unexpected response format from OpenAI image API') unless image_data
26
30
 
27
31
  Image.new(
28
32
  url: image_data['url'],
29
33
  mime_type: 'image/png', # DALL-E typically returns PNGs
30
34
  revised_prompt: image_data['revised_prompt'],
31
35
  model_id: model,
32
- data: image_data['b64_json']
36
+ data: image_data['b64_json'],
37
+ usage: data['usage'] || {}
33
38
  )
34
39
  end
40
+
41
+ def validate_paint_inputs!(with:, mask:)
42
+ return unless editing?(with, mask)
43
+
44
+ raise ArgumentError, 'with: is required when mask: is provided' if mask && !attachments?(with)
45
+ end
46
+
47
+ def render_edit_payload(prompt, model:, with:, mask:, params:)
48
+ payload = params.merge(
49
+ model: model,
50
+ prompt: prompt,
51
+ image: build_upload_parts(with, label: 'images'),
52
+ n: 1
53
+ )
54
+ payload[:mask] = build_upload_part(mask, label: 'mask') if mask
55
+ payload
56
+ end
57
+
58
+ def build_upload_parts(sources, label:)
59
+ Array(sources).filter_map do |source|
60
+ next if blank_attachment?(source)
61
+
62
+ build_upload_part(source, label:)
63
+ end
64
+ end
65
+
66
+ def build_upload_part(source, label:)
67
+ attachment = Attachment.new(source)
68
+ unless attachment.image?
69
+ raise UnsupportedAttachmentError,
70
+ "OpenAI image editing only supports image attachments for #{label}"
71
+ end
72
+
73
+ Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type, attachment.filename)
74
+ end
75
+
76
+ def editing?(with, mask)
77
+ attachments?(with) || !mask.nil?
78
+ end
79
+
80
+ def attachments?(value)
81
+ Array(value).any? { |item| !blank_attachment?(item) }
82
+ end
83
+
84
+ def blank_attachment?(value)
85
+ value.nil? || (value.is_a?(String) && value.strip.empty?)
86
+ end
35
87
  end
36
88
  end
37
89
  end