ruby_llm 1.14.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +7 -5
  3. data/lib/generators/ruby_llm/generator_helpers.rb +8 -0
  4. data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +1 -1
  5. data/lib/ruby_llm/active_record/acts_as.rb +3 -0
  6. data/lib/ruby_llm/active_record/acts_as_legacy.rb +52 -25
  7. data/lib/ruby_llm/active_record/chat_methods.rb +47 -23
  8. data/lib/ruby_llm/active_record/message_methods.rb +19 -14
  9. data/lib/ruby_llm/active_record/model_methods.rb +7 -9
  10. data/lib/ruby_llm/active_record/payload_helpers.rb +29 -0
  11. data/lib/ruby_llm/active_record/tool_call_methods.rb +5 -15
  12. data/lib/ruby_llm/agent.rb +3 -2
  13. data/lib/ruby_llm/aliases.json +53 -14
  14. data/lib/ruby_llm/attachment.rb +11 -27
  15. data/lib/ruby_llm/chat.rb +62 -21
  16. data/lib/ruby_llm/cost.rb +224 -0
  17. data/lib/ruby_llm/image.rb +37 -4
  18. data/lib/ruby_llm/message.rb +20 -0
  19. data/lib/ruby_llm/model/info.rb +17 -0
  20. data/lib/ruby_llm/model/pricing_category.rb +13 -2
  21. data/lib/ruby_llm/models.json +26511 -24930
  22. data/lib/ruby_llm/models.rb +2 -1
  23. data/lib/ruby_llm/models_schema.json +3 -0
  24. data/lib/ruby_llm/provider.rb +10 -3
  25. data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -133
  26. data/lib/ruby_llm/providers/anthropic/models.rb +2 -8
  27. data/lib/ruby_llm/providers/anthropic/tools.rb +4 -1
  28. data/lib/ruby_llm/providers/bedrock/chat.rb +24 -13
  29. data/lib/ruby_llm/providers/bedrock/streaming.rb +4 -1
  30. data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -119
  31. data/lib/ruby_llm/providers/gemini/capabilities.rb +45 -215
  32. data/lib/ruby_llm/providers/gemini/chat.rb +8 -1
  33. data/lib/ruby_llm/providers/gemini/images.rb +2 -2
  34. data/lib/ruby_llm/providers/gemini/models.rb +2 -4
  35. data/lib/ruby_llm/providers/gemini/streaming.rb +4 -1
  36. data/lib/ruby_llm/providers/gemini/tools.rb +3 -1
  37. data/lib/ruby_llm/providers/mistral/capabilities.rb +6 -1
  38. data/lib/ruby_llm/providers/mistral/chat.rb +55 -4
  39. data/lib/ruby_llm/providers/openai/capabilities.rb +157 -195
  40. data/lib/ruby_llm/providers/openai/chat.rb +45 -6
  41. data/lib/ruby_llm/providers/openai/images.rb +58 -6
  42. data/lib/ruby_llm/providers/openai/models.rb +2 -4
  43. data/lib/ruby_llm/providers/openai/streaming.rb +5 -6
  44. data/lib/ruby_llm/providers/openrouter/chat.rb +30 -6
  45. data/lib/ruby_llm/providers/openrouter/images.rb +2 -2
  46. data/lib/ruby_llm/providers/openrouter/models.rb +1 -1
  47. data/lib/ruby_llm/providers/openrouter/streaming.rb +5 -6
  48. data/lib/ruby_llm/providers/perplexity/capabilities.rb +34 -99
  49. data/lib/ruby_llm/providers/perplexity/models.rb +12 -14
  50. data/lib/ruby_llm/railtie.rb +6 -0
  51. data/lib/ruby_llm/tokens.rb +8 -0
  52. data/lib/ruby_llm/tool.rb +24 -7
  53. data/lib/ruby_llm/version.rb +1 -1
  54. data/lib/ruby_llm.rb +2 -4
  55. data/lib/tasks/models.rake +13 -12
  56. metadata +21 -5
@@ -3,13 +3,14 @@
3
3
  module RubyLLM
4
4
  module Providers
5
5
  class OpenAI
6
- # Determines capabilities and pricing for OpenAI models
6
+ # Provider-level capability checks and narrow registry fallbacks.
7
7
  module Capabilities
8
8
  module_function
9
9
 
10
10
  MODEL_PATTERNS = {
11
- dall_e: /^dall-e/,
12
- chatgpt4o: /^chatgpt-4o/,
11
+ gpt_image15: /^gpt-image-1\.5/,
12
+ gpt_image_mini: /^gpt-image-1-mini/,
13
+ gpt_image: /^gpt-image-1(?:$|-)/,
13
14
  gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
14
15
  gpt41_mini: /^gpt-4\.1-mini/,
15
16
  gpt41_nano: /^gpt-4\.1-nano/,
@@ -26,9 +27,9 @@ module RubyLLM
26
27
  gpt4o_realtime: /^gpt-4o-realtime/,
27
28
  gpt4o_search: /^gpt-4o-search/,
28
29
  gpt4o_transcribe: /^gpt-4o-transcribe/,
29
- gpt5: /^gpt-5/,
30
- gpt5_mini: /^gpt-5-mini/,
31
- gpt5_nano: /^gpt-5-nano/,
30
+ gpt5: /^gpt-5(?!.*(?:mini|nano))/,
31
+ gpt5_mini: /^gpt-5.*mini/,
32
+ gpt5_nano: /^gpt-5.*nano/,
32
33
  o1: /^o1(?!-(?:mini|pro))/,
33
34
  o1_mini: /^o1-mini/,
34
35
  o1_pro: /^o1-pro/,
@@ -44,101 +45,38 @@ module RubyLLM
44
45
  moderation: /^(?:omni|text)-moderation/
45
46
  }.freeze
46
47
 
47
- def context_window_for(model_id)
48
- case model_family(model_id)
49
- when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
50
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
51
- 'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
52
- 'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
53
- when 'gpt4' then 8_192
54
- when 'gpt4o_mini_transcribe' then 16_000
55
- when 'o1', 'o1_pro', 'o3_mini' then 200_000
56
- when 'gpt35_turbo' then 16_385
57
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
58
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
59
- else 4_096
60
- end
61
- end
62
-
63
- def max_tokens_for(model_id)
64
- case model_family(model_id)
65
- when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
66
- when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
67
- when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
68
- when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
69
- when 'gpt4' then 8_192
70
- when 'gpt35_turbo' then 4_096
71
- when 'gpt4_turbo', 'gpt4o_realtime', 'gpt4o_mini_realtime' then 4_096 # rubocop:disable Lint/DuplicateBranch
72
- when 'gpt4o_mini_transcribe' then 2_000
73
- when 'o1', 'o1_pro', 'o3_mini' then 100_000
74
- when 'o1_mini' then 65_536
75
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
76
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
77
- else 16_384 # rubocop:disable Lint/DuplicateBranch
78
- end
79
- end
80
-
81
- def supports_vision?(model_id)
82
- case model_family(model_id)
83
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4',
84
- 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search',
85
- 'gpt4o_mini_search' then true
86
- else false
87
- end
88
- end
89
-
90
- def supports_functions?(model_id)
91
- case model_family(model_id)
92
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o',
93
- 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
94
- when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
95
- 'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
96
- else false # rubocop:disable Lint/DuplicateBranch
97
- end
98
- end
99
-
100
- def supports_tool_choice?(_model_id)
101
- true
102
- end
103
-
104
- def supports_tool_parallel_control?(_model_id)
105
- true
106
- end
107
-
108
- def supports_structured_output?(model_id)
109
- case model_family(model_id)
110
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o',
111
- 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
112
- else false
113
- end
114
- end
115
-
116
- def supports_json_mode?(model_id)
117
- supports_structured_output?(model_id)
118
- end
119
-
120
48
  PRICES = {
49
+ gpt_image: {
50
+ text: { input: 5.0, cached_input: 1.25 },
51
+ images: { input: 10.0, cached_input: 2.5, output: 40.0 }
52
+ },
53
+ gpt_image_mini: {
54
+ text: { input: 2.0, cached_input: 0.2 },
55
+ images: { input: 2.5, cached_input: 0.25, output: 8.0 }
56
+ },
57
+ gpt_image15: {
58
+ text: { input: 5.0, cached_input: 1.25, output: 10.0 },
59
+ images: { input: 8.0, cached_input: 2.0, output: 32.0 }
60
+ },
121
61
  gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
122
62
  gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
123
63
  gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
124
64
  gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
125
65
  gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
126
66
  gpt41_nano: { input: 0.1, output: 0.4 },
127
- chatgpt4o: { input: 5.0, output: 15.0 },
128
67
  gpt4: { input: 10.0, output: 30.0 },
129
68
  gpt4_turbo: { input: 10.0, output: 30.0 },
130
- gpt45: { input: 75.0, output: 150.0 },
131
69
  gpt35_turbo: { input: 0.5, output: 1.5 },
132
70
  gpt4o: { input: 2.5, output: 10.0 },
133
- gpt4o_audio: { input: 2.5, output: 10.0, audio_input: 40.0, audio_output: 80.0 },
71
+ gpt4o_audio: { input: 2.5, output: 10.0 },
134
72
  gpt4o_mini: { input: 0.15, output: 0.6 },
135
- gpt4o_mini_audio: { input: 0.15, output: 0.6, audio_input: 10.0, audio_output: 20.0 },
73
+ gpt4o_mini_audio: { input: 0.15, output: 0.6 },
136
74
  gpt4o_mini_realtime: { input: 0.6, output: 2.4 },
137
- gpt4o_mini_transcribe: { input: 1.25, output: 5.0, audio_input: 3.0 },
75
+ gpt4o_mini_transcribe: { input: 1.25, output: 5.0 },
138
76
  gpt4o_mini_tts: { input: 0.6, output: 12.0 },
139
77
  gpt4o_realtime: { input: 5.0, output: 20.0 },
140
78
  gpt4o_search: { input: 2.5, output: 10.0 },
141
- gpt4o_transcribe: { input: 2.5, output: 10.0, audio_input: 6.0 },
79
+ gpt4o_transcribe: { input: 2.5, output: 10.0 },
142
80
  o1: { input: 15.0, output: 60.0 },
143
81
  o1_mini: { input: 1.1, output: 4.4 },
144
82
  o1_pro: { input: 150.0, output: 600.0 },
@@ -154,157 +92,181 @@ module RubyLLM
154
92
  moderation: { price: 0.0 }
155
93
  }.freeze
156
94
 
157
- def model_family(model_id)
158
- MODEL_PATTERNS.each do |family, pattern|
159
- return family.to_s if model_id.match?(pattern)
160
- end
161
- 'other'
162
- end
95
+ NIL_LIMIT_FAMILIES = %w[
96
+ gpt_image
97
+ gpt_image_mini
98
+ gpt_image15
99
+ gpt4o_mini_tts
100
+ tts1
101
+ tts1_hd
102
+ whisper
103
+ moderation
104
+ embedding3_large
105
+ embedding3_small
106
+ embedding_ada
107
+ ].freeze
163
108
 
164
- def input_price_for(model_id)
165
- family = model_family(model_id).to_sym
166
- prices = PRICES.fetch(family, { input: default_input_price })
167
- prices[:input] || prices[:price] || default_input_price
109
+ def supports_tool_choice?(_model_id)
110
+ true
168
111
  end
169
112
 
170
- def cached_input_price_for(model_id)
171
- family = model_family(model_id).to_sym
172
- prices = PRICES.fetch(family, {})
173
- prices[:cached_input]
113
+ def supports_tool_parallel_control?(_model_id)
114
+ true
174
115
  end
175
116
 
176
- def output_price_for(model_id)
177
- family = model_family(model_id).to_sym
178
- prices = PRICES.fetch(family, { output: default_output_price })
179
- prices[:output] || prices[:price] || default_output_price
180
- end
117
+ def context_window_for(model_id)
118
+ family = model_family(model_id)
119
+ return nil if NIL_LIMIT_FAMILIES.include?(family)
181
120
 
182
- def model_type(model_id)
183
- case model_family(model_id)
184
- when /embedding/ then 'embedding'
185
- when /^tts|whisper|gpt4o_(?:mini_)?(?:transcribe|tts)$/ then 'audio'
186
- when 'moderation' then 'moderation'
187
- when /dall/ then 'image'
188
- else 'chat'
121
+ case family
122
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
123
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
124
+ 'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
125
+ 'gpt4o_transcribe', 'o1_mini' then 128_000
126
+ when 'gpt4' then 8_192
127
+ when 'gpt4o_mini_transcribe' then 16_000
128
+ when 'o1', 'o1_pro', 'o3_mini' then 200_000
129
+ when 'gpt35_turbo' then 16_385
130
+ else 4_096
189
131
  end
190
132
  end
191
133
 
192
- def default_input_price
193
- 0.50
194
- end
134
+ def max_tokens_for(model_id)
135
+ family = model_family(model_id)
136
+ return nil if NIL_LIMIT_FAMILIES.include?(family)
195
137
 
196
- def default_output_price
197
- 1.50
138
+ case family
139
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
140
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
141
+ when 'gpt4' then 8_192
142
+ when 'gpt35_turbo' then 4_096
143
+ when 'gpt4o_mini_transcribe' then 2_000
144
+ when 'o1', 'o1_pro', 'o3_mini' then 100_000
145
+ when 'o1_mini' then 65_536
146
+ else 16_384
147
+ end
198
148
  end
199
149
 
200
- def format_display_name(model_id)
201
- model_id.then { |id| humanize(id) }
202
- .then { |name| apply_special_formatting(name) }
150
+ def critical_capabilities_for(model_id)
151
+ capabilities = []
152
+ capabilities << 'function_calling' if supports_functions?(model_id)
153
+ capabilities << 'structured_output' if supports_structured_output?(model_id)
154
+ capabilities << 'vision' if supports_vision?(model_id)
155
+ capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
156
+ capabilities
203
157
  end
204
158
 
205
- def humanize(id)
206
- id.tr('-', ' ')
207
- .split
208
- .map(&:capitalize)
209
- .join(' ')
210
- end
159
+ def pricing_for(model_id)
160
+ return image_pricing_for(model_id) if image_model?(model_id)
161
+
162
+ standard_pricing = {
163
+ input_per_million: input_price_for(model_id),
164
+ output_per_million: output_price_for(model_id)
165
+ }
211
166
 
212
- def apply_special_formatting(name)
213
- name
214
- .gsub(/(\d{4}) (\d{2}) (\d{2})/, '\1\2\3')
215
- .gsub(/^(?:Gpt|Chatgpt|Tts|Dall E) /) { |m| special_prefix_format(m.strip) }
216
- .gsub(/^O([13]) /, 'O\1-')
217
- .gsub(/^O[13] Mini/, '\0'.tr(' ', '-'))
218
- .gsub(/\d\.\d /, '\0'.sub(' ', '-'))
219
- .gsub(/4o (?=Mini|Preview|Turbo|Audio|Realtime|Transcribe|Tts)/, '4o-')
220
- .gsub(/\bHd\b/, 'HD')
221
- .gsub(/(?:Omni|Text) Moderation/, '\0'.tr(' ', '-'))
222
- .gsub('Text Embedding', 'text-embedding-')
167
+ cached_price = cached_input_price_for(model_id)
168
+ standard_pricing[:cache_read_input_per_million] = cached_price if cached_price
169
+
170
+ { text_tokens: { standard: standard_pricing } }
223
171
  end
224
172
 
225
- def special_prefix_format(prefix)
226
- case prefix # rubocop:disable Style/HashLikeCase
227
- when 'Gpt' then 'GPT-'
228
- when 'Chatgpt' then 'ChatGPT-'
229
- when 'Tts' then 'TTS-'
230
- when 'Dall E' then 'DALL-E-'
173
+ def model_family(model_id)
174
+ MODEL_PATTERNS.each do |family, pattern|
175
+ return family.to_s if model_id.match?(pattern)
231
176
  end
177
+
178
+ 'other'
232
179
  end
233
180
 
234
- def self.normalize_temperature(temperature, model_id)
235
- if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
236
- RubyLLM.logger.debug { "Model #{model_id} requires temperature=1.0, setting that instead." }
237
- 1.0
238
- elsif model_id.match?(/-search/)
239
- RubyLLM.logger.debug { "Model #{model_id} does not accept temperature parameter, removing" }
240
- nil
181
+ def supports_vision?(model_id)
182
+ case model_family(model_id)
183
+ when 'gpt_image', 'gpt_image_mini', 'gpt_image15', 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini',
184
+ 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
185
+ 'gpt4o_search'
186
+ true
241
187
  else
242
- temperature
188
+ false
243
189
  end
244
190
  end
245
191
 
246
- def self.temperature_close_to_one?(temperature)
247
- (temperature.to_f - 1.0).abs <= Float::EPSILON
192
+ def supports_functions?(model_id)
193
+ case model_family(model_id)
194
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
195
+ 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
196
+ true
197
+ else
198
+ false
199
+ end
248
200
  end
249
201
 
250
- def modalities_for(model_id)
251
- modalities = {
252
- input: ['text'],
253
- output: ['text']
254
- }
202
+ def supports_structured_output?(model_id)
203
+ case model_family(model_id)
204
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4o',
205
+ 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
206
+ true
207
+ else
208
+ false
209
+ end
210
+ end
255
211
 
256
- # Vision support
257
- modalities[:input] << 'image' if supports_vision?(model_id)
258
- modalities[:input] << 'audio' if model_id.match?(/whisper|audio|tts|transcribe/)
259
- modalities[:input] << 'pdf' if supports_vision?(model_id)
260
- modalities[:output] << 'audio' if model_id.match?(/tts|audio/)
261
- modalities[:output] << 'image' if model_id.match?(/dall-e|image/)
262
- modalities[:output] << 'embeddings' if model_id.match?(/embedding/)
263
- modalities[:output] << 'moderation' if model_id.match?(/moderation/)
212
+ def input_price_for(model_id)
213
+ return family_prices(model_id).dig(:text, :input) if image_model?(model_id)
264
214
 
265
- modalities
215
+ price_for(model_id, :input, 0.50)
266
216
  end
267
217
 
268
- def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
269
- capabilities = []
218
+ def output_price_for(model_id)
219
+ return family_prices(model_id).dig(:text, :output) if image_model?(model_id)
270
220
 
271
- capabilities << 'streaming' unless model_id.match?(/moderation|embedding/)
272
- capabilities << 'function_calling' if supports_functions?(model_id)
273
- capabilities << 'structured_output' if supports_json_mode?(model_id)
274
- capabilities << 'batch' if model_id.match?(/embedding|batch/)
275
- capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
221
+ price_for(model_id, :output, 1.50)
222
+ end
276
223
 
277
- if model_id.match?(/gpt-4-turbo|gpt-4o/)
278
- capabilities << 'image_generation' if model_id.match?(/vision/)
279
- capabilities << 'speech_generation' if model_id.match?(/audio/)
280
- capabilities << 'transcription' if model_id.match?(/audio/)
281
- end
224
+ def cached_input_price_for(model_id)
225
+ return family_prices(model_id).dig(:text, :cached_input) if image_model?(model_id)
282
226
 
283
- capabilities
227
+ family = model_family(model_id).to_sym
228
+ PRICES.fetch(family, {})[:cached_input]
284
229
  end
285
230
 
286
- def pricing_for(model_id)
287
- standard_pricing = {
288
- input_per_million: input_price_for(model_id),
289
- output_per_million: output_price_for(model_id)
231
+ def image_model?(model_id)
232
+ %w[gpt_image gpt_image_mini gpt_image15].include?(model_family(model_id))
233
+ end
234
+
235
+ def image_pricing_for(model_id)
236
+ text_pricing = {
237
+ input_per_million: input_price_for(model_id)
290
238
  }
239
+ cached_text_price = cached_input_price_for(model_id)
240
+ text_pricing[:cache_read_input_per_million] = cached_text_price if cached_text_price
291
241
 
292
- if respond_to?(:cached_input_price_for)
293
- cached_price = cached_input_price_for(model_id)
294
- standard_pricing[:cached_input_per_million] = cached_price if cached_price
295
- end
242
+ image_pricing = {
243
+ input_per_million: family_prices(model_id).dig(:images, :input),
244
+ output_per_million: family_prices(model_id).dig(:images, :output)
245
+ }
246
+ cached_image_price = family_prices(model_id).dig(:images, :cached_input)
247
+ image_pricing[:cache_read_input_per_million] = cached_image_price if cached_image_price
296
248
 
297
- pricing = { text_tokens: { standard: standard_pricing } }
249
+ {
250
+ text_tokens: { standard: text_pricing },
251
+ images: { standard: image_pricing }
252
+ }
253
+ end
298
254
 
299
- if model_id.match?(/embedding|batch/)
300
- pricing[:text_tokens][:batch] = {
301
- input_per_million: standard_pricing[:input_per_million] * 0.5,
302
- output_per_million: standard_pricing[:output_per_million] * 0.5
303
- }
304
- end
255
+ def price_for(model_id, key, fallback)
256
+ prices = family_prices(model_id)
257
+ prices = { key => fallback } if prices.empty?
258
+ prices[key] || prices[:price] || fallback
259
+ end
305
260
 
306
- pricing
261
+ def family_prices(model_id)
262
+ family = model_family(model_id).to_sym
263
+ PRICES.fetch(family, {})
307
264
  end
265
+
266
+ module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
267
+ :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
268
+ :input_price_for, :output_price_for, :cached_input_price_for, :image_model?,
269
+ :image_pricing_for, :price_for, :family_prices
308
270
  end
309
271
  end
310
272
  end
@@ -61,8 +61,7 @@ module RubyLLM
61
61
  return unless message_data
62
62
 
63
63
  usage = data['usage'] || {}
64
- cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
65
- thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
64
+ thinking_tokens = thinking_tokens(usage)
66
65
  content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
67
66
  thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
68
67
  thinking_signature = extract_thinking_signature(message_data)
@@ -72,16 +71,56 @@ module RubyLLM
72
71
  content: content,
73
72
  thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
74
73
  tool_calls: parse_tool_calls(message_data['tool_calls']),
75
- input_tokens: usage['prompt_tokens'],
76
- output_tokens: usage['completion_tokens'],
77
- cached_tokens: cached_tokens,
78
- cache_creation_tokens: 0,
74
+ input_tokens: input_tokens(usage),
75
+ output_tokens: output_tokens(usage),
76
+ cached_tokens: cache_read_tokens(usage),
77
+ cache_creation_tokens: cache_write_tokens(usage),
79
78
  thinking_tokens: thinking_tokens,
80
79
  model_id: data['model'],
81
80
  raw: response
82
81
  )
83
82
  end
84
83
 
84
+ def input_tokens(usage)
85
+ return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
86
+
87
+ prompt_tokens = usage['prompt_tokens']
88
+ return unless prompt_tokens
89
+
90
+ [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
91
+ end
92
+
93
+ def output_tokens(usage)
94
+ completion_tokens = usage['completion_tokens']
95
+ return unless completion_tokens
96
+
97
+ completion_tokens = completion_tokens.to_i
98
+ generated_tokens = generated_tokens_from_total(usage)
99
+ return completion_tokens unless generated_tokens && generated_tokens > completion_tokens
100
+
101
+ generated_tokens
102
+ end
103
+
104
+ def generated_tokens_from_total(usage)
105
+ prompt_tokens = usage['prompt_tokens']
106
+ total_tokens = usage['total_tokens']
107
+ return unless prompt_tokens && total_tokens
108
+
109
+ [total_tokens.to_i - prompt_tokens.to_i, 0].max
110
+ end
111
+
112
+ def cache_read_tokens(usage)
113
+ usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
114
+ end
115
+
116
+ def cache_write_tokens(usage)
117
+ usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
118
+ end
119
+
120
+ def thinking_tokens(usage)
121
+ usage.dig('completion_tokens_details', 'reasoning_tokens') || usage['reasoning_tokens']
122
+ end
123
+
85
124
  def format_messages(messages)
86
125
  messages.map do |msg|
87
126
  {
@@ -7,31 +7,83 @@ module RubyLLM
7
7
  module Images
8
8
  module_function
9
9
 
10
- def images_url
11
- 'images/generations'
10
+ def images_url(with: nil, mask: nil)
11
+ editing?(with, mask) ? 'images/edits' : 'images/generations'
12
12
  end
13
13
 
14
- def render_image_payload(prompt, model:, size:)
14
+ def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
15
+ return render_edit_payload(prompt, model:, with:, mask:, params:) if editing?(with, mask)
16
+
15
17
  {
16
18
  model: model,
17
19
  prompt: prompt,
18
20
  n: 1,
19
21
  size: size
20
- }
22
+ }.merge(params)
21
23
  end
22
24
 
23
25
  def parse_image_response(response, model:)
24
26
  data = response.body
25
- image_data = data['data'].first
27
+ image_data = Array(data['data']).first
28
+
29
+ raise Error.new(nil, 'Unexpected response format from OpenAI image API') unless image_data
26
30
 
27
31
  Image.new(
28
32
  url: image_data['url'],
29
33
  mime_type: 'image/png', # DALL-E typically returns PNGs
30
34
  revised_prompt: image_data['revised_prompt'],
31
35
  model_id: model,
32
- data: image_data['b64_json']
36
+ data: image_data['b64_json'],
37
+ usage: data['usage'] || {}
33
38
  )
34
39
  end
40
+
41
+ def validate_paint_inputs!(with:, mask:)
42
+ return unless editing?(with, mask)
43
+
44
+ raise ArgumentError, 'with: is required when mask: is provided' if mask && !attachments?(with)
45
+ end
46
+
47
+ def render_edit_payload(prompt, model:, with:, mask:, params:)
48
+ payload = params.merge(
49
+ model: model,
50
+ prompt: prompt,
51
+ image: build_upload_parts(with, label: 'images'),
52
+ n: 1
53
+ )
54
+ payload[:mask] = build_upload_part(mask, label: 'mask') if mask
55
+ payload
56
+ end
57
+
58
+ def build_upload_parts(sources, label:)
59
+ Array(sources).filter_map do |source|
60
+ next if blank_attachment?(source)
61
+
62
+ build_upload_part(source, label:)
63
+ end
64
+ end
65
+
66
+ def build_upload_part(source, label:)
67
+ attachment = Attachment.new(source)
68
+ unless attachment.image?
69
+ raise UnsupportedAttachmentError,
70
+ "OpenAI image editing only supports image attachments for #{label}"
71
+ end
72
+
73
+ Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type, attachment.filename)
74
+ end
75
+
76
+ def editing?(with, mask)
77
+ attachments?(with) || !mask.nil?
78
+ end
79
+
80
+ def attachments?(value)
81
+ Array(value).any? { |item| !blank_attachment?(item) }
82
+ end
83
+
84
+ def blank_attachment?(value)
85
+ value.nil? || (value.is_a?(String) && value.strip.empty?)
86
+ end
35
87
  end
36
88
  end
37
89
  end
@@ -17,14 +17,12 @@ module RubyLLM
17
17
 
18
18
  Model::Info.new(
19
19
  id: model_id,
20
- name: capabilities.format_display_name(model_id),
20
+ name: model_id,
21
21
  provider: slug,
22
- family: capabilities.model_family(model_id),
23
22
  created_at: model_data['created'] ? Time.at(model_data['created']) : nil,
24
23
  context_window: capabilities.context_window_for(model_id),
25
24
  max_output_tokens: capabilities.max_tokens_for(model_id),
26
- modalities: capabilities.modalities_for(model_id),
27
- capabilities: capabilities.capabilities_for(model_id),
25
+ capabilities: capabilities.critical_capabilities_for(model_id),
28
26
  pricing: capabilities.pricing_for(model_id),
29
27
  metadata: {
30
28
  object: model_data['object'],
@@ -13,7 +13,6 @@ module RubyLLM
13
13
 
14
14
  def build_chunk(data)
15
15
  usage = data['usage'] || {}
16
- cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
17
16
  delta = data.dig('choices', 0, 'delta') || {}
18
17
  content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
19
18
  content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
@@ -27,11 +26,11 @@ module RubyLLM
27
26
  signature: delta['reasoning_signature']
28
27
  ),
29
28
  tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
30
- input_tokens: usage['prompt_tokens'],
31
- output_tokens: usage['completion_tokens'],
32
- cached_tokens: cached_tokens,
33
- cache_creation_tokens: 0,
34
- thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
29
+ input_tokens: OpenAI::Chat.input_tokens(usage),
30
+ output_tokens: OpenAI::Chat.output_tokens(usage),
31
+ cached_tokens: OpenAI::Chat.cache_read_tokens(usage),
32
+ cache_creation_tokens: OpenAI::Chat.cache_write_tokens(usage),
33
+ thinking_tokens: OpenAI::Chat.thinking_tokens(usage)
35
34
  )
36
35
  end
37
36