ruby_llm 1.14.1 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96):
  1. checksums.yaml +4 -4
  2. data/README.md +6 -7
  3. data/lib/generators/ruby_llm/generator_helpers.rb +8 -0
  4. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
  5. data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +1 -1
  6. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +3 -3
  7. data/lib/ruby_llm/active_record/acts_as.rb +4 -26
  8. data/lib/ruby_llm/active_record/acts_as_legacy.rb +123 -29
  9. data/lib/ruby_llm/active_record/chat_methods.rb +41 -24
  10. data/lib/ruby_llm/active_record/message_methods.rb +87 -4
  11. data/lib/ruby_llm/active_record/model_methods.rb +7 -9
  12. data/lib/ruby_llm/active_record/payload_helpers.rb +3 -0
  13. data/lib/ruby_llm/active_record/tool_call_methods.rb +3 -0
  14. data/lib/ruby_llm/agent.rb +4 -2
  15. data/lib/ruby_llm/aliases.json +108 -75
  16. data/lib/ruby_llm/aliases.rb +3 -0
  17. data/lib/ruby_llm/attachment.rb +41 -40
  18. data/lib/ruby_llm/chat.rb +229 -59
  19. data/lib/ruby_llm/configuration.rb +14 -1
  20. data/lib/ruby_llm/connection.rb +36 -7
  21. data/lib/ruby_llm/content.rb +15 -1
  22. data/lib/ruby_llm/cost.rb +224 -0
  23. data/lib/ruby_llm/deprecator.rb +24 -0
  24. data/lib/ruby_llm/embedding.rb +31 -1
  25. data/lib/ruby_llm/error.rb +11 -75
  26. data/lib/ruby_llm/error_middleware.rb +81 -0
  27. data/lib/ruby_llm/image.rb +39 -4
  28. data/lib/ruby_llm/instrumentation.rb +36 -0
  29. data/lib/ruby_llm/message.rb +20 -0
  30. data/lib/ruby_llm/mime_type.rb +25 -0
  31. data/lib/ruby_llm/model/info.rb +53 -2
  32. data/lib/ruby_llm/model/pricing.rb +19 -9
  33. data/lib/ruby_llm/model/pricing_category.rb +13 -2
  34. data/lib/ruby_llm/model/pricing_tier.rb +20 -9
  35. data/lib/ruby_llm/model_registry.rb +39 -0
  36. data/lib/ruby_llm/models.json +17817 -13942
  37. data/lib/ruby_llm/models.rb +97 -31
  38. data/lib/ruby_llm/models_schema.json +3 -0
  39. data/lib/ruby_llm/provider.rb +20 -4
  40. data/lib/ruby_llm/providers/anthropic/chat.rb +49 -15
  41. data/lib/ruby_llm/providers/anthropic/models.rb +2 -0
  42. data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
  43. data/lib/ruby_llm/providers/anthropic/tools.rb +32 -3
  44. data/lib/ruby_llm/providers/azure/media.rb +1 -1
  45. data/lib/ruby_llm/providers/bedrock/auth.rb +1 -0
  46. data/lib/ruby_llm/providers/bedrock/chat.rb +26 -13
  47. data/lib/ruby_llm/providers/bedrock/media.rb +21 -3
  48. data/lib/ruby_llm/providers/bedrock/models.rb +1 -1
  49. data/lib/ruby_llm/providers/bedrock/streaming.rb +10 -1
  50. data/lib/ruby_llm/providers/bedrock.rb +2 -2
  51. data/lib/ruby_llm/providers/deepseek/capabilities.rb +43 -0
  52. data/lib/ruby_llm/providers/deepseek/chat.rb +9 -0
  53. data/lib/ruby_llm/providers/gemini/chat.rb +10 -4
  54. data/lib/ruby_llm/providers/gemini/images.rb +2 -2
  55. data/lib/ruby_llm/providers/gemini/media.rb +16 -9
  56. data/lib/ruby_llm/providers/gemini/streaming.rb +6 -1
  57. data/lib/ruby_llm/providers/gemini/tools.rb +5 -1
  58. data/lib/ruby_llm/providers/gpustack/chat.rb +8 -1
  59. data/lib/ruby_llm/providers/gpustack/models.rb +2 -0
  60. data/lib/ruby_llm/providers/mistral/capabilities.rb +7 -2
  61. data/lib/ruby_llm/providers/mistral/chat.rb +56 -5
  62. data/lib/ruby_llm/providers/mistral/media.rb +55 -0
  63. data/lib/ruby_llm/providers/mistral/models.rb +2 -0
  64. data/lib/ruby_llm/providers/mistral.rb +2 -2
  65. data/lib/ruby_llm/providers/ollama/chat.rb +8 -1
  66. data/lib/ruby_llm/providers/openai/capabilities.rb +82 -12
  67. data/lib/ruby_llm/providers/openai/chat.rb +61 -7
  68. data/lib/ruby_llm/providers/openai/images.rb +58 -6
  69. data/lib/ruby_llm/providers/openai/media.rb +40 -16
  70. data/lib/ruby_llm/providers/openai/streaming.rb +7 -6
  71. data/lib/ruby_llm/providers/openai/tools.rb +2 -0
  72. data/lib/ruby_llm/providers/openai/transcription.rb +1 -0
  73. data/lib/ruby_llm/providers/openrouter/chat.rb +36 -8
  74. data/lib/ruby_llm/providers/openrouter/images.rb +2 -2
  75. data/lib/ruby_llm/providers/openrouter/models.rb +1 -1
  76. data/lib/ruby_llm/providers/openrouter/streaming.rb +5 -6
  77. data/lib/ruby_llm/providers/perplexity/chat.rb +11 -0
  78. data/lib/ruby_llm/providers/perplexity/media.rb +62 -0
  79. data/lib/ruby_llm/providers/perplexity.rb +2 -2
  80. data/lib/ruby_llm/providers/vertexai.rb +5 -1
  81. data/lib/ruby_llm/providers/xai/chat.rb +9 -0
  82. data/lib/ruby_llm/providers/xai/models.rb +15 -27
  83. data/lib/ruby_llm/providers/xai.rb +2 -2
  84. data/lib/ruby_llm/railtie.rb +11 -1
  85. data/lib/ruby_llm/stream_accumulator.rb +45 -30
  86. data/lib/ruby_llm/streaming.rb +4 -0
  87. data/lib/ruby_llm/tokens.rb +8 -0
  88. data/lib/ruby_llm/tool.rb +24 -7
  89. data/lib/ruby_llm/tool_concurrency.rb +105 -0
  90. data/lib/ruby_llm/transcription.rb +2 -1
  91. data/lib/ruby_llm/utils.rb +39 -0
  92. data/lib/ruby_llm/version.rb +1 -1
  93. data/lib/ruby_llm.rb +11 -6
  94. data/lib/tasks/models.rake +45 -16
  95. data/lib/tasks/release.rake +50 -23
  96. metadata +35 -13
@@ -8,6 +8,9 @@ module RubyLLM
8
8
  module_function
9
9
 
10
10
  MODEL_PATTERNS = {
11
+ gpt_image15: /^gpt-image-1\.5/,
12
+ gpt_image_mini: /^gpt-image-1-mini/,
13
+ gpt_image: /^gpt-image-1(?:$|-)/,
11
14
  gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
12
15
  gpt41_mini: /^gpt-4\.1-mini/,
13
16
  gpt41_nano: /^gpt-4\.1-nano/,
@@ -43,6 +46,18 @@ module RubyLLM
43
46
  }.freeze
44
47
 
45
48
  PRICES = {
49
+ gpt_image: {
50
+ text: { input: 5.0, cached_input: 1.25 },
51
+ images: { input: 10.0, cached_input: 2.5, output: 40.0 }
52
+ },
53
+ gpt_image_mini: {
54
+ text: { input: 2.0, cached_input: 0.2 },
55
+ images: { input: 2.5, cached_input: 0.25, output: 8.0 }
56
+ },
57
+ gpt_image15: {
58
+ text: { input: 5.0, cached_input: 1.25, output: 10.0 },
59
+ images: { input: 8.0, cached_input: 2.0, output: 32.0 }
60
+ },
46
61
  gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
47
62
  gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
48
63
  gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
@@ -77,6 +92,20 @@ module RubyLLM
77
92
  moderation: { price: 0.0 }
78
93
  }.freeze
79
94
 
95
+ NIL_LIMIT_FAMILIES = %w[
96
+ gpt_image
97
+ gpt_image_mini
98
+ gpt_image15
99
+ gpt4o_mini_tts
100
+ tts1
101
+ tts1_hd
102
+ whisper
103
+ moderation
104
+ embedding3_large
105
+ embedding3_small
106
+ embedding_ada
107
+ ].freeze
108
+
80
109
  def supports_tool_choice?(_model_id)
81
110
  true
82
111
  end
@@ -86,7 +115,10 @@ module RubyLLM
86
115
  end
87
116
 
88
117
  def context_window_for(model_id)
89
- case model_family(model_id)
118
+ family = model_family(model_id)
119
+ return nil if NIL_LIMIT_FAMILIES.include?(family)
120
+
121
+ case family
90
122
  when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
91
123
  when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
92
124
  'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
@@ -95,14 +127,15 @@ module RubyLLM
95
127
  when 'gpt4o_mini_transcribe' then 16_000
96
128
  when 'o1', 'o1_pro', 'o3_mini' then 200_000
97
129
  when 'gpt35_turbo' then 16_385
98
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
99
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
100
130
  else 4_096
101
131
  end
102
132
  end
103
133
 
104
134
  def max_tokens_for(model_id)
105
- case model_family(model_id)
135
+ family = model_family(model_id)
136
+ return nil if NIL_LIMIT_FAMILIES.include?(family)
137
+
138
+ case family
106
139
  when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
107
140
  when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
108
141
  when 'gpt4' then 8_192
@@ -110,8 +143,6 @@ module RubyLLM
110
143
  when 'gpt4o_mini_transcribe' then 2_000
111
144
  when 'o1', 'o1_pro', 'o3_mini' then 100_000
112
145
  when 'o1_mini' then 65_536
113
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
114
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
115
146
  else 16_384
116
147
  end
117
148
  end
@@ -126,13 +157,15 @@ module RubyLLM
126
157
  end
127
158
 
128
159
  def pricing_for(model_id)
160
+ return image_pricing_for(model_id) if image_model?(model_id)
161
+
129
162
  standard_pricing = {
130
163
  input_per_million: input_price_for(model_id),
131
164
  output_per_million: output_price_for(model_id)
132
165
  }
133
166
 
134
167
  cached_price = cached_input_price_for(model_id)
135
- standard_pricing[:cached_input_per_million] = cached_price if cached_price
168
+ standard_pricing[:cache_read_input_per_million] = cached_price if cached_price
136
169
 
137
170
  { text_tokens: { standard: standard_pricing } }
138
171
  end
@@ -147,8 +180,9 @@ module RubyLLM
147
180
 
148
181
  def supports_vision?(model_id)
149
182
  case model_family(model_id)
150
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
151
- 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search'
183
+ when 'gpt_image', 'gpt_image_mini', 'gpt_image15', 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini',
184
+ 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
185
+ 'gpt4o_search'
152
186
  true
153
187
  else
154
188
  false
@@ -176,27 +210,63 @@ module RubyLLM
176
210
  end
177
211
 
178
212
  def input_price_for(model_id)
213
+ return family_prices(model_id).dig(:text, :input) if image_model?(model_id)
214
+
179
215
  price_for(model_id, :input, 0.50)
180
216
  end
181
217
 
182
218
  def output_price_for(model_id)
219
+ return family_prices(model_id).dig(:text, :output) if image_model?(model_id)
220
+
183
221
  price_for(model_id, :output, 1.50)
184
222
  end
185
223
 
186
224
  def cached_input_price_for(model_id)
225
+ return family_prices(model_id).dig(:text, :cached_input) if image_model?(model_id)
226
+
187
227
  family = model_family(model_id).to_sym
188
228
  PRICES.fetch(family, {})[:cached_input]
189
229
  end
190
230
 
231
+ def image_model?(model_id)
232
+ %w[gpt_image gpt_image_mini gpt_image15].include?(model_family(model_id))
233
+ end
234
+
235
+ def image_pricing_for(model_id)
236
+ text_pricing = {
237
+ input_per_million: input_price_for(model_id)
238
+ }
239
+ cached_text_price = cached_input_price_for(model_id)
240
+ text_pricing[:cache_read_input_per_million] = cached_text_price if cached_text_price
241
+
242
+ image_pricing = {
243
+ input_per_million: family_prices(model_id).dig(:images, :input),
244
+ output_per_million: family_prices(model_id).dig(:images, :output)
245
+ }
246
+ cached_image_price = family_prices(model_id).dig(:images, :cached_input)
247
+ image_pricing[:cache_read_input_per_million] = cached_image_price if cached_image_price
248
+
249
+ {
250
+ text_tokens: { standard: text_pricing },
251
+ images: { standard: image_pricing }
252
+ }
253
+ end
254
+
191
255
  def price_for(model_id, key, fallback)
192
- family = model_family(model_id).to_sym
193
- prices = PRICES.fetch(family, { key => fallback })
256
+ prices = family_prices(model_id)
257
+ prices = { key => fallback } if prices.empty?
194
258
  prices[key] || prices[:price] || fallback
195
259
  end
196
260
 
261
+ def family_prices(model_id)
262
+ family = model_family(model_id).to_sym
263
+ PRICES.fetch(family, {})
264
+ end
265
+
197
266
  module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
198
267
  :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
199
- :input_price_for, :output_price_for, :cached_input_price_for, :price_for
268
+ :input_price_for, :output_price_for, :cached_input_price_for, :image_model?,
269
+ :image_pricing_for, :price_for, :family_prices
200
270
  end
201
271
  end
202
272
  end
@@ -61,8 +61,7 @@ module RubyLLM
61
61
  return unless message_data
62
62
 
63
63
  usage = data['usage'] || {}
64
- cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
65
- thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
64
+ thinking_tokens = thinking_tokens(usage)
66
65
  content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
67
66
  thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
68
67
  thinking_signature = extract_thinking_signature(message_data)
@@ -72,27 +71,82 @@ module RubyLLM
72
71
  content: content,
73
72
  thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
74
73
  tool_calls: parse_tool_calls(message_data['tool_calls']),
75
- input_tokens: usage['prompt_tokens'],
76
- output_tokens: usage['completion_tokens'],
77
- cached_tokens: cached_tokens,
78
- cache_creation_tokens: 0,
74
+ input_tokens: input_tokens(usage),
75
+ output_tokens: output_tokens(usage),
76
+ cached_tokens: cache_read_tokens(usage),
77
+ cache_creation_tokens: cache_write_tokens(usage),
79
78
  thinking_tokens: thinking_tokens,
80
79
  model_id: data['model'],
81
80
  raw: response
82
81
  )
83
82
  end
84
83
 
84
+ def input_tokens(usage)
85
+ return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
86
+
87
+ prompt_tokens = usage['prompt_tokens']
88
+ return unless prompt_tokens
89
+
90
+ [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
91
+ end
92
+
93
+ def output_tokens(usage)
94
+ completion_tokens = usage['completion_tokens']
95
+ return unless completion_tokens
96
+
97
+ completion_tokens = completion_tokens.to_i
98
+ generated_tokens = generated_tokens_from_total(usage)
99
+ return completion_tokens unless generated_tokens && generated_tokens > completion_tokens
100
+
101
+ generated_tokens
102
+ end
103
+
104
+ def generated_tokens_from_total(usage)
105
+ prompt_tokens = usage['prompt_tokens']
106
+ total_tokens = usage['total_tokens']
107
+ return unless prompt_tokens && total_tokens
108
+
109
+ [total_tokens.to_i - prompt_tokens.to_i, 0].max
110
+ end
111
+
112
+ def cache_read_tokens(usage)
113
+ usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
114
+ end
115
+
116
+ def cache_write_tokens(usage)
117
+ usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
118
+ end
119
+
120
+ def thinking_tokens(usage)
121
+ usage.dig('completion_tokens_details', 'reasoning_tokens') || usage['reasoning_tokens']
122
+ end
123
+
85
124
  def format_messages(messages)
86
125
  messages.map do |msg|
87
126
  {
88
127
  role: format_role(msg.role),
89
- content: Media.format_content(msg.content),
128
+ content: format_message_content(msg),
90
129
  tool_calls: format_tool_calls(msg.tool_calls),
91
130
  tool_call_id: msg.tool_call_id
92
131
  }.compact.merge(format_thinking(msg))
93
132
  end
94
133
  end
95
134
 
135
+ def format_message_content(msg)
136
+ content = format_content(msg.content)
137
+ return '' if content.nil? && thinking_only_assistant_message?(msg)
138
+
139
+ content
140
+ end
141
+
142
+ def thinking_only_assistant_message?(msg)
143
+ msg.role == :assistant && msg.thinking && !msg.tool_call?
144
+ end
145
+
146
+ def format_content(content)
147
+ Media.format_content(content)
148
+ end
149
+
96
150
  def format_role(role)
97
151
  case role
98
152
  when :system
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'faraday'
4
+ require 'stringio'
5
+
3
6
  module RubyLLM
4
7
  module Providers
5
8
  class OpenAI
@@ -7,30 +10,79 @@ module RubyLLM
7
10
  module Images
8
11
  module_function
9
12
 
10
- def images_url
11
- 'images/generations'
13
+ def images_url(with: nil, mask: nil)
14
+ editing?(with, mask) ? 'images/edits' : 'images/generations'
12
15
  end
13
16
 
14
- def render_image_payload(prompt, model:, size:)
17
+ def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
18
+ return render_edit_payload(prompt, model:, with:, mask:, params:) if editing?(with, mask)
19
+
15
20
  {
16
21
  model: model,
17
22
  prompt: prompt,
18
23
  n: 1,
19
24
  size: size
20
- }
25
+ }.merge(params)
21
26
  end
22
27
 
23
28
  def parse_image_response(response, model:)
24
29
  data = response.body
25
- image_data = data['data'].first
30
+ image_data = Array(data['data']).first
31
+
32
+ raise Error.new(nil, 'Unexpected response format from OpenAI image API') unless image_data
26
33
 
27
34
  Image.new(
28
35
  url: image_data['url'],
29
36
  mime_type: 'image/png', # DALL-E typically returns PNGs
30
37
  revised_prompt: image_data['revised_prompt'],
31
38
  model_id: model,
32
- data: image_data['b64_json']
39
+ data: image_data['b64_json'],
40
+ usage: data['usage'] || {}
41
+ )
42
+ end
43
+
44
+ def validate_paint_inputs!(with:, mask:)
45
+ return unless editing?(with, mask)
46
+
47
+ raise ArgumentError, 'with: is required when mask: is provided' if mask && !attachments?(with)
48
+ end
49
+
50
+ def render_edit_payload(prompt, model:, with:, mask:, params:)
51
+ payload = params.merge(
52
+ model: model,
53
+ prompt: prompt,
54
+ image: build_upload_parts(with),
55
+ n: 1
33
56
  )
57
+ payload[:mask] = build_upload_part(mask) if mask
58
+ payload
59
+ end
60
+
61
+ def build_upload_parts(sources)
62
+ Array(sources).filter_map do |source|
63
+ next if blank_attachment?(source)
64
+
65
+ build_upload_part(source)
66
+ end
67
+ end
68
+
69
+ def build_upload_part(source)
70
+ attachment = Attachment.new(source)
71
+ raise UnsupportedAttachmentError, attachment.mime_type unless attachment.image?
72
+
73
+ Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type, attachment.filename)
74
+ end
75
+
76
+ def editing?(with, mask)
77
+ attachments?(with) || !mask.nil?
78
+ end
79
+
80
+ def attachments?(value)
81
+ Array(value).any? { |item| !blank_attachment?(item) }
82
+ end
83
+
84
+ def blank_attachment?(value)
85
+ value.nil? || (value.is_a?(String) && value.strip.empty?)
34
86
  end
35
87
  end
36
88
  end
@@ -7,7 +7,7 @@ module RubyLLM
7
7
  module Media
8
8
  module_function
9
9
 
10
- def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
10
+ def format_content(content, document_attachments: :pdf, image_attachments: true, audio_attachments: true)
11
11
  if content.is_a?(RubyLLM::Content::Raw)
12
12
  value = content.value
13
13
  return value.is_a?(Hash) ? value.to_json : value
@@ -19,23 +19,36 @@ module RubyLLM
19
19
  parts << format_text(content.text) if content.text
20
20
 
21
21
  content.attachments.each do |attachment|
22
- case attachment.type
23
- when :image
24
- parts << format_image(attachment)
25
- when :pdf
26
- parts << format_pdf(attachment)
27
- when :audio
28
- parts << format_audio(attachment)
29
- when :text
30
- parts << format_text_file(attachment)
31
- else
32
- raise UnsupportedAttachmentError, attachment.type
33
- end
22
+ parts << format_attachment(
23
+ attachment,
24
+ document_attachments:,
25
+ image_attachments:,
26
+ audio_attachments:
27
+ )
34
28
  end
35
29
 
36
30
  parts
37
31
  end
38
32
 
33
+ def format_attachment(attachment, document_attachments:, image_attachments:, audio_attachments:)
34
+ case attachment.type
35
+ when :image
36
+ raise UnsupportedAttachmentError, attachment.mime_type unless image_attachments
37
+
38
+ format_image(attachment)
39
+ when :audio
40
+ raise UnsupportedAttachmentError, attachment.mime_type unless audio_attachments
41
+
42
+ format_audio(attachment)
43
+ when :pdf, :document
44
+ format_document_attachment(attachment, document_attachments)
45
+ when :text
46
+ format_text_file(attachment)
47
+ else
48
+ raise UnsupportedAttachmentError, attachment.mime_type
49
+ end
50
+ end
51
+
39
52
  def format_image(image)
40
53
  {
41
54
  type: 'image_url',
@@ -45,16 +58,20 @@ module RubyLLM
45
58
  }
46
59
  end
47
60
 
48
- def format_pdf(pdf)
61
+ def format_document(document)
49
62
  {
50
63
  type: 'file',
51
64
  file: {
52
- filename: pdf.filename,
53
- file_data: pdf.for_llm
65
+ filename: document.filename,
66
+ file_data: document.for_llm
54
67
  }
55
68
  }
56
69
  end
57
70
 
71
+ def format_pdf(pdf)
72
+ format_document(pdf)
73
+ end
74
+
58
75
  def format_text_file(text_file)
59
76
  {
60
77
  type: 'text',
@@ -78,6 +95,13 @@ module RubyLLM
78
95
  text: text
79
96
  }
80
97
  end
98
+
99
+ def format_document_attachment(attachment, strategy)
100
+ return format_document(attachment) if strategy == :all
101
+ return format_document(attachment) if strategy == :pdf && attachment.pdf?
102
+
103
+ raise UnsupportedAttachmentError, attachment.mime_type
104
+ end
81
105
  end
82
106
  end
83
107
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'json'
4
+
3
5
  module RubyLLM
4
6
  module Providers
5
7
  class OpenAI
@@ -13,7 +15,6 @@ module RubyLLM
13
15
 
14
16
  def build_chunk(data)
15
17
  usage = data['usage'] || {}
16
- cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
17
18
  delta = data.dig('choices', 0, 'delta') || {}
18
19
  content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
19
20
  content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
@@ -27,11 +28,11 @@ module RubyLLM
27
28
  signature: delta['reasoning_signature']
28
29
  ),
29
30
  tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
30
- input_tokens: usage['prompt_tokens'],
31
- output_tokens: usage['completion_tokens'],
32
- cached_tokens: cached_tokens,
33
- cache_creation_tokens: 0,
34
- thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
31
+ input_tokens: OpenAI::Chat.input_tokens(usage),
32
+ output_tokens: OpenAI::Chat.output_tokens(usage),
33
+ cached_tokens: OpenAI::Chat.cache_read_tokens(usage),
34
+ cache_creation_tokens: OpenAI::Chat.cache_write_tokens(usage),
35
+ thinking_tokens: OpenAI::Chat.thinking_tokens(usage)
35
36
  )
36
37
  end
37
38
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'json'
4
+
3
5
  module RubyLLM
4
6
  module Providers
5
7
  class OpenAI
@@ -60,6 +60,7 @@ module RubyLLM
60
60
  language: data['language'],
61
61
  duration: data['duration'],
62
62
  segments: data['segments'],
63
+ words: data['words'],
63
64
  input_tokens: usage['input_tokens'] || usage['prompt_tokens'],
64
65
  output_tokens: usage['output_tokens'] || usage['completion_tokens']
65
66
  )
@@ -52,7 +52,7 @@ module RubyLLM
52
52
 
53
53
  def parse_completion_response(response)
54
54
  data = response.body
55
- return if data.empty?
55
+ return if data.nil? || data.empty?
56
56
 
57
57
  raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
58
58
 
@@ -60,8 +60,7 @@ module RubyLLM
60
60
  return unless message_data
61
61
 
62
62
  usage = data['usage'] || {}
63
- cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
64
- thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
63
+ thinking_tokens = thinking_tokens(usage)
65
64
  thinking_text = extract_thinking_text(message_data)
66
65
  thinking_signature = extract_thinking_signature(message_data)
67
66
 
@@ -70,27 +69,56 @@ module RubyLLM
70
69
  content: message_data['content'],
71
70
  thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
72
71
  tool_calls: OpenAI::Tools.parse_tool_calls(message_data['tool_calls']),
73
- input_tokens: usage['prompt_tokens'],
74
- output_tokens: usage['completion_tokens'],
75
- cached_tokens: cached_tokens,
76
- cache_creation_tokens: 0,
72
+ input_tokens: input_tokens(usage),
73
+ output_tokens: output_tokens(usage),
74
+ cached_tokens: cache_read_tokens(usage),
75
+ cache_creation_tokens: cache_write_tokens(usage),
77
76
  thinking_tokens: thinking_tokens,
78
77
  model_id: data['model'],
79
78
  raw: response
80
79
  )
81
80
  end
82
81
 
82
+ def input_tokens(usage)
83
+ return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
84
+
85
+ prompt_tokens = usage['prompt_tokens']
86
+ return unless prompt_tokens
87
+
88
+ [prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
89
+ end
90
+
91
+ def output_tokens(usage)
92
+ OpenAI::Chat.output_tokens(usage)
93
+ end
94
+
95
+ def cache_read_tokens(usage)
96
+ usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
97
+ end
98
+
99
+ def cache_write_tokens(usage)
100
+ usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
101
+ end
102
+
103
+ def thinking_tokens(usage)
104
+ OpenAI::Chat.thinking_tokens(usage)
105
+ end
106
+
83
107
  def format_messages(messages)
84
108
  messages.map do |msg|
85
109
  {
86
110
  role: format_role(msg.role),
87
- content: OpenAI::Media.format_content(msg.content),
111
+ content: format_content(msg.content),
88
112
  tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
89
113
  tool_call_id: msg.tool_call_id
90
114
  }.compact.merge(format_thinking(msg))
91
115
  end
92
116
  end
93
117
 
118
+ def format_content(content)
119
+ OpenAI::Media.format_content(content)
120
+ end
121
+
94
122
  def format_role(role)
95
123
  case role
96
124
  when :system
@@ -9,11 +9,11 @@ module RubyLLM
9
9
  module Images
10
10
  module_function
11
11
 
12
- def images_url
12
+ def images_url(with: nil, mask: nil) # rubocop:disable Lint/UnusedMethodArgument
13
13
  'chat/completions'
14
14
  end
15
15
 
16
- def render_image_payload(prompt, model:, size:)
16
+ def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
17
17
  RubyLLM.logger.debug { "Ignoring size #{size}. OpenRouter image generation does not support size parameter." }
18
18
  {
19
19
  model: model,
@@ -23,7 +23,7 @@ module RubyLLM
23
23
  pricing_types = {
24
24
  prompt: :input_per_million,
25
25
  completion: :output_per_million,
26
- input_cache_read: :cached_input_per_million,
26
+ input_cache_read: :cache_read_input_per_million,
27
27
  internal_reasoning: :reasoning_output_per_million
28
28
  }
29
29
 
@@ -13,7 +13,6 @@ module RubyLLM
13
13
 
14
14
  def build_chunk(data)
15
15
  usage = data['usage'] || {}
16
- cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
17
16
  delta = data.dig('choices', 0, 'delta') || {}
18
17
 
19
18
  Chunk.new(
@@ -25,11 +24,11 @@ module RubyLLM
25
24
  signature: extract_thinking_signature(delta)
26
25
  ),
27
26
  tool_calls: OpenAI::Tools.parse_tool_calls(delta['tool_calls'], parse_arguments: false),
28
- input_tokens: usage['prompt_tokens'],
29
- output_tokens: usage['completion_tokens'],
30
- cached_tokens: cached_tokens,
31
- cache_creation_tokens: 0,
32
- thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
27
+ input_tokens: OpenRouter::Chat.input_tokens(usage),
28
+ output_tokens: OpenRouter::Chat.output_tokens(usage),
29
+ cached_tokens: OpenRouter::Chat.cache_read_tokens(usage),
30
+ cache_creation_tokens: OpenRouter::Chat.cache_write_tokens(usage),
31
+ thinking_tokens: OpenRouter::Chat.thinking_tokens(usage)
33
32
  )
34
33
  end
35
34
 
@@ -10,6 +10,17 @@ module RubyLLM
10
10
  def format_role(role)
11
11
  role.to_s
12
12
  end
13
+
14
+ def format_messages(messages)
15
+ messages.map do |msg|
16
+ {
17
+ role: format_role(msg.role),
18
+ content: Perplexity::Media.format_content(msg.content),
19
+ tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
20
+ tool_call_id: msg.tool_call_id
21
+ }.compact.merge(OpenAI::Chat.format_thinking(msg))
22
+ end
23
+ end
13
24
  end
14
25
  end
15
26
  end