ruby_llm 1.14.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -5
- data/lib/generators/ruby_llm/generator_helpers.rb +8 -0
- data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +1 -1
- data/lib/ruby_llm/active_record/acts_as.rb +3 -0
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +52 -25
- data/lib/ruby_llm/active_record/chat_methods.rb +47 -23
- data/lib/ruby_llm/active_record/message_methods.rb +19 -14
- data/lib/ruby_llm/active_record/model_methods.rb +7 -9
- data/lib/ruby_llm/active_record/payload_helpers.rb +29 -0
- data/lib/ruby_llm/active_record/tool_call_methods.rb +5 -15
- data/lib/ruby_llm/agent.rb +3 -2
- data/lib/ruby_llm/aliases.json +53 -14
- data/lib/ruby_llm/attachment.rb +11 -27
- data/lib/ruby_llm/chat.rb +62 -21
- data/lib/ruby_llm/cost.rb +224 -0
- data/lib/ruby_llm/image.rb +37 -4
- data/lib/ruby_llm/message.rb +20 -0
- data/lib/ruby_llm/model/info.rb +17 -0
- data/lib/ruby_llm/model/pricing_category.rb +13 -2
- data/lib/ruby_llm/models.json +26511 -24930
- data/lib/ruby_llm/models.rb +2 -1
- data/lib/ruby_llm/models_schema.json +3 -0
- data/lib/ruby_llm/provider.rb +10 -3
- data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -133
- data/lib/ruby_llm/providers/anthropic/models.rb +2 -8
- data/lib/ruby_llm/providers/anthropic/tools.rb +4 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +24 -13
- data/lib/ruby_llm/providers/bedrock/streaming.rb +4 -1
- data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -119
- data/lib/ruby_llm/providers/gemini/capabilities.rb +45 -215
- data/lib/ruby_llm/providers/gemini/chat.rb +8 -1
- data/lib/ruby_llm/providers/gemini/images.rb +2 -2
- data/lib/ruby_llm/providers/gemini/models.rb +2 -4
- data/lib/ruby_llm/providers/gemini/streaming.rb +4 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +3 -1
- data/lib/ruby_llm/providers/mistral/capabilities.rb +6 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +55 -4
- data/lib/ruby_llm/providers/openai/capabilities.rb +157 -195
- data/lib/ruby_llm/providers/openai/chat.rb +45 -6
- data/lib/ruby_llm/providers/openai/images.rb +58 -6
- data/lib/ruby_llm/providers/openai/models.rb +2 -4
- data/lib/ruby_llm/providers/openai/streaming.rb +5 -6
- data/lib/ruby_llm/providers/openrouter/chat.rb +30 -6
- data/lib/ruby_llm/providers/openrouter/images.rb +2 -2
- data/lib/ruby_llm/providers/openrouter/models.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/streaming.rb +5 -6
- data/lib/ruby_llm/providers/perplexity/capabilities.rb +34 -99
- data/lib/ruby_llm/providers/perplexity/models.rb +12 -14
- data/lib/ruby_llm/railtie.rb +6 -0
- data/lib/ruby_llm/tokens.rb +8 -0
- data/lib/ruby_llm/tool.rb +24 -7
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/ruby_llm.rb +2 -4
- data/lib/tasks/models.rake +13 -12
- metadata +21 -5
|
@@ -3,13 +3,14 @@
|
|
|
3
3
|
module RubyLLM
|
|
4
4
|
module Providers
|
|
5
5
|
class OpenAI
|
|
6
|
-
#
|
|
6
|
+
# Provider-level capability checks and narrow registry fallbacks.
|
|
7
7
|
module Capabilities
|
|
8
8
|
module_function
|
|
9
9
|
|
|
10
10
|
MODEL_PATTERNS = {
|
|
11
|
-
|
|
12
|
-
|
|
11
|
+
gpt_image15: /^gpt-image-1\.5/,
|
|
12
|
+
gpt_image_mini: /^gpt-image-1-mini/,
|
|
13
|
+
gpt_image: /^gpt-image-1(?:$|-)/,
|
|
13
14
|
gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
|
|
14
15
|
gpt41_mini: /^gpt-4\.1-mini/,
|
|
15
16
|
gpt41_nano: /^gpt-4\.1-nano/,
|
|
@@ -26,9 +27,9 @@ module RubyLLM
|
|
|
26
27
|
gpt4o_realtime: /^gpt-4o-realtime/,
|
|
27
28
|
gpt4o_search: /^gpt-4o-search/,
|
|
28
29
|
gpt4o_transcribe: /^gpt-4o-transcribe/,
|
|
29
|
-
gpt5: /^gpt-5/,
|
|
30
|
-
gpt5_mini: /^gpt-5
|
|
31
|
-
gpt5_nano: /^gpt-5
|
|
30
|
+
gpt5: /^gpt-5(?!.*(?:mini|nano))/,
|
|
31
|
+
gpt5_mini: /^gpt-5.*mini/,
|
|
32
|
+
gpt5_nano: /^gpt-5.*nano/,
|
|
32
33
|
o1: /^o1(?!-(?:mini|pro))/,
|
|
33
34
|
o1_mini: /^o1-mini/,
|
|
34
35
|
o1_pro: /^o1-pro/,
|
|
@@ -44,101 +45,38 @@ module RubyLLM
|
|
|
44
45
|
moderation: /^(?:omni|text)-moderation/
|
|
45
46
|
}.freeze
|
|
46
47
|
|
|
47
|
-
def context_window_for(model_id)
|
|
48
|
-
case model_family(model_id)
|
|
49
|
-
when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
|
|
50
|
-
when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
|
|
51
|
-
'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
|
|
52
|
-
'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
|
|
53
|
-
when 'gpt4' then 8_192
|
|
54
|
-
when 'gpt4o_mini_transcribe' then 16_000
|
|
55
|
-
when 'o1', 'o1_pro', 'o3_mini' then 200_000
|
|
56
|
-
when 'gpt35_turbo' then 16_385
|
|
57
|
-
when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
|
|
58
|
-
'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
|
|
59
|
-
else 4_096
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def max_tokens_for(model_id)
|
|
64
|
-
case model_family(model_id)
|
|
65
|
-
when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
|
|
66
|
-
when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
|
|
67
|
-
when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
|
|
68
|
-
when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
|
|
69
|
-
when 'gpt4' then 8_192
|
|
70
|
-
when 'gpt35_turbo' then 4_096
|
|
71
|
-
when 'gpt4_turbo', 'gpt4o_realtime', 'gpt4o_mini_realtime' then 4_096 # rubocop:disable Lint/DuplicateBranch
|
|
72
|
-
when 'gpt4o_mini_transcribe' then 2_000
|
|
73
|
-
when 'o1', 'o1_pro', 'o3_mini' then 100_000
|
|
74
|
-
when 'o1_mini' then 65_536
|
|
75
|
-
when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
|
|
76
|
-
'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
|
|
77
|
-
else 16_384 # rubocop:disable Lint/DuplicateBranch
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
def supports_vision?(model_id)
|
|
82
|
-
case model_family(model_id)
|
|
83
|
-
when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4',
|
|
84
|
-
'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search',
|
|
85
|
-
'gpt4o_mini_search' then true
|
|
86
|
-
else false
|
|
87
|
-
end
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
def supports_functions?(model_id)
|
|
91
|
-
case model_family(model_id)
|
|
92
|
-
when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o',
|
|
93
|
-
'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
|
|
94
|
-
when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
|
|
95
|
-
'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
|
|
96
|
-
else false # rubocop:disable Lint/DuplicateBranch
|
|
97
|
-
end
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
def supports_tool_choice?(_model_id)
|
|
101
|
-
true
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
def supports_tool_parallel_control?(_model_id)
|
|
105
|
-
true
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
def supports_structured_output?(model_id)
|
|
109
|
-
case model_family(model_id)
|
|
110
|
-
when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o',
|
|
111
|
-
'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
|
|
112
|
-
else false
|
|
113
|
-
end
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
def supports_json_mode?(model_id)
|
|
117
|
-
supports_structured_output?(model_id)
|
|
118
|
-
end
|
|
119
|
-
|
|
120
48
|
PRICES = {
|
|
49
|
+
gpt_image: {
|
|
50
|
+
text: { input: 5.0, cached_input: 1.25 },
|
|
51
|
+
images: { input: 10.0, cached_input: 2.5, output: 40.0 }
|
|
52
|
+
},
|
|
53
|
+
gpt_image_mini: {
|
|
54
|
+
text: { input: 2.0, cached_input: 0.2 },
|
|
55
|
+
images: { input: 2.5, cached_input: 0.25, output: 8.0 }
|
|
56
|
+
},
|
|
57
|
+
gpt_image15: {
|
|
58
|
+
text: { input: 5.0, cached_input: 1.25, output: 10.0 },
|
|
59
|
+
images: { input: 8.0, cached_input: 2.0, output: 32.0 }
|
|
60
|
+
},
|
|
121
61
|
gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
|
|
122
62
|
gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
|
|
123
63
|
gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
|
|
124
64
|
gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
|
|
125
65
|
gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
|
|
126
66
|
gpt41_nano: { input: 0.1, output: 0.4 },
|
|
127
|
-
chatgpt4o: { input: 5.0, output: 15.0 },
|
|
128
67
|
gpt4: { input: 10.0, output: 30.0 },
|
|
129
68
|
gpt4_turbo: { input: 10.0, output: 30.0 },
|
|
130
|
-
gpt45: { input: 75.0, output: 150.0 },
|
|
131
69
|
gpt35_turbo: { input: 0.5, output: 1.5 },
|
|
132
70
|
gpt4o: { input: 2.5, output: 10.0 },
|
|
133
|
-
gpt4o_audio: { input: 2.5, output: 10.0
|
|
71
|
+
gpt4o_audio: { input: 2.5, output: 10.0 },
|
|
134
72
|
gpt4o_mini: { input: 0.15, output: 0.6 },
|
|
135
|
-
gpt4o_mini_audio: { input: 0.15, output: 0.6
|
|
73
|
+
gpt4o_mini_audio: { input: 0.15, output: 0.6 },
|
|
136
74
|
gpt4o_mini_realtime: { input: 0.6, output: 2.4 },
|
|
137
|
-
gpt4o_mini_transcribe: { input: 1.25, output: 5.0
|
|
75
|
+
gpt4o_mini_transcribe: { input: 1.25, output: 5.0 },
|
|
138
76
|
gpt4o_mini_tts: { input: 0.6, output: 12.0 },
|
|
139
77
|
gpt4o_realtime: { input: 5.0, output: 20.0 },
|
|
140
78
|
gpt4o_search: { input: 2.5, output: 10.0 },
|
|
141
|
-
gpt4o_transcribe: { input: 2.5, output: 10.0
|
|
79
|
+
gpt4o_transcribe: { input: 2.5, output: 10.0 },
|
|
142
80
|
o1: { input: 15.0, output: 60.0 },
|
|
143
81
|
o1_mini: { input: 1.1, output: 4.4 },
|
|
144
82
|
o1_pro: { input: 150.0, output: 600.0 },
|
|
@@ -154,157 +92,181 @@ module RubyLLM
|
|
|
154
92
|
moderation: { price: 0.0 }
|
|
155
93
|
}.freeze
|
|
156
94
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
95
|
+
NIL_LIMIT_FAMILIES = %w[
|
|
96
|
+
gpt_image
|
|
97
|
+
gpt_image_mini
|
|
98
|
+
gpt_image15
|
|
99
|
+
gpt4o_mini_tts
|
|
100
|
+
tts1
|
|
101
|
+
tts1_hd
|
|
102
|
+
whisper
|
|
103
|
+
moderation
|
|
104
|
+
embedding3_large
|
|
105
|
+
embedding3_small
|
|
106
|
+
embedding_ada
|
|
107
|
+
].freeze
|
|
163
108
|
|
|
164
|
-
def
|
|
165
|
-
|
|
166
|
-
prices = PRICES.fetch(family, { input: default_input_price })
|
|
167
|
-
prices[:input] || prices[:price] || default_input_price
|
|
109
|
+
def supports_tool_choice?(_model_id)
|
|
110
|
+
true
|
|
168
111
|
end
|
|
169
112
|
|
|
170
|
-
def
|
|
171
|
-
|
|
172
|
-
prices = PRICES.fetch(family, {})
|
|
173
|
-
prices[:cached_input]
|
|
113
|
+
def supports_tool_parallel_control?(_model_id)
|
|
114
|
+
true
|
|
174
115
|
end
|
|
175
116
|
|
|
176
|
-
def
|
|
177
|
-
family = model_family(model_id)
|
|
178
|
-
|
|
179
|
-
prices[:output] || prices[:price] || default_output_price
|
|
180
|
-
end
|
|
117
|
+
def context_window_for(model_id)
|
|
118
|
+
family = model_family(model_id)
|
|
119
|
+
return nil if NIL_LIMIT_FAMILIES.include?(family)
|
|
181
120
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
when
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
when
|
|
188
|
-
|
|
121
|
+
case family
|
|
122
|
+
when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
|
|
123
|
+
when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
|
|
124
|
+
'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
|
|
125
|
+
'gpt4o_transcribe', 'o1_mini' then 128_000
|
|
126
|
+
when 'gpt4' then 8_192
|
|
127
|
+
when 'gpt4o_mini_transcribe' then 16_000
|
|
128
|
+
when 'o1', 'o1_pro', 'o3_mini' then 200_000
|
|
129
|
+
when 'gpt35_turbo' then 16_385
|
|
130
|
+
else 4_096
|
|
189
131
|
end
|
|
190
132
|
end
|
|
191
133
|
|
|
192
|
-
def
|
|
193
|
-
|
|
194
|
-
|
|
134
|
+
def max_tokens_for(model_id)
|
|
135
|
+
family = model_family(model_id)
|
|
136
|
+
return nil if NIL_LIMIT_FAMILIES.include?(family)
|
|
195
137
|
|
|
196
|
-
|
|
197
|
-
|
|
138
|
+
case family
|
|
139
|
+
when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
|
|
140
|
+
when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
|
|
141
|
+
when 'gpt4' then 8_192
|
|
142
|
+
when 'gpt35_turbo' then 4_096
|
|
143
|
+
when 'gpt4o_mini_transcribe' then 2_000
|
|
144
|
+
when 'o1', 'o1_pro', 'o3_mini' then 100_000
|
|
145
|
+
when 'o1_mini' then 65_536
|
|
146
|
+
else 16_384
|
|
147
|
+
end
|
|
198
148
|
end
|
|
199
149
|
|
|
200
|
-
def
|
|
201
|
-
|
|
202
|
-
|
|
150
|
+
def critical_capabilities_for(model_id)
|
|
151
|
+
capabilities = []
|
|
152
|
+
capabilities << 'function_calling' if supports_functions?(model_id)
|
|
153
|
+
capabilities << 'structured_output' if supports_structured_output?(model_id)
|
|
154
|
+
capabilities << 'vision' if supports_vision?(model_id)
|
|
155
|
+
capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
|
|
156
|
+
capabilities
|
|
203
157
|
end
|
|
204
158
|
|
|
205
|
-
def
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
159
|
+
def pricing_for(model_id)
|
|
160
|
+
return image_pricing_for(model_id) if image_model?(model_id)
|
|
161
|
+
|
|
162
|
+
standard_pricing = {
|
|
163
|
+
input_per_million: input_price_for(model_id),
|
|
164
|
+
output_per_million: output_price_for(model_id)
|
|
165
|
+
}
|
|
211
166
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
.gsub(/^O([13]) /, 'O\1-')
|
|
217
|
-
.gsub(/^O[13] Mini/, '\0'.tr(' ', '-'))
|
|
218
|
-
.gsub(/\d\.\d /, '\0'.sub(' ', '-'))
|
|
219
|
-
.gsub(/4o (?=Mini|Preview|Turbo|Audio|Realtime|Transcribe|Tts)/, '4o-')
|
|
220
|
-
.gsub(/\bHd\b/, 'HD')
|
|
221
|
-
.gsub(/(?:Omni|Text) Moderation/, '\0'.tr(' ', '-'))
|
|
222
|
-
.gsub('Text Embedding', 'text-embedding-')
|
|
167
|
+
cached_price = cached_input_price_for(model_id)
|
|
168
|
+
standard_pricing[:cache_read_input_per_million] = cached_price if cached_price
|
|
169
|
+
|
|
170
|
+
{ text_tokens: { standard: standard_pricing } }
|
|
223
171
|
end
|
|
224
172
|
|
|
225
|
-
def
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
when 'Chatgpt' then 'ChatGPT-'
|
|
229
|
-
when 'Tts' then 'TTS-'
|
|
230
|
-
when 'Dall E' then 'DALL-E-'
|
|
173
|
+
def model_family(model_id)
|
|
174
|
+
MODEL_PATTERNS.each do |family, pattern|
|
|
175
|
+
return family.to_s if model_id.match?(pattern)
|
|
231
176
|
end
|
|
177
|
+
|
|
178
|
+
'other'
|
|
232
179
|
end
|
|
233
180
|
|
|
234
|
-
def
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
nil
|
|
181
|
+
def supports_vision?(model_id)
|
|
182
|
+
case model_family(model_id)
|
|
183
|
+
when 'gpt_image', 'gpt_image_mini', 'gpt_image15', 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini',
|
|
184
|
+
'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation',
|
|
185
|
+
'gpt4o_search'
|
|
186
|
+
true
|
|
241
187
|
else
|
|
242
|
-
|
|
188
|
+
false
|
|
243
189
|
end
|
|
244
190
|
end
|
|
245
191
|
|
|
246
|
-
def
|
|
247
|
-
(
|
|
192
|
+
def supports_functions?(model_id)
|
|
193
|
+
case model_family(model_id)
|
|
194
|
+
when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
|
|
195
|
+
'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
|
|
196
|
+
true
|
|
197
|
+
else
|
|
198
|
+
false
|
|
199
|
+
end
|
|
248
200
|
end
|
|
249
201
|
|
|
250
|
-
def
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
202
|
+
def supports_structured_output?(model_id)
|
|
203
|
+
case model_family(model_id)
|
|
204
|
+
when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4o',
|
|
205
|
+
'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
|
|
206
|
+
true
|
|
207
|
+
else
|
|
208
|
+
false
|
|
209
|
+
end
|
|
210
|
+
end
|
|
255
211
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
modalities[:input] << 'audio' if model_id.match?(/whisper|audio|tts|transcribe/)
|
|
259
|
-
modalities[:input] << 'pdf' if supports_vision?(model_id)
|
|
260
|
-
modalities[:output] << 'audio' if model_id.match?(/tts|audio/)
|
|
261
|
-
modalities[:output] << 'image' if model_id.match?(/dall-e|image/)
|
|
262
|
-
modalities[:output] << 'embeddings' if model_id.match?(/embedding/)
|
|
263
|
-
modalities[:output] << 'moderation' if model_id.match?(/moderation/)
|
|
212
|
+
def input_price_for(model_id)
|
|
213
|
+
return family_prices(model_id).dig(:text, :input) if image_model?(model_id)
|
|
264
214
|
|
|
265
|
-
|
|
215
|
+
price_for(model_id, :input, 0.50)
|
|
266
216
|
end
|
|
267
217
|
|
|
268
|
-
def
|
|
269
|
-
|
|
218
|
+
def output_price_for(model_id)
|
|
219
|
+
return family_prices(model_id).dig(:text, :output) if image_model?(model_id)
|
|
270
220
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
capabilities << 'structured_output' if supports_json_mode?(model_id)
|
|
274
|
-
capabilities << 'batch' if model_id.match?(/embedding|batch/)
|
|
275
|
-
capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
|
|
221
|
+
price_for(model_id, :output, 1.50)
|
|
222
|
+
end
|
|
276
223
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
capabilities << 'speech_generation' if model_id.match?(/audio/)
|
|
280
|
-
capabilities << 'transcription' if model_id.match?(/audio/)
|
|
281
|
-
end
|
|
224
|
+
def cached_input_price_for(model_id)
|
|
225
|
+
return family_prices(model_id).dig(:text, :cached_input) if image_model?(model_id)
|
|
282
226
|
|
|
283
|
-
|
|
227
|
+
family = model_family(model_id).to_sym
|
|
228
|
+
PRICES.fetch(family, {})[:cached_input]
|
|
284
229
|
end
|
|
285
230
|
|
|
286
|
-
def
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
231
|
+
def image_model?(model_id)
|
|
232
|
+
%w[gpt_image gpt_image_mini gpt_image15].include?(model_family(model_id))
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
def image_pricing_for(model_id)
|
|
236
|
+
text_pricing = {
|
|
237
|
+
input_per_million: input_price_for(model_id)
|
|
290
238
|
}
|
|
239
|
+
cached_text_price = cached_input_price_for(model_id)
|
|
240
|
+
text_pricing[:cache_read_input_per_million] = cached_text_price if cached_text_price
|
|
291
241
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
242
|
+
image_pricing = {
|
|
243
|
+
input_per_million: family_prices(model_id).dig(:images, :input),
|
|
244
|
+
output_per_million: family_prices(model_id).dig(:images, :output)
|
|
245
|
+
}
|
|
246
|
+
cached_image_price = family_prices(model_id).dig(:images, :cached_input)
|
|
247
|
+
image_pricing[:cache_read_input_per_million] = cached_image_price if cached_image_price
|
|
296
248
|
|
|
297
|
-
|
|
249
|
+
{
|
|
250
|
+
text_tokens: { standard: text_pricing },
|
|
251
|
+
images: { standard: image_pricing }
|
|
252
|
+
}
|
|
253
|
+
end
|
|
298
254
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
end
|
|
255
|
+
def price_for(model_id, key, fallback)
|
|
256
|
+
prices = family_prices(model_id)
|
|
257
|
+
prices = { key => fallback } if prices.empty?
|
|
258
|
+
prices[key] || prices[:price] || fallback
|
|
259
|
+
end
|
|
305
260
|
|
|
306
|
-
|
|
261
|
+
def family_prices(model_id)
|
|
262
|
+
family = model_family(model_id).to_sym
|
|
263
|
+
PRICES.fetch(family, {})
|
|
307
264
|
end
|
|
265
|
+
|
|
266
|
+
module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
|
|
267
|
+
:model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
|
|
268
|
+
:input_price_for, :output_price_for, :cached_input_price_for, :image_model?,
|
|
269
|
+
:image_pricing_for, :price_for, :family_prices
|
|
308
270
|
end
|
|
309
271
|
end
|
|
310
272
|
end
|
|
@@ -61,8 +61,7 @@ module RubyLLM
|
|
|
61
61
|
return unless message_data
|
|
62
62
|
|
|
63
63
|
usage = data['usage'] || {}
|
|
64
|
-
|
|
65
|
-
thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
|
|
64
|
+
thinking_tokens = thinking_tokens(usage)
|
|
66
65
|
content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
|
|
67
66
|
thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
|
|
68
67
|
thinking_signature = extract_thinking_signature(message_data)
|
|
@@ -72,16 +71,56 @@ module RubyLLM
|
|
|
72
71
|
content: content,
|
|
73
72
|
thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
|
|
74
73
|
tool_calls: parse_tool_calls(message_data['tool_calls']),
|
|
75
|
-
input_tokens: usage
|
|
76
|
-
output_tokens: usage
|
|
77
|
-
cached_tokens:
|
|
78
|
-
cache_creation_tokens:
|
|
74
|
+
input_tokens: input_tokens(usage),
|
|
75
|
+
output_tokens: output_tokens(usage),
|
|
76
|
+
cached_tokens: cache_read_tokens(usage),
|
|
77
|
+
cache_creation_tokens: cache_write_tokens(usage),
|
|
79
78
|
thinking_tokens: thinking_tokens,
|
|
80
79
|
model_id: data['model'],
|
|
81
80
|
raw: response
|
|
82
81
|
)
|
|
83
82
|
end
|
|
84
83
|
|
|
84
|
+
def input_tokens(usage)
|
|
85
|
+
return usage['prompt_cache_miss_tokens'] if usage['prompt_cache_miss_tokens']
|
|
86
|
+
|
|
87
|
+
prompt_tokens = usage['prompt_tokens']
|
|
88
|
+
return unless prompt_tokens
|
|
89
|
+
|
|
90
|
+
[prompt_tokens.to_i - cache_read_tokens(usage).to_i - cache_write_tokens(usage).to_i, 0].max
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def output_tokens(usage)
|
|
94
|
+
completion_tokens = usage['completion_tokens']
|
|
95
|
+
return unless completion_tokens
|
|
96
|
+
|
|
97
|
+
completion_tokens = completion_tokens.to_i
|
|
98
|
+
generated_tokens = generated_tokens_from_total(usage)
|
|
99
|
+
return completion_tokens unless generated_tokens && generated_tokens > completion_tokens
|
|
100
|
+
|
|
101
|
+
generated_tokens
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def generated_tokens_from_total(usage)
|
|
105
|
+
prompt_tokens = usage['prompt_tokens']
|
|
106
|
+
total_tokens = usage['total_tokens']
|
|
107
|
+
return unless prompt_tokens && total_tokens
|
|
108
|
+
|
|
109
|
+
[total_tokens.to_i - prompt_tokens.to_i, 0].max
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def cache_read_tokens(usage)
|
|
113
|
+
usage.dig('prompt_tokens_details', 'cached_tokens') || usage['prompt_cache_hit_tokens']
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def cache_write_tokens(usage)
|
|
117
|
+
usage.dig('prompt_tokens_details', 'cache_write_tokens') || 0
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def thinking_tokens(usage)
|
|
121
|
+
usage.dig('completion_tokens_details', 'reasoning_tokens') || usage['reasoning_tokens']
|
|
122
|
+
end
|
|
123
|
+
|
|
85
124
|
def format_messages(messages)
|
|
86
125
|
messages.map do |msg|
|
|
87
126
|
{
|
|
@@ -7,31 +7,83 @@ module RubyLLM
|
|
|
7
7
|
module Images
|
|
8
8
|
module_function
|
|
9
9
|
|
|
10
|
-
def images_url
|
|
11
|
-
'images/generations'
|
|
10
|
+
def images_url(with: nil, mask: nil)
|
|
11
|
+
editing?(with, mask) ? 'images/edits' : 'images/generations'
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
-
def render_image_payload(prompt, model:, size:)
|
|
14
|
+
def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
|
|
15
|
+
return render_edit_payload(prompt, model:, with:, mask:, params:) if editing?(with, mask)
|
|
16
|
+
|
|
15
17
|
{
|
|
16
18
|
model: model,
|
|
17
19
|
prompt: prompt,
|
|
18
20
|
n: 1,
|
|
19
21
|
size: size
|
|
20
|
-
}
|
|
22
|
+
}.merge(params)
|
|
21
23
|
end
|
|
22
24
|
|
|
23
25
|
def parse_image_response(response, model:)
|
|
24
26
|
data = response.body
|
|
25
|
-
image_data = data['data'].first
|
|
27
|
+
image_data = Array(data['data']).first
|
|
28
|
+
|
|
29
|
+
raise Error.new(nil, 'Unexpected response format from OpenAI image API') unless image_data
|
|
26
30
|
|
|
27
31
|
Image.new(
|
|
28
32
|
url: image_data['url'],
|
|
29
33
|
mime_type: 'image/png', # DALL-E typically returns PNGs
|
|
30
34
|
revised_prompt: image_data['revised_prompt'],
|
|
31
35
|
model_id: model,
|
|
32
|
-
data: image_data['b64_json']
|
|
36
|
+
data: image_data['b64_json'],
|
|
37
|
+
usage: data['usage'] || {}
|
|
33
38
|
)
|
|
34
39
|
end
|
|
40
|
+
|
|
41
|
+
def validate_paint_inputs!(with:, mask:)
|
|
42
|
+
return unless editing?(with, mask)
|
|
43
|
+
|
|
44
|
+
raise ArgumentError, 'with: is required when mask: is provided' if mask && !attachments?(with)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def render_edit_payload(prompt, model:, with:, mask:, params:)
|
|
48
|
+
payload = params.merge(
|
|
49
|
+
model: model,
|
|
50
|
+
prompt: prompt,
|
|
51
|
+
image: build_upload_parts(with, label: 'images'),
|
|
52
|
+
n: 1
|
|
53
|
+
)
|
|
54
|
+
payload[:mask] = build_upload_part(mask, label: 'mask') if mask
|
|
55
|
+
payload
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def build_upload_parts(sources, label:)
|
|
59
|
+
Array(sources).filter_map do |source|
|
|
60
|
+
next if blank_attachment?(source)
|
|
61
|
+
|
|
62
|
+
build_upload_part(source, label:)
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def build_upload_part(source, label:)
|
|
67
|
+
attachment = Attachment.new(source)
|
|
68
|
+
unless attachment.image?
|
|
69
|
+
raise UnsupportedAttachmentError,
|
|
70
|
+
"OpenAI image editing only supports image attachments for #{label}"
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
Faraday::UploadIO.new(StringIO.new(attachment.content), attachment.mime_type, attachment.filename)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def editing?(with, mask)
|
|
77
|
+
attachments?(with) || !mask.nil?
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def attachments?(value)
|
|
81
|
+
Array(value).any? { |item| !blank_attachment?(item) }
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def blank_attachment?(value)
|
|
85
|
+
value.nil? || (value.is_a?(String) && value.strip.empty?)
|
|
86
|
+
end
|
|
35
87
|
end
|
|
36
88
|
end
|
|
37
89
|
end
|
|
@@ -17,14 +17,12 @@ module RubyLLM
|
|
|
17
17
|
|
|
18
18
|
Model::Info.new(
|
|
19
19
|
id: model_id,
|
|
20
|
-
name:
|
|
20
|
+
name: model_id,
|
|
21
21
|
provider: slug,
|
|
22
|
-
family: capabilities.model_family(model_id),
|
|
23
22
|
created_at: model_data['created'] ? Time.at(model_data['created']) : nil,
|
|
24
23
|
context_window: capabilities.context_window_for(model_id),
|
|
25
24
|
max_output_tokens: capabilities.max_tokens_for(model_id),
|
|
26
|
-
|
|
27
|
-
capabilities: capabilities.capabilities_for(model_id),
|
|
25
|
+
capabilities: capabilities.critical_capabilities_for(model_id),
|
|
28
26
|
pricing: capabilities.pricing_for(model_id),
|
|
29
27
|
metadata: {
|
|
30
28
|
object: model_data['object'],
|
|
@@ -13,7 +13,6 @@ module RubyLLM
|
|
|
13
13
|
|
|
14
14
|
def build_chunk(data)
|
|
15
15
|
usage = data['usage'] || {}
|
|
16
|
-
cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
|
|
17
16
|
delta = data.dig('choices', 0, 'delta') || {}
|
|
18
17
|
content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
|
|
19
18
|
content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
|
|
@@ -27,11 +26,11 @@ module RubyLLM
|
|
|
27
26
|
signature: delta['reasoning_signature']
|
|
28
27
|
),
|
|
29
28
|
tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
|
|
30
|
-
input_tokens: usage
|
|
31
|
-
output_tokens: usage
|
|
32
|
-
cached_tokens:
|
|
33
|
-
cache_creation_tokens:
|
|
34
|
-
thinking_tokens:
|
|
29
|
+
input_tokens: OpenAI::Chat.input_tokens(usage),
|
|
30
|
+
output_tokens: OpenAI::Chat.output_tokens(usage),
|
|
31
|
+
cached_tokens: OpenAI::Chat.cache_read_tokens(usage),
|
|
32
|
+
cache_creation_tokens: OpenAI::Chat.cache_write_tokens(usage),
|
|
33
|
+
thinking_tokens: OpenAI::Chat.thinking_tokens(usage)
|
|
35
34
|
)
|
|
36
35
|
end
|
|
37
36
|
|