ruby_llm 1.6.2 → 1.6.3
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +73 -91
- data/lib/ruby_llm/active_record/acts_as.rb +2 -10
- data/lib/ruby_llm/aliases.json +4 -0
- data/lib/ruby_llm/aliases.rb +7 -25
- data/lib/ruby_llm/chat.rb +2 -10
- data/lib/ruby_llm/configuration.rb +1 -12
- data/lib/ruby_llm/content.rb +0 -2
- data/lib/ruby_llm/embedding.rb +1 -2
- data/lib/ruby_llm/error.rb +0 -8
- data/lib/ruby_llm/image.rb +0 -4
- data/lib/ruby_llm/message.rb +2 -4
- data/lib/ruby_llm/model/info.rb +0 -10
- data/lib/ruby_llm/model/pricing.rb +0 -3
- data/lib/ruby_llm/model/pricing_category.rb +0 -2
- data/lib/ruby_llm/model/pricing_tier.rb +0 -1
- data/lib/ruby_llm/models.json +623 -452
- data/lib/ruby_llm/models.rb +5 -13
- data/lib/ruby_llm/provider.rb +1 -5
- data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
- data/lib/ruby_llm/providers/anthropic/media.rb +0 -1
- data/lib/ruby_llm/providers/anthropic/tools.rb +0 -1
- data/lib/ruby_llm/providers/anthropic.rb +1 -2
- data/lib/ruby_llm/providers/bedrock/chat.rb +0 -2
- data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
- data/lib/ruby_llm/providers/bedrock/models.rb +0 -2
- data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -12
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
- data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
- data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
- data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
- data/lib/ruby_llm/providers/bedrock.rb +1 -2
- data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
- data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
- data/lib/ruby_llm/providers/gemini/capabilities.rb +26 -101
- data/lib/ruby_llm/providers/gemini/chat.rb +5 -7
- data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
- data/lib/ruby_llm/providers/gemini/images.rb +0 -1
- data/lib/ruby_llm/providers/gemini/media.rb +0 -1
- data/lib/ruby_llm/providers/gemini/models.rb +1 -2
- data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
- data/lib/ruby_llm/providers/gpustack/chat.rb +0 -1
- data/lib/ruby_llm/providers/gpustack/models.rb +3 -4
- data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
- data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
- data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
- data/lib/ruby_llm/providers/mistral/models.rb +0 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
- data/lib/ruby_llm/providers/ollama/media.rb +0 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +0 -15
- data/lib/ruby_llm/providers/openai/chat.rb +0 -3
- data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
- data/lib/ruby_llm/providers/openai/media.rb +0 -1
- data/lib/ruby_llm/providers/openai.rb +1 -3
- data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
- data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
- data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
- data/lib/ruby_llm/providers/perplexity.rb +1 -5
- data/lib/ruby_llm/railtie.rb +0 -1
- data/lib/ruby_llm/stream_accumulator.rb +1 -3
- data/lib/ruby_llm/streaming.rb +15 -24
- data/lib/ruby_llm/tool.rb +2 -19
- data/lib/ruby_llm/tool_call.rb +0 -9
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/ruby_llm.rb +0 -2
- data/lib/tasks/aliases.rake +5 -35
- data/lib/tasks/models_docs.rake +1 -11
- data/lib/tasks/models_update.rake +1 -1
- data/lib/tasks/vcr.rake +0 -7
- metadata +1 -1
@@ -7,9 +7,6 @@ module RubyLLM
|
|
7
7
|
module Capabilities
|
8
8
|
module_function
|
9
9
|
|
10
|
-
# Returns the context window size (input token limit) for the given model
|
11
|
-
# @param model_id [String] the model identifier
|
12
|
-
# @return [Integer] the context window size in tokens
|
13
10
|
def context_window_for(model_id)
|
14
11
|
case model_id
|
15
12
|
when /gemini-2\.5-pro-exp-03-25/, /gemini-2\.0-flash/, /gemini-2\.0-flash-lite/, /gemini-1\.5-flash/, /gemini-1\.5-flash-8b/ # rubocop:disable Layout/LineLength
|
@@ -18,70 +15,49 @@ module RubyLLM
|
|
18
15
|
when /gemini-embedding-exp/ then 8_192
|
19
16
|
when /text-embedding-004/, /embedding-001/ then 2_048
|
20
17
|
when /aqa/ then 7_168
|
21
|
-
when /imagen-3/ then nil
|
22
|
-
else 32_768
|
18
|
+
when /imagen-3/ then nil
|
19
|
+
else 32_768
|
23
20
|
end
|
24
21
|
end
|
25
22
|
|
26
|
-
# Returns the maximum output tokens for the given model
|
27
|
-
# @param model_id [String] the model identifier
|
28
|
-
# @return [Integer] the maximum output tokens
|
29
23
|
def max_tokens_for(model_id)
|
30
24
|
case model_id
|
31
25
|
when /gemini-2\.5-pro-exp-03-25/ then 64_000
|
32
26
|
when /gemini-2\.0-flash/, /gemini-2\.0-flash-lite/, /gemini-1\.5-flash/, /gemini-1\.5-flash-8b/, /gemini-1\.5-pro/ # rubocop:disable Layout/LineLength
|
33
27
|
8_192
|
34
|
-
when /gemini-embedding-exp/ then nil
|
35
|
-
when /text-embedding-004/, /embedding-001/ then 768
|
36
|
-
when /
|
37
|
-
|
38
|
-
else 4_096 # Sensible default
|
28
|
+
when /gemini-embedding-exp/ then nil
|
29
|
+
when /text-embedding-004/, /embedding-001/ then 768
|
30
|
+
when /imagen-3/ then 4
|
31
|
+
else 4_096
|
39
32
|
end
|
40
33
|
end
|
41
34
|
|
42
|
-
# Returns the input price per million tokens for the given model
|
43
|
-
# @param model_id [String] the model identifier
|
44
|
-
# @return [Float] the price per million tokens in USD
|
45
35
|
def input_price_for(model_id)
|
46
36
|
base_price = PRICES.dig(pricing_family(model_id), :input) || default_input_price
|
47
37
|
return base_price unless long_context_model?(model_id)
|
48
38
|
|
49
|
-
# Apply different pricing for prompts longer than 128k tokens
|
50
39
|
context_window_for(model_id) > 128_000 ? base_price * 2 : base_price
|
51
40
|
end
|
52
41
|
|
53
|
-
# Returns the output price per million tokens for the given model
|
54
|
-
# @param model_id [String] the model identifier
|
55
|
-
# @return [Float] the price per million tokens in USD
|
56
42
|
def output_price_for(model_id)
|
57
43
|
base_price = PRICES.dig(pricing_family(model_id), :output) || default_output_price
|
58
44
|
return base_price unless long_context_model?(model_id)
|
59
45
|
|
60
|
-
# Apply different pricing for prompts longer than 128k tokens
|
61
46
|
context_window_for(model_id) > 128_000 ? base_price * 2 : base_price
|
62
47
|
end
|
63
48
|
|
64
|
-
# Determines if the model supports vision (image/video) inputs
|
65
|
-
# @param model_id [String] the model identifier
|
66
|
-
# @return [Boolean] true if the model supports vision inputs
|
67
49
|
def supports_vision?(model_id)
|
68
50
|
return false if model_id.match?(/text-embedding|embedding-001|aqa/)
|
69
51
|
|
70
52
|
model_id.match?(/gemini|flash|pro|imagen/)
|
71
53
|
end
|
72
54
|
|
73
|
-
# Determines if the model supports function calling
|
74
|
-
# @param model_id [String] the model identifier
|
75
|
-
# @return [Boolean] true if the model supports function calling
|
76
55
|
def supports_functions?(model_id)
|
77
56
|
return false if model_id.match?(/text-embedding|embedding-001|aqa|flash-lite|imagen|gemini-2\.0-flash-lite/)
|
78
57
|
|
79
58
|
model_id.match?(/gemini|pro|flash/)
|
80
59
|
end
|
81
60
|
|
82
|
-
# Determines if the model supports JSON mode
|
83
|
-
# @param model_id [String] the model identifier
|
84
|
-
# @return [Boolean] true if the model supports JSON mode
|
85
61
|
def supports_json_mode?(model_id)
|
86
62
|
if model_id.match?(/text-embedding|embedding-001|aqa|imagen|gemini-2\.0-flash-lite|gemini-2\.5-pro-exp-03-25/)
|
87
63
|
return false
|
@@ -90,24 +66,18 @@ module RubyLLM
|
|
90
66
|
model_id.match?(/gemini|pro|flash/)
|
91
67
|
end
|
92
68
|
|
93
|
-
# Formats the model ID into a human-readable display name
|
94
|
-
# @param model_id [String] the model identifier
|
95
|
-
# @return [String] the formatted display name
|
96
69
|
def format_display_name(model_id)
|
97
70
|
model_id
|
98
71
|
.delete_prefix('models/')
|
99
72
|
.split('-')
|
100
73
|
.map(&:capitalize)
|
101
74
|
.join(' ')
|
102
|
-
.gsub(/(\d+\.\d+)/, ' \1')
|
103
|
-
.gsub(/\s+/, ' ')
|
104
|
-
.gsub('Aqa', 'AQA')
|
75
|
+
.gsub(/(\d+\.\d+)/, ' \1')
|
76
|
+
.gsub(/\s+/, ' ')
|
77
|
+
.gsub('Aqa', 'AQA')
|
105
78
|
.strip
|
106
79
|
end
|
107
80
|
|
108
|
-
# Determines if the model supports context caching
|
109
|
-
# @param model_id [String] the model identifier
|
110
|
-
# @return [Boolean] true if the model supports caching
|
111
81
|
def supports_caching?(model_id)
|
112
82
|
if model_id.match?(/flash-lite|gemini-2\.5-pro-exp-03-25|aqa|imagen|text-embedding|embedding-001/)
|
113
83
|
return false
|
@@ -116,23 +86,14 @@ module RubyLLM
|
|
116
86
|
model_id.match?(/gemini|pro|flash/)
|
117
87
|
end
|
118
88
|
|
119
|
-
# Determines if the model supports tuning
|
120
|
-
# @param model_id [String] the model identifier
|
121
|
-
# @return [Boolean] true if the model supports tuning
|
122
89
|
def supports_tuning?(model_id)
|
123
90
|
model_id.match?(/gemini-1\.5-flash|gemini-1\.5-flash-8b/)
|
124
91
|
end
|
125
92
|
|
126
|
-
# Determines if the model supports audio inputs
|
127
|
-
# @param model_id [String] the model identifier
|
128
|
-
# @return [Boolean] true if the model supports audio inputs
|
129
93
|
def supports_audio?(model_id)
|
130
94
|
model_id.match?(/gemini|pro|flash/)
|
131
95
|
end
|
132
96
|
|
133
|
-
# Returns the type of model (chat, embedding, image)
|
134
|
-
# @param model_id [String] the model identifier
|
135
|
-
# @return [String] the model type
|
136
97
|
def model_type(model_id)
|
137
98
|
case model_id
|
138
99
|
when /text-embedding|embedding|gemini-embedding/ then 'embedding'
|
@@ -141,9 +102,6 @@ module RubyLLM
|
|
141
102
|
end
|
142
103
|
end
|
143
104
|
|
144
|
-
# Returns the model family identifier
|
145
|
-
# @param model_id [String] the model identifier
|
146
|
-
# @return [String] the model family identifier
|
147
105
|
def model_family(model_id)
|
148
106
|
case model_id
|
149
107
|
when /gemini-2\.5-pro-exp-03-25/ then 'gemini25_pro_exp'
|
@@ -161,9 +119,6 @@ module RubyLLM
|
|
161
119
|
end
|
162
120
|
end
|
163
121
|
|
164
|
-
# Returns the pricing family identifier for the model
|
165
|
-
# @param model_id [String] the model identifier
|
166
|
-
# @return [Symbol] the pricing family identifier
|
167
122
|
def pricing_family(model_id)
|
168
123
|
case model_id
|
169
124
|
when /gemini-2\.5-pro-exp-03-25/ then :pro_2_5 # rubocop:disable Naming/VariableNumber
|
@@ -180,86 +135,75 @@ module RubyLLM
|
|
180
135
|
end
|
181
136
|
end
|
182
137
|
|
183
|
-
# Determines if the model supports long context
|
184
|
-
# @param model_id [String] the model identifier
|
185
|
-
# @return [Boolean] true if the model supports long context
|
186
138
|
def long_context_model?(model_id)
|
187
139
|
model_id.match?(/gemini-1\.5-(?:pro|flash)|gemini-1\.5-flash-8b/)
|
188
140
|
end
|
189
141
|
|
190
|
-
# Returns the context length for the model
|
191
|
-
# @param model_id [String] the model identifier
|
192
|
-
# @return [Integer] the context length in tokens
|
193
142
|
def context_length(model_id)
|
194
143
|
context_window_for(model_id)
|
195
144
|
end
|
196
145
|
|
197
|
-
# Pricing information for Gemini models (per 1M tokens in USD)
|
198
146
|
PRICES = {
|
199
|
-
flash_2: { #
|
147
|
+
flash_2: { # rubocop:disable Naming/VariableNumber
|
200
148
|
input: 0.10,
|
201
149
|
output: 0.40,
|
202
150
|
audio_input: 0.70,
|
203
151
|
cache: 0.025,
|
204
152
|
cache_storage: 1.00,
|
205
|
-
grounding_search: 35.00
|
153
|
+
grounding_search: 35.00
|
206
154
|
},
|
207
|
-
flash_lite_2: { #
|
155
|
+
flash_lite_2: { # rubocop:disable Naming/VariableNumber
|
208
156
|
input: 0.075,
|
209
157
|
output: 0.30
|
210
158
|
},
|
211
|
-
flash: {
|
159
|
+
flash: {
|
212
160
|
input: 0.075,
|
213
161
|
output: 0.30,
|
214
162
|
cache: 0.01875,
|
215
163
|
cache_storage: 1.00,
|
216
|
-
grounding_search: 35.00
|
164
|
+
grounding_search: 35.00
|
217
165
|
},
|
218
|
-
flash_8b: {
|
166
|
+
flash_8b: {
|
219
167
|
input: 0.0375,
|
220
168
|
output: 0.15,
|
221
169
|
cache: 0.01,
|
222
170
|
cache_storage: 0.25,
|
223
|
-
grounding_search: 35.00
|
171
|
+
grounding_search: 35.00
|
224
172
|
},
|
225
|
-
pro: {
|
173
|
+
pro: {
|
226
174
|
input: 1.25,
|
227
175
|
output: 5.0,
|
228
176
|
cache: 0.3125,
|
229
177
|
cache_storage: 4.50,
|
230
|
-
grounding_search: 35.00
|
178
|
+
grounding_search: 35.00
|
231
179
|
},
|
232
|
-
pro_2_5: { #
|
180
|
+
pro_2_5: { # rubocop:disable Naming/VariableNumber
|
233
181
|
input: 0.12,
|
234
182
|
output: 0.50
|
235
183
|
},
|
236
|
-
gemini_embedding: {
|
184
|
+
gemini_embedding: {
|
237
185
|
input: 0.002,
|
238
186
|
output: 0.004
|
239
187
|
},
|
240
|
-
embedding: {
|
188
|
+
embedding: {
|
241
189
|
input: 0.00,
|
242
190
|
output: 0.00
|
243
191
|
},
|
244
|
-
imagen: {
|
245
|
-
price: 0.03
|
192
|
+
imagen: {
|
193
|
+
price: 0.03
|
246
194
|
},
|
247
|
-
aqa: {
|
195
|
+
aqa: {
|
248
196
|
input: 0.00,
|
249
197
|
output: 0.00
|
250
198
|
}
|
251
199
|
}.freeze
|
252
200
|
|
253
|
-
# Default input price for unknown models
|
254
|
-
# @return [Float] the default input price per million tokens
|
255
201
|
def default_input_price
|
256
|
-
0.075
|
202
|
+
0.075
|
257
203
|
end
|
258
204
|
|
259
|
-
# Default output price for unknown models
|
260
|
-
# @return [Float] the default output price per million tokens
|
261
205
|
def default_output_price
|
262
|
-
0.30
|
206
|
+
0.30
|
263
207
|
end
|
264
208
|
|
265
209
|
def modalities_for(model_id)
|
@@ -268,19 +212,13 @@ module RubyLLM
|
|
268
212
|
output: ['text']
|
269
213
|
}
|
270
214
|
|
271
|
-
# Vision support
|
272
215
|
if supports_vision?(model_id)
|
273
216
|
modalities[:input] << 'image'
|
274
217
|
modalities[:input] << 'pdf'
|
275
218
|
end
|
276
219
|
|
277
|
-
# Audio support
|
278
220
|
modalities[:input] << 'audio' if model_id.match?(/audio/)
|
279
|
-
|
280
|
-
# Embedding output
|
281
221
|
modalities[:output] << 'embeddings' if model_id.match?(/embedding|gemini-embedding/)
|
282
|
-
|
283
|
-
# Image output for imagen models
|
284
222
|
modalities[:output] = ['image'] if model_id.match?(/imagen/)
|
285
223
|
|
286
224
|
modalities
|
@@ -289,21 +227,11 @@ module RubyLLM
|
|
289
227
|
def capabilities_for(model_id)
|
290
228
|
capabilities = ['streaming']
|
291
229
|
|
292
|
-
# Function calling
|
293
230
|
capabilities << 'function_calling' if supports_functions?(model_id)
|
294
|
-
|
295
|
-
# JSON mode
|
296
231
|
capabilities << 'structured_output' if supports_json_mode?(model_id)
|
297
|
-
|
298
|
-
# Batch processing
|
299
232
|
capabilities << 'batch' if model_id.match?(/embedding|flash/)
|
300
|
-
|
301
|
-
# Caching
|
302
233
|
capabilities << 'caching' if supports_caching?(model_id)
|
303
|
-
|
304
|
-
# Tuning
|
305
234
|
capabilities << 'fine_tuning' if supports_tuning?(model_id)
|
306
|
-
|
307
235
|
capabilities
|
308
236
|
end
|
309
237
|
|
@@ -316,10 +244,8 @@ module RubyLLM
|
|
316
244
|
output_per_million: prices[:output]
|
317
245
|
}
|
318
246
|
|
319
|
-
# Add cached pricing if available
|
320
247
|
standard_pricing[:cached_input_per_million] = prices[:input_hit] if prices[:input_hit]
|
321
248
|
|
322
|
-
# Batch pricing (typically 50% discount)
|
323
249
|
batch_pricing = {
|
324
250
|
input_per_million: (standard_pricing[:input_per_million] || 0) * 0.5,
|
325
251
|
output_per_million: (standard_pricing[:output_per_million] || 0) * 0.5
|
@@ -336,7 +262,6 @@ module RubyLLM
|
|
336
262
|
}
|
337
263
|
}
|
338
264
|
|
339
|
-
# Add embedding pricing if applicable
|
340
265
|
if model_id.match?(/embedding|gemini-embedding/)
|
341
266
|
pricing[:embeddings] = {
|
342
267
|
standard: { input_per_million: prices[:price] || 0.002 }
|
@@ -12,14 +12,14 @@ module RubyLLM
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
|
15
|
-
@model = model
|
15
|
+
@model = model
|
16
16
|
payload = {
|
17
17
|
contents: format_messages(messages),
|
18
|
-
generationConfig: {
|
19
|
-
temperature: temperature
|
20
|
-
}
|
18
|
+
generationConfig: {}
|
21
19
|
}
|
22
20
|
|
21
|
+
payload[:generationConfig][:temperature] = temperature unless temperature.nil?
|
22
|
+
|
23
23
|
if schema
|
24
24
|
payload[:generationConfig][:responseMimeType] = 'application/json'
|
25
25
|
payload[:generationConfig][:responseSchema] = convert_schema_to_gemini(schema)
|
@@ -43,7 +43,7 @@ module RubyLLM
|
|
43
43
|
def format_role(role)
|
44
44
|
case role
|
45
45
|
when :assistant then 'model'
|
46
|
-
when :system, :tool then 'user'
|
46
|
+
when :system, :tool then 'user'
|
47
47
|
else role.to_s
|
48
48
|
end
|
49
49
|
end
|
@@ -118,10 +118,8 @@ module RubyLLM
|
|
118
118
|
candidate = data.dig('candidates', 0)
|
119
119
|
return '' unless candidate
|
120
120
|
|
121
|
-
# Content will be empty for function calls
|
122
121
|
return '' if function_call?(candidate)
|
123
122
|
|
124
|
-
# Extract text content
|
125
123
|
parts = candidate.dig('content', 'parts')
|
126
124
|
text_parts = parts&.select { |p| p['text'] }
|
127
125
|
return '' unless text_parts&.any?
|
@@ -17,8 +17,6 @@ module RubyLLM
|
|
17
17
|
|
18
18
|
def parse_embedding_response(response, model:, text:)
|
19
19
|
vectors = response.body['embeddings']&.map { |e| e['values'] }
|
20
|
-
# If we only got one embedding AND the input was a single string (not an array),
|
21
|
-
# return it as a single vector
|
22
20
|
vectors = vectors.first if vectors&.length == 1 && !text.is_a?(Array)
|
23
21
|
|
24
22
|
Embedding.new(vectors:, model:, input_tokens: 0)
|
@@ -8,7 +8,6 @@ module RubyLLM
|
|
8
8
|
module_function
|
9
9
|
|
10
10
|
def format_content(content)
|
11
|
-
# Convert Hash/Array back to JSON string for API
|
12
11
|
return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
|
13
12
|
return [format_text(content)] unless content.is_a?(Content)
|
14
13
|
|
@@ -13,7 +13,6 @@ module RubyLLM
|
|
13
13
|
|
14
14
|
def parse_list_models_response(response, slug, capabilities)
|
15
15
|
Array(response.body['models']).map do |model_data|
|
16
|
-
# Extract model ID without "models/" prefix
|
17
16
|
model_id = model_data['name'].gsub('models/', '')
|
18
17
|
|
19
18
|
Model::Info.new(
|
@@ -21,7 +20,7 @@ module RubyLLM
|
|
21
20
|
name: model_data['displayName'],
|
22
21
|
provider: slug,
|
23
22
|
family: capabilities.model_family(model_id),
|
24
|
-
created_at: nil,
|
23
|
+
created_at: nil,
|
25
24
|
context_window: model_data['inputTokenLimit'] || capabilities.context_window_for(model_id),
|
26
25
|
max_output_tokens: model_data['outputTokenLimit'] || capabilities.max_tokens_for(model_id),
|
27
26
|
modalities: capabilities.modalities_for(model_id),
|
@@ -5,7 +5,6 @@ module RubyLLM
|
|
5
5
|
class Gemini
|
6
6
|
# Tools methods for the Gemini API implementation
|
7
7
|
module Tools
|
8
|
-
# Format tools for Gemini API
|
9
8
|
def format_tools(tools)
|
10
9
|
return [] if tools.empty?
|
11
10
|
|
@@ -14,7 +13,6 @@ module RubyLLM
|
|
14
13
|
}]
|
15
14
|
end
|
16
15
|
|
17
|
-
# Extract tool calls from response data
|
18
16
|
def extract_tool_calls(data)
|
19
17
|
return nil unless data
|
20
18
|
|
@@ -43,7 +41,6 @@ module RubyLLM
|
|
43
41
|
|
44
42
|
private
|
45
43
|
|
46
|
-
# Format a single tool for Gemini API
|
47
44
|
def function_declaration_for(tool)
|
48
45
|
{
|
49
46
|
name: tool.name,
|
@@ -52,7 +49,6 @@ module RubyLLM
|
|
52
49
|
}.compact
|
53
50
|
end
|
54
51
|
|
55
|
-
# Format tool parameters for Gemini API
|
56
52
|
def format_parameters(parameters)
|
57
53
|
{
|
58
54
|
type: 'OBJECT',
|
@@ -66,7 +62,6 @@ module RubyLLM
|
|
66
62
|
}
|
67
63
|
end
|
68
64
|
|
69
|
-
# Convert RubyLLM param types to Gemini API types
|
70
65
|
def param_type_for_gemini(type)
|
71
66
|
case type.to_s.downcase
|
72
67
|
when 'integer', 'number', 'float' then 'NUMBER'
|
@@ -30,13 +30,12 @@ module RubyLLM
|
|
30
30
|
categories: model['categories']
|
31
31
|
},
|
32
32
|
context_window: model.dig('meta', 'n_ctx'),
|
33
|
-
# Using context window as max tokens since it's not explicitly provided
|
34
33
|
max_tokens: model.dig('meta', 'n_ctx'),
|
35
34
|
supports_vision: model.dig('meta', 'support_vision') || false,
|
36
35
|
supports_functions: model.dig('meta', 'support_tool_calls') || false,
|
37
|
-
supports_json_mode: true,
|
38
|
-
input_price_per_million: 0.0,
|
39
|
-
output_price_per_million: 0.0
|
36
|
+
supports_json_mode: true,
|
37
|
+
input_price_per_million: 0.0,
|
38
|
+
output_price_per_million: 0.0
|
40
39
|
)
|
41
40
|
end
|
42
41
|
end
|
@@ -8,22 +8,18 @@ module RubyLLM
|
|
8
8
|
module_function
|
9
9
|
|
10
10
|
def supports_streaming?(model_id)
|
11
|
-
# All chat models support streaming, but not embedding/moderation/OCR/transcription
|
12
11
|
!model_id.match?(/embed|moderation|ocr|transcriptions/)
|
13
12
|
end
|
14
13
|
|
15
14
|
def supports_tools?(model_id)
|
16
|
-
# Most chat models support tools except embedding/moderation/OCR/voxtral/transcription
|
17
15
|
!model_id.match?(/embed|moderation|ocr|voxtral|transcriptions|mistral-(tiny|small)-(2312|2402)/)
|
18
16
|
end
|
19
17
|
|
20
18
|
def supports_vision?(model_id)
|
21
|
-
# Models with vision capabilities
|
22
19
|
model_id.match?(/pixtral|mistral-small-(2503|2506)|mistral-medium/)
|
23
20
|
end
|
24
21
|
|
25
22
|
def supports_json_mode?(model_id)
|
26
|
-
# Most chat models support JSON mode (structured output)
|
27
23
|
!model_id.match?(/embed|moderation|ocr|voxtral|transcriptions/) && supports_tools?(model_id)
|
28
24
|
end
|
29
25
|
|
@@ -58,11 +54,11 @@ module RubyLLM
|
|
58
54
|
end
|
59
55
|
|
60
56
|
def context_window_for(_model_id)
|
61
|
-
32_768
|
57
|
+
32_768
|
62
58
|
end
|
63
59
|
|
64
60
|
def max_tokens_for(_model_id)
|
65
|
-
8192
|
61
|
+
8192
|
66
62
|
end
|
67
63
|
|
68
64
|
def modalities_for(model_id)
|
@@ -97,7 +93,6 @@ module RubyLLM
|
|
97
93
|
capabilities << 'structured_output' if supports_json_mode?(model_id)
|
98
94
|
capabilities << 'vision' if supports_vision?(model_id)
|
99
95
|
|
100
|
-
# Model-specific capabilities
|
101
96
|
capabilities << 'reasoning' if model_id.match?(/magistral/)
|
102
97
|
capabilities << 'batch' unless model_id.match?(/voxtral|ocr|embed|moderation/)
|
103
98
|
capabilities << 'fine_tuning' if model_id.match?(/mistral-(small|medium|large)|devstral/)
|
@@ -117,12 +112,10 @@ module RubyLLM
|
|
117
112
|
|
118
113
|
def release_date_for(model_id)
|
119
114
|
case model_id
|
120
|
-
# 2023 releases
|
121
115
|
when 'open-mistral-7b', 'mistral-tiny' then '2023-09-27'
|
122
116
|
when 'mistral-medium-2312', 'mistral-small-2312', 'mistral-small',
|
123
117
|
'open-mixtral-8x7b', 'mistral-tiny-2312' then '2023-12-11'
|
124
118
|
|
125
|
-
# 2024 releases
|
126
119
|
when 'mistral-embed' then '2024-01-11'
|
127
120
|
when 'mistral-large-2402', 'mistral-small-2402' then '2024-02-26'
|
128
121
|
when 'open-mixtral-8x22b', 'open-mixtral-8x22b-2404' then '2024-04-17'
|
@@ -140,7 +133,6 @@ module RubyLLM
|
|
140
133
|
when 'codestral-2411-rc5', 'mistral-moderation-2411', 'mistral-moderation-latest' then '2024-11-26'
|
141
134
|
when 'codestral-2412' then '2024-12-17'
|
142
135
|
|
143
|
-
# 2025 releases
|
144
136
|
when 'mistral-small-2501' then '2025-01-13'
|
145
137
|
when 'codestral-2501' then '2025-01-14'
|
146
138
|
when 'mistral-saba-2502', 'mistral-saba-latest' then '2025-02-18'
|
@@ -8,14 +8,12 @@ module RubyLLM
|
|
8
8
|
module_function
|
9
9
|
|
10
10
|
def format_role(role)
|
11
|
-
# Mistral doesn't use the new OpenAI convention for system prompts
|
12
11
|
role.to_s
|
13
12
|
end
|
14
13
|
|
15
14
|
# rubocop:disable Metrics/ParameterLists
|
16
15
|
def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
|
17
16
|
payload = super
|
18
|
-
# Mistral doesn't support stream_options
|
19
17
|
payload.delete(:stream_options)
|
20
18
|
payload
|
21
19
|
end
|
@@ -12,7 +12,6 @@ module RubyLLM
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def render_embedding_payload(text, model:, dimensions:) # rubocop:disable Lint/UnusedMethodArgument
|
15
|
-
# Mistral doesn't support dimensions parameter
|
16
15
|
{
|
17
16
|
model: model,
|
18
17
|
input: text
|
@@ -24,8 +23,6 @@ module RubyLLM
|
|
24
23
|
input_tokens = data.dig('usage', 'prompt_tokens') || 0
|
25
24
|
vectors = data['data'].map { |d| d['embedding'] }
|
26
25
|
|
27
|
-
# If we only got one embedding AND the input was a single string (not an array),
|
28
|
-
# return it as a single vector
|
29
26
|
vectors = vectors.first if vectors.length == 1 && !text.is_a?(Array)
|
30
27
|
|
31
28
|
Embedding.new(vectors:, model:, input_tokens:)
|
@@ -21,7 +21,6 @@ module RubyLLM
|
|
21
21
|
Array(response.body['data']).map do |model_data|
|
22
22
|
model_id = model_data['id']
|
23
23
|
|
24
|
-
# Use fixed release date for Mistral models
|
25
24
|
release_date = capabilities.release_date_for(model_id)
|
26
25
|
created_at = release_date ? Time.parse(release_date) : nil
|
27
26
|
|
@@ -235,20 +235,11 @@ module RubyLLM
|
|
235
235
|
|
236
236
|
# Vision support
|
237
237
|
modalities[:input] << 'image' if supports_vision?(model_id)
|
238
|
-
|
239
|
-
# Audio support
|
240
238
|
modalities[:input] << 'audio' if model_id.match?(/whisper|audio|tts|transcribe/)
|
241
|
-
|
242
|
-
# PDF support
|
243
239
|
modalities[:input] << 'pdf' if supports_vision?(model_id)
|
244
|
-
|
245
|
-
# Output modalities
|
246
240
|
modalities[:output] << 'audio' if model_id.match?(/tts|audio/)
|
247
|
-
|
248
241
|
modalities[:output] << 'image' if model_id.match?(/dall-e|image/)
|
249
|
-
|
250
242
|
modalities[:output] << 'embeddings' if model_id.match?(/embedding/)
|
251
|
-
|
252
243
|
modalities[:output] << 'moderation' if model_id.match?(/moderation/)
|
253
244
|
|
254
245
|
modalities
|
@@ -257,13 +248,10 @@ module RubyLLM
|
|
257
248
|
def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
|
258
249
|
capabilities = []
|
259
250
|
|
260
|
-
# Common capabilities
|
261
251
|
capabilities << 'streaming' unless model_id.match?(/moderation|embedding/)
|
262
252
|
capabilities << 'function_calling' if supports_functions?(model_id)
|
263
253
|
capabilities << 'structured_output' if supports_json_mode?(model_id)
|
264
254
|
capabilities << 'batch' if model_id.match?(/embedding|batch/)
|
265
|
-
|
266
|
-
# Advanced capabilities
|
267
255
|
capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
|
268
256
|
|
269
257
|
if model_id.match?(/gpt-4-turbo|gpt-4o/)
|
@@ -281,16 +269,13 @@ module RubyLLM
|
|
281
269
|
output_per_million: output_price_for(model_id)
|
282
270
|
}
|
283
271
|
|
284
|
-
# Add cached pricing if available
|
285
272
|
if respond_to?(:cached_input_price_for)
|
286
273
|
cached_price = cached_input_price_for(model_id)
|
287
274
|
standard_pricing[:cached_input_per_million] = cached_price if cached_price
|
288
275
|
end
|
289
276
|
|
290
|
-
# Pricing structure
|
291
277
|
pricing = { text_tokens: { standard: standard_pricing } }
|
292
278
|
|
293
|
-
# Add batch pricing if applicable
|
294
279
|
if model_id.match?(/embedding|batch/)
|
295
280
|
pricing[:text_tokens][:batch] = {
|
296
281
|
input_per_million: standard_pricing[:input_per_million] * 0.5,
|
@@ -18,13 +18,10 @@ module RubyLLM
|
|
18
18
|
stream: stream
|
19
19
|
}
|
20
20
|
|
21
|
-
# Only include temperature if it's not nil (some models don't accept it)
|
22
21
|
payload[:temperature] = temperature unless temperature.nil?
|
23
|
-
|
24
22
|
payload[:tools] = tools.map { |_, tool| tool_for(tool) } if tools.any?
|
25
23
|
|
26
24
|
if schema
|
27
|
-
# Use strict mode from schema if specified, default to true
|
28
25
|
strict = schema[:strict] != false
|
29
26
|
|
30
27
|
payload[:response_format] = {
|