ruby_llm 1.9.1 → 1.10.0
This diff shows the published contents of these two package versions as they appear in their public registry, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +1 -1
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
- data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
- data/lib/ruby_llm/active_record/message_methods.rb +41 -8
- data/lib/ruby_llm/aliases.json +101 -21
- data/lib/ruby_llm/chat.rb +10 -7
- data/lib/ruby_llm/configuration.rb +1 -1
- data/lib/ruby_llm/message.rb +37 -11
- data/lib/ruby_llm/models.json +21119 -10230
- data/lib/ruby_llm/models.rb +271 -27
- data/lib/ruby_llm/models_schema.json +2 -2
- data/lib/ruby_llm/provider.rb +4 -3
- data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
- data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
- data/lib/ruby_llm/providers/bedrock/models.rb +21 -15
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
- data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
- data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
- data/lib/ruby_llm/providers/openai/chat.rb +87 -3
- data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
- data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
- data/lib/ruby_llm/providers/openai.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
- data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
- data/lib/ruby_llm/providers/openrouter.rb +2 -0
- data/lib/ruby_llm/providers/vertexai.rb +5 -1
- data/lib/ruby_llm/stream_accumulator.rb +111 -14
- data/lib/ruby_llm/streaming.rb +76 -54
- data/lib/ruby_llm/thinking.rb +49 -0
- data/lib/ruby_llm/tokens.rb +47 -0
- data/lib/ruby_llm/tool.rb +1 -1
- data/lib/ruby_llm/tool_call.rb +6 -3
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models.rake +20 -13
- metadata +12 -5
data/lib/ruby_llm/models.rb
CHANGED
@@ -5,6 +5,18 @@ module RubyLLM
   class Models
     include Enumerable
 
+    MODELS_DEV_PROVIDER_MAP = {
+      'openai' => 'openai',
+      'anthropic' => 'anthropic',
+      'google' => 'gemini',
+      'google-vertex' => 'vertexai',
+      'amazon-bedrock' => 'bedrock',
+      'deepseek' => 'deepseek',
+      'mistral' => 'mistral',
+      'openrouter' => 'openrouter',
+      'perplexity' => 'perplexity'
+    }.freeze
+
     class << self
       def instance
         @instance ||= new
@@ -26,24 +38,53 @@ module RubyLLM
       end
 
       def refresh!(remote_only: false)
-
-
-
+        existing_models = load_existing_models
+
+        provider_fetch = fetch_provider_models(remote_only: remote_only)
+        log_provider_fetch(provider_fetch)
+
+        models_dev_fetch = fetch_models_dev_models(existing_models)
+        log_models_dev_fetch(models_dev_fetch)
+
+        merged_models = merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
         @instance = new(merged_models)
       end
 
-      def
+      def fetch_provider_models(remote_only: true) # rubocop:disable Metrics/PerceivedComplexity
         config = RubyLLM.config
+        provider_classes = remote_only ? Provider.remote_providers.values : Provider.providers.values
         configured_classes = if remote_only
                                Provider.configured_remote_providers(config)
                              else
                                Provider.configured_providers(config)
                              end
-        configured = configured_classes.
+        configured = configured_classes.select { |klass| provider_classes.include?(klass) }
+        result = {
+          models: [],
+          fetched_providers: [],
+          configured_names: configured.map(&:name),
+          failed: []
+        }
+
+        provider_classes.each do |provider_class|
+          next if remote_only && provider_class.local?
+          next unless provider_class.configured?(config)
+
+          begin
+            result[:models].concat(provider_class.new(config).list_models)
+            result[:fetched_providers] << provider_class.slug
+          rescue StandardError => e
+            result[:failed] << { name: provider_class.name, slug: provider_class.slug, error: e }
+          end
+        end
 
-
+        result[:fetched_providers].uniq!
+        result
+      end
 
-
+      # Backwards-compatible wrapper used by specs.
+      def fetch_from_providers(remote_only: true)
+        fetch_provider_models(remote_only: remote_only)[:models]
       end
 
       def resolve(model_id, provider: nil, assume_exists: false, config: nil) # rubocop:disable Metrics/PerceivedComplexity
@@ -91,32 +132,86 @@ module RubyLLM
         instance.respond_to?(method, include_private) || super
       end
 
-      def
-        RubyLLM.logger.info 'Fetching models from
+      def fetch_models_dev_models(existing_models) # rubocop:disable Metrics/PerceivedComplexity
+        RubyLLM.logger.info 'Fetching models from models.dev API...'
 
         connection = Connection.basic do |f|
           f.request :json
           f.response :json, parser_options: { symbolize_names: true }
         end
-        response = connection.get 'https://api.
-
-
+        response = connection.get 'https://models.dev/api.json'
+        providers = response.body || {}
+
+        models = providers.flat_map do |provider_key, provider_data|
+          provider_slug = MODELS_DEV_PROVIDER_MAP[provider_key.to_s]
+          next [] unless provider_slug
+
+          (provider_data[:models] || {}).values.map do |model_data|
+            Model::Info.new(models_dev_model_to_info(model_data, provider_slug, provider_key.to_s))
+          end
+        end
+        { models: models.reject { |model| model.provider.nil? || model.id.nil? }, fetched: true }
+      rescue StandardError => e
+        RubyLLM.logger.warn("Failed to fetch models.dev (#{e.class}: #{e.message}). Keeping existing.")
+        {
+          models: existing_models.select { |model| model.metadata[:source] == 'models.dev' },
+          fetched: false
+        }
+      end
+
+      def load_existing_models
+        existing_models = instance&.all
+        existing_models = read_from_json if existing_models.nil? || existing_models.empty?
+        existing_models
       end
 
-      def
-
+      def log_provider_fetch(provider_fetch)
+        RubyLLM.logger.info "Fetching models from providers: #{provider_fetch[:configured_names].join(', ')}"
+        provider_fetch[:failed].each do |failure|
+          RubyLLM.logger.warn(
+            "Failed to fetch #{failure[:name]} models (#{failure[:error].class}: #{failure[:error].message}). " \
+            'Keeping existing.'
+          )
+        end
+      end
+
+      def log_models_dev_fetch(models_dev_fetch)
+        return if models_dev_fetch[:fetched]
+
+        RubyLLM.logger.warn('Using cached models.dev data due to fetch failure.')
+      end
+
+      def merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
+        existing_by_provider = existing_models.group_by(&:provider)
+        preserved_models = existing_by_provider
+                           .except(*provider_fetch[:fetched_providers])
+                           .values
+                           .flatten
+
+        provider_models = provider_fetch[:models] + preserved_models
+        models_dev_models = if models_dev_fetch[:fetched]
+                              models_dev_fetch[:models]
+                            else
+                              existing_models.select { |model| model.metadata[:source] == 'models.dev' }
+                            end
+
+        merge_models(provider_models, models_dev_models)
+      end
+
+      def merge_models(provider_models, models_dev_models)
+        models_dev_by_key = index_by_key(models_dev_models)
         provider_by_key = index_by_key(provider_models)
 
-        all_keys =
+        all_keys = models_dev_by_key.keys | provider_by_key.keys
 
         models = all_keys.map do |key|
-
+          models_dev_model = find_models_dev_model(key, models_dev_by_key)
          provider_model = provider_by_key[key]
 
-          if
-            add_provider_metadata(
-          elsif
-
+          if models_dev_model && provider_model
+            add_provider_metadata(models_dev_model, provider_model)
+          elsif models_dev_model
+            models_dev_model
          else
            provider_model
          end
@@ -125,18 +220,33 @@ module RubyLLM
         models.sort_by { |m| [m.provider, m.id] }
       end
 
-      def
+      def find_models_dev_model(key, models_dev_by_key)
         # Direct match
-        return
+        return models_dev_by_key[key] if models_dev_by_key[key]
 
-        # VertexAI uses same models as Gemini
         provider, model_id = key.split(':', 2)
+        if provider == 'bedrock'
+          normalized_id = model_id.sub(/^[a-z]{2}\./, '')
+          context_override = nil
+          normalized_id = normalized_id.gsub(/:(\d+)k\b/) do
+            context_override = Regexp.last_match(1).to_i * 1000
+            ''
+          end
+          bedrock_model = models_dev_by_key["bedrock:#{normalized_id}"]
+          if bedrock_model
+            data = bedrock_model.to_h.merge(id: model_id)
+            data[:context_window] = context_override if context_override
+            return Model::Info.new(data)
+          end
+        end
+
+        # VertexAI uses same models as Gemini
         return unless provider == 'vertexai'
 
-        gemini_model =
+        gemini_model = models_dev_by_key["gemini:#{model_id}"]
         return unless gemini_model
 
-        # Return Gemini's
+        # Return Gemini's models.dev data but with VertexAI as provider
         Model::Info.new(gemini_model.to_h.merge(provider: 'vertexai'))
       end
 
@@ -146,11 +256,130 @@ module RubyLLM
         end
       end
 
-      def add_provider_metadata(
-        data =
+      def add_provider_metadata(models_dev_model, provider_model) # rubocop:disable Metrics/PerceivedComplexity
+        data = models_dev_model.to_h
+        data[:name] = provider_model.name if blank_value?(data[:name])
+        data[:family] = provider_model.family if blank_value?(data[:family])
+        data[:created_at] = provider_model.created_at if blank_value?(data[:created_at])
+        data[:context_window] = provider_model.context_window if blank_value?(data[:context_window])
+        data[:max_output_tokens] = provider_model.max_output_tokens if blank_value?(data[:max_output_tokens])
+        data[:modalities] = provider_model.modalities.to_h if blank_value?(data[:modalities])
+        data[:pricing] = provider_model.pricing.to_h if blank_value?(data[:pricing])
         data[:metadata] = provider_model.metadata.merge(data[:metadata] || {})
+        data[:capabilities] = (models_dev_model.capabilities + provider_model.capabilities).uniq
+        normalize_embedding_modalities(data)
         Model::Info.new(data)
       end
+
+      def normalize_embedding_modalities(data)
+        return unless data[:id].to_s.include?('embedding')
+
+        modalities = data[:modalities].to_h
+        modalities[:input] = ['text'] if modalities[:input].nil? || modalities[:input].empty?
+        modalities[:output] = ['embeddings']
+        data[:modalities] = modalities
+      end
+
+      def blank_value?(value)
+        return true if value.nil?
+        return value.empty? if value.is_a?(String) || value.is_a?(Array)
+
+        if value.is_a?(Hash)
+          return true if value.empty?
+
+          return value.values.all? { |nested| blank_value?(nested) }
+        end
+
+        false
+      end
+
+      def models_dev_model_to_info(model_data, provider_slug, provider_key)
+        modalities = normalize_models_dev_modalities(model_data[:modalities])
+        capabilities = models_dev_capabilities(model_data, modalities)
+
+        data = {
+          id: model_data[:id],
+          name: model_data[:name] || model_data[:id],
+          provider: provider_slug,
+          family: model_data[:family],
+          created_at: model_data[:release_date] || model_data[:last_updated],
+          context_window: model_data.dig(:limit, :context),
+          max_output_tokens: model_data.dig(:limit, :output),
+          knowledge_cutoff: normalize_models_dev_knowledge(model_data[:knowledge]),
+          modalities: modalities,
+          capabilities: capabilities,
+          pricing: models_dev_pricing(model_data[:cost]),
+          metadata: models_dev_metadata(model_data, provider_key)
+        }
+
+        normalize_embedding_modalities(data)
+        data
+      end
+
+      def models_dev_capabilities(model_data, modalities)
+        capabilities = []
+        capabilities << 'function_calling' if model_data[:tool_call]
+        capabilities << 'structured_output' if model_data[:structured_output]
+        capabilities << 'reasoning' if model_data[:reasoning]
+        capabilities << 'vision' if modalities[:input].intersect?(%w[image video pdf])
+        capabilities.uniq
+      end
+
+      def models_dev_pricing(cost)
+        return {} unless cost
+
+        text_standard = {
+          input_per_million: cost[:input],
+          output_per_million: cost[:output],
+          cached_input_per_million: cost[:cache_read],
+          reasoning_output_per_million: cost[:reasoning]
+        }.compact
+
+        audio_standard = {
+          input_per_million: cost[:input_audio],
+          output_per_million: cost[:output_audio]
+        }.compact
+
+        pricing = {}
+        pricing[:text_tokens] = { standard: text_standard } if text_standard.any?
+        pricing[:audio_tokens] = { standard: audio_standard } if audio_standard.any?
+        pricing
+      end
+
+      def models_dev_metadata(model_data, provider_key)
+        metadata = {
+          source: 'models.dev',
+          provider_id: provider_key,
+          open_weights: model_data[:open_weights],
+          attachment: model_data[:attachment],
+          temperature: model_data[:temperature],
+          last_updated: model_data[:last_updated],
+          status: model_data[:status],
+          interleaved: model_data[:interleaved],
+          cost: model_data[:cost],
+          limit: model_data[:limit],
+          knowledge: model_data[:knowledge]
+        }
+        metadata.compact
+      end
+
+      def normalize_models_dev_modalities(modalities)
+        normalized = { input: [], output: [] }
+        return normalized unless modalities
+
+        normalized[:input] = Array(modalities[:input]).compact
+        normalized[:output] = Array(modalities[:output]).compact
+        normalized
+      end
+
+      def normalize_models_dev_knowledge(value)
+        return if value.nil?
+        return value if value.is_a?(Date)
+
+        Date.parse(value.to_s)
+      rescue ArgumentError
+        nil
+      end
     end
 
     def initialize(models = nil)
@@ -217,11 +446,26 @@ module RubyLLM
 
     def find_with_provider(model_id, provider)
       resolved_id = Aliases.resolve(model_id, provider)
+      resolved_id = resolve_bedrock_region_id(resolved_id) if provider.to_s == 'bedrock'
       all.find { |m| m.id == model_id && m.provider == provider.to_s } ||
         all.find { |m| m.id == resolved_id && m.provider == provider.to_s } ||
         raise(ModelNotFoundError, "Unknown model: #{model_id} for provider: #{provider}")
     end
 
+    def resolve_bedrock_region_id(model_id)
+      region = RubyLLM.config.bedrock_region.to_s
+      return model_id if region.empty?
+
+      candidate_id = Providers::Bedrock::Models.with_region_prefix(model_id, region)
+      return model_id if candidate_id == model_id
+
+      candidate = all.find { |m| m.provider == 'bedrock' && m.id == candidate_id }
+      return model_id unless candidate
+
+      inference_types = Array(candidate.metadata[:inference_types] || candidate.metadata['inference_types'])
+      Providers::Bedrock::Models.normalize_inference_profile_id(model_id, inference_types, region)
+    end
+
     def find_without_provider(model_id)
       all.find { |m| m.id == model_id } ||
         all.find { |m| m.id == Aliases.resolve(model_id) } ||
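The refresh! rewrite above merges three inputs instead of two: live provider listings, the models.dev catalog, and the previously cached registry, so a failed provider fetch now preserves that provider's cached entries instead of dropping them. A minimal usage sketch in Ruby, assuming only the public API visible in this diff (the API-key name and output format are illustrative):

require 'ruby_llm'

RubyLLM.configure do |config|
  config.openai_api_key = ENV['OPENAI_API_KEY']
end

# Fetches from every configured provider plus models.dev; providers that
# raise during fetch land in result[:failed] and keep their cached models.
RubyLLM.models.refresh!

RubyLLM.models.all.first(5).each do |model|
  # metadata[:source] == 'models.dev' marks catalog-sourced entries.
  puts [model.provider, model.id, model.metadata[:source]].join(' | ')
end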
data/lib/ruby_llm/models_schema.json
CHANGED
@@ -55,7 +55,7 @@
         "type": "array",
         "items": {
           "type": "string",
-          "enum": ["text", "image", "audio", "embeddings", "moderation"]
+          "enum": ["text", "image", "audio", "video", "embeddings", "moderation"]
         },
         "uniqueItems": true,
         "description": "Supported output modalities"
@@ -165,4 +165,4 @@
       }
     }
   }
-}
+}
data/lib/ruby_llm/provider.rb
CHANGED
@@ -37,7 +37,7 @@ module RubyLLM
       self.class.configuration_requirements
     end
 
-    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, &) # rubocop:disable Metrics/ParameterLists
       normalized_temperature = maybe_normalize_temperature(temperature, model)
 
       payload = Utils.deep_merge(
@@ -47,7 +47,8 @@ module RubyLLM
           temperature: normalized_temperature,
           model: model,
           stream: block_given?,
-          schema: schema
+          schema: schema,
+          thinking: thinking
         ),
         params
       )
@@ -144,7 +145,7 @@ module RubyLLM
     end
 
     def capabilities
-
+      nil
     end
 
     def configuration_requirements
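Provider#complete now threads a thinking: keyword through Utils.deep_merge into each provider's render_payload, with nil preserving the old behavior. A hedged sketch of the contract a provider module now sees; the class name and the :reasoning payload key are illustrative, and Thinking is assumed to expose #enabled? and #budget as the Anthropic code below suggests:

module RubyLLM
  module Providers
    # Hypothetical provider used only to illustrate the new keyword.
    class ExampleProvider
      module Chat
        module_function

        # thinking: arrives from Provider#complete; nil means disabled.
        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil)
          payload = { model: model.id, stream: stream }
          # Each real provider maps the Thinking object onto its own wire format.
          payload[:reasoning] = { budget_tokens: thinking.budget } if thinking&.enabled?
          payload
        end
      end
    end
  end
end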
data/lib/ruby_llm/providers/anthropic/chat.rb
CHANGED
@@ -3,7 +3,7 @@
 module RubyLLM
   module Providers
     class Anthropic
-      # Chat methods
+      # Chat methods for the Anthropic API implementation
       module Chat
         module_function
 
@@ -11,11 +11,11 @@ module RubyLLM
          '/v1/messages'
        end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
          system_messages, chat_messages = separate_messages(messages)
          system_content = build_system_content(system_messages)
 
-          build_base_payload(chat_messages, model, stream).tap do |payload|
+          build_base_payload(chat_messages, model, stream, thinking).tap do |payload|
            add_optional_fields(payload, system_content:, tools:, temperature:)
          end
        end
@@ -45,13 +45,18 @@ module RubyLLM
          end
        end
 
-        def build_base_payload(chat_messages, model, stream)
-          {
+        def build_base_payload(chat_messages, model, stream, thinking)
+          payload = {
            model: model.id,
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
            stream: stream,
            max_tokens: model.max_tokens || 4096
          }
+
+          thinking_payload = build_thinking_payload(thinking)
+          payload[:thinking] = thinking_payload if thinking_payload
+
+          payload
        end
 
        def add_optional_fields(payload, system_content:, tools:, temperature:)
@@ -65,9 +70,11 @@ module RubyLLM
          content_blocks = data['content'] || []
 
          text_content = extract_text_content(content_blocks)
+          thinking_content = extract_thinking_content(content_blocks)
+          thinking_signature = extract_thinking_signature(content_blocks)
          tool_use_blocks = Tools.find_tool_uses(content_blocks)
 
-          build_message(data, text_content, tool_use_blocks, response)
+          build_message(data, text_content, thinking_content, thinking_signature, tool_use_blocks, response)
        end
 
        def extract_text_content(blocks)
@@ -75,50 +82,158 @@ module RubyLLM
          text_blocks.map { |c| c['text'] }.join
        end
 
-        def
+        def extract_thinking_content(blocks)
+          thinking_blocks = blocks.select { |c| c['type'] == 'thinking' }
+          thoughts = thinking_blocks.map { |c| c['thinking'] || c['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thinking_signature(blocks)
+          thinking_block = blocks.find { |c| c['type'] == 'thinking' } ||
+                           blocks.find { |c| c['type'] == 'redacted_thinking' }
+          thinking_block&.dig('signature') || thinking_block&.dig('data')
+        end
+
+        def build_message(data, content, thinking, thinking_signature, tool_use_blocks, response) # rubocop:disable Metrics/ParameterLists
          usage = data['usage'] || {}
          cached_tokens = usage['cache_read_input_tokens']
          cache_creation_tokens = usage['cache_creation_input_tokens']
          if cache_creation_tokens.nil? && usage['cache_creation'].is_a?(Hash)
            cache_creation_tokens = usage['cache_creation'].values.compact.sum
          end
+          thinking_tokens = usage.dig('output_tokens_details', 'thinking_tokens') ||
+                            usage.dig('output_tokens_details', 'reasoning_tokens') ||
+                            usage['thinking_tokens'] ||
+                            usage['reasoning_tokens']
 
          Message.new(
            role: :assistant,
            content: content,
+            thinking: Thinking.build(text: thinking, signature: thinking_signature),
            tool_calls: Tools.parse_tool_calls(tool_use_blocks),
            input_tokens: usage['input_tokens'],
            output_tokens: usage['output_tokens'],
            cached_tokens: cached_tokens,
            cache_creation_tokens: cache_creation_tokens,
+            thinking_tokens: thinking_tokens,
            model_id: data['model'],
            raw: response
          )
        end
 
-        def format_message(msg)
+        def format_message(msg, thinking: nil)
+          thinking_enabled = thinking&.enabled?
+
          if msg.tool_call?
-
+            format_tool_call_with_thinking(msg, thinking_enabled)
          elsif msg.tool_result?
            Tools.format_tool_result(msg)
          else
-
+            format_basic_message_with_thinking(msg, thinking_enabled)
          end
        end
 
-        def
+        def format_basic_message_with_thinking(msg, thinking_enabled)
+          content_blocks = []
+
+          if msg.role == :assistant && thinking_enabled
+            thinking_block = build_thinking_block(msg.thinking)
+            content_blocks << thinking_block if thinking_block
+          end
+
+          append_formatted_content(content_blocks, msg.content)
+
          {
            role: convert_role(msg.role),
-            content:
+            content: content_blocks
          }
        end
 
+        def format_tool_call_with_thinking(msg, thinking_enabled)
+          if msg.content.is_a?(RubyLLM::Content::Raw)
+            content_blocks = msg.content.value
+            content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+            content_blocks = prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+            return { role: 'assistant', content: content_blocks }
+          end
+
+          content_blocks = prepend_thinking_block([], msg, thinking_enabled)
+          content_blocks << Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+          msg.tool_calls.each_value do |tool_call|
+            content_blocks << {
+              type: 'tool_use',
+              id: tool_call.id,
+              name: tool_call.name,
+              input: tool_call.arguments
+            }
+          end
+
+          {
+            role: 'assistant',
+            content: content_blocks
+          }
+        end
+
+        def prepend_thinking_block(content_blocks, msg, thinking_enabled)
+          return content_blocks unless thinking_enabled
+
+          thinking_block = build_thinking_block(msg.thinking)
+          content_blocks.unshift(thinking_block) if thinking_block
+
+          content_blocks
+        end
+
+        def build_thinking_block(thinking)
+          return nil unless thinking
+
+          if thinking.text
+            {
+              type: 'thinking',
+              thinking: thinking.text,
+              signature: thinking.signature
+            }.compact
+          elsif thinking.signature
+            {
+              type: 'redacted_thinking',
+              data: thinking.signature
+            }
+          end
+        end
+
+        def append_formatted_content(content_blocks, content)
+          formatted_content = Media.format_content(content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          else
+            content_blocks << formatted_content
+          end
+        end
+
        def convert_role(role)
          case role
          when :tool, :user then 'user'
          else 'assistant'
          end
        end
+
+        def build_thinking_payload(thinking)
+          return nil unless thinking&.enabled?
+
+          budget = resolve_budget(thinking)
+          raise ArgumentError, 'Anthropic thinking requires a budget' if budget.nil?
+
+          {
+            type: 'enabled',
+            budget_tokens: budget
+          }
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
      end
    end
  end
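build_thinking_payload emits { type: 'enabled', budget_tokens: n } only when thinking is enabled and resolve_budget finds an Integer budget, raising ArgumentError otherwise; build_thinking_block replays earlier thinking (or a redacted_thinking signature) ahead of any tool_use blocks, matching Anthropic's requirement that assistant turns preserve thinking blocks when extended thinking is on. A hedged sketch of the resulting request body; model id, text, and tool names are illustrative:

# Request shape implied by build_base_payload + build_thinking_payload:
{
  model: 'claude-sonnet-4-5',
  stream: false,
  max_tokens: 4096,
  thinking: { type: 'enabled', budget_tokens: 2048 },
  messages: [
    { role: 'user', content: [{ type: 'text', text: 'What is 27 * 43?' }] },
    # Assistant turns replay the signed thinking block before tool_use:
    { role: 'assistant',
      content: [
        { type: 'thinking', thinking: '27 * 40 is 1080, plus 27 * 3 ...', signature: '<opaque>' },
        { type: 'tool_use', id: 'toolu_01', name: 'calculator', input: { expression: '27*43' } }
      ] }
  ]
}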
data/lib/ruby_llm/providers/anthropic/streaming.rb
CHANGED
@@ -12,10 +12,16 @@
        end
 
        def build_chunk(data)
+          delta_type = data.dig('delta', 'type')
+
          Chunk.new(
            role: :assistant,
            model_id: extract_model_id(data),
-            content: data
+            content: extract_content_delta(data, delta_type),
+            thinking: Thinking.build(
+              text: extract_thinking_delta(data, delta_type),
+              signature: extract_signature_delta(data, delta_type)
+            ),
            input_tokens: extract_input_tokens(data),
            output_tokens: extract_output_tokens(data),
            cached_tokens: extract_cached_tokens(data),
@@ -24,6 +30,24 @@ module RubyLLM
          )
        end
 
+        def extract_content_delta(data, delta_type)
+          return data.dig('delta', 'text') if delta_type == 'text_delta'
+
+          nil
+        end
+
+        def extract_thinking_delta(data, delta_type)
+          return data.dig('delta', 'thinking') if delta_type == 'thinking_delta'
+
+          nil
+        end
+
+        def extract_signature_delta(data, delta_type)
+          return data.dig('delta', 'signature') if delta_type == 'signature_delta'
+
+          nil
+        end
+
        def json_delta?(data)
          data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'input_json_delta'
        end