ruby_llm 1.9.1 → 1.10.0

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (48):
  1. checksums.yaml +4 -4
  2. data/README.md +3 -2
  3. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  4. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
  5. data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
  6. data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
  7. data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +1 -1
  8. data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
  9. data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
  10. data/lib/ruby_llm/active_record/message_methods.rb +41 -8
  11. data/lib/ruby_llm/aliases.json +101 -21
  12. data/lib/ruby_llm/chat.rb +10 -7
  13. data/lib/ruby_llm/configuration.rb +1 -1
  14. data/lib/ruby_llm/message.rb +37 -11
  15. data/lib/ruby_llm/models.json +21119 -10230
  16. data/lib/ruby_llm/models.rb +271 -27
  17. data/lib/ruby_llm/models_schema.json +2 -2
  18. data/lib/ruby_llm/provider.rb +4 -3
  19. data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
  20. data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
  21. data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
  22. data/lib/ruby_llm/providers/bedrock/models.rb +21 -15
  23. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
  24. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
  25. data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
  26. data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
  27. data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
  28. data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
  29. data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
  30. data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
  31. data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
  32. data/lib/ruby_llm/providers/openai/chat.rb +87 -3
  33. data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
  34. data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
  35. data/lib/ruby_llm/providers/openai.rb +1 -1
  36. data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
  37. data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
  38. data/lib/ruby_llm/providers/openrouter.rb +2 -0
  39. data/lib/ruby_llm/providers/vertexai.rb +5 -1
  40. data/lib/ruby_llm/stream_accumulator.rb +111 -14
  41. data/lib/ruby_llm/streaming.rb +76 -54
  42. data/lib/ruby_llm/thinking.rb +49 -0
  43. data/lib/ruby_llm/tokens.rb +47 -0
  44. data/lib/ruby_llm/tool.rb +1 -1
  45. data/lib/ruby_llm/tool_call.rb +6 -3
  46. data/lib/ruby_llm/version.rb +1 -1
  47. data/lib/tasks/models.rake +20 -13
  48. metadata +12 -5
data/lib/ruby_llm/models.rb

@@ -5,6 +5,18 @@ module RubyLLM
   class Models
     include Enumerable
 
+    MODELS_DEV_PROVIDER_MAP = {
+      'openai' => 'openai',
+      'anthropic' => 'anthropic',
+      'google' => 'gemini',
+      'google-vertex' => 'vertexai',
+      'amazon-bedrock' => 'bedrock',
+      'deepseek' => 'deepseek',
+      'mistral' => 'mistral',
+      'openrouter' => 'openrouter',
+      'perplexity' => 'perplexity'
+    }.freeze
+
     class << self
       def instance
         @instance ||= new
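The new MODELS_DEV_PROVIDER_MAP translates models.dev provider keys into RubyLLM's own provider slugs; providers absent from the map are skipped during import. A quick illustration of the lookup:

```ruby
# models.dev keys map to RubyLLM provider slugs; unmapped providers are skipped.
RubyLLM::Models::MODELS_DEV_PROVIDER_MAP['google']         # => "gemini"
RubyLLM::Models::MODELS_DEV_PROVIDER_MAP['amazon-bedrock'] # => "bedrock"
RubyLLM::Models::MODELS_DEV_PROVIDER_MAP['groq']           # => nil, so such entries are ignored
```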
@@ -26,24 +38,53 @@ module RubyLLM
       end
 
       def refresh!(remote_only: false)
-        provider_models = fetch_from_providers(remote_only: remote_only)
-        parsera_models = fetch_from_parsera
-        merged_models = merge_models(provider_models, parsera_models)
+        existing_models = load_existing_models
+
+        provider_fetch = fetch_provider_models(remote_only: remote_only)
+        log_provider_fetch(provider_fetch)
+
+        models_dev_fetch = fetch_models_dev_models(existing_models)
+        log_models_dev_fetch(models_dev_fetch)
+
+        merged_models = merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
         @instance = new(merged_models)
       end
 
-      def fetch_from_providers(remote_only: true)
+      def fetch_provider_models(remote_only: true) # rubocop:disable Metrics/PerceivedComplexity
         config = RubyLLM.config
+        provider_classes = remote_only ? Provider.remote_providers.values : Provider.providers.values
         configured_classes = if remote_only
                                Provider.configured_remote_providers(config)
                              else
                                Provider.configured_providers(config)
                              end
-        configured = configured_classes.map { |klass| klass.new(config) }
+        configured = configured_classes.select { |klass| provider_classes.include?(klass) }
+        result = {
+          models: [],
+          fetched_providers: [],
+          configured_names: configured.map(&:name),
+          failed: []
+        }
+
+        provider_classes.each do |provider_class|
+          next if remote_only && provider_class.local?
+          next unless provider_class.configured?(config)
+
+          begin
+            result[:models].concat(provider_class.new(config).list_models)
+            result[:fetched_providers] << provider_class.slug
+          rescue StandardError => e
+            result[:failed] << { name: provider_class.name, slug: provider_class.slug, error: e }
+          end
+        end
 
-        RubyLLM.logger.info "Fetching models from providers: #{configured.map(&:name).join(', ')}"
+        result[:fetched_providers].uniq!
+        result
+      end
 
-        configured.flat_map(&:list_models)
+      # Backwards-compatible wrapper used by specs.
+      def fetch_from_providers(remote_only: true)
+        fetch_provider_models(remote_only: remote_only)[:models]
       end
 
       def resolve(model_id, provider: nil, assume_exists: false, config: nil) # rubocop:disable Metrics/PerceivedComplexity
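fetch_provider_models replaces the old flat_map over providers: each provider is now fetched in isolation, and a failure is recorded instead of aborting the whole refresh. The hash it returns has this shape (values illustrative):

```ruby
# Return shape of Models.fetch_provider_models, per the diff above.
{
  models: [],                             # Model::Info objects from providers that responded
  fetched_providers: ['openai'],          # slugs whose fetch succeeded; their cached entries get replaced
  configured_names: %w[OpenAI Anthropic], # used only for the info log line
  failed: [{ name: 'Anthropic', slug: 'anthropic', error: Faraday::TimeoutError.new }]
}
```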
@@ -91,32 +132,86 @@ module RubyLLM
         instance.respond_to?(method, include_private) || super
       end
 
-      def fetch_from_parsera
-        RubyLLM.logger.info 'Fetching models from Parsera API...'
+      def fetch_models_dev_models(existing_models) # rubocop:disable Metrics/PerceivedComplexity
+        RubyLLM.logger.info 'Fetching models from models.dev API...'
 
         connection = Connection.basic do |f|
           f.request :json
           f.response :json, parser_options: { symbolize_names: true }
         end
-        response = connection.get 'https://api.parsera.org/v1/llm-specs'
-        models = response.body.map { |data| Model::Info.new(data) }
-        models.reject { |model| model.provider.nil? || model.id.nil? }
+        response = connection.get 'https://models.dev/api.json'
+        providers = response.body || {}
+
+        models = providers.flat_map do |provider_key, provider_data|
+          provider_slug = MODELS_DEV_PROVIDER_MAP[provider_key.to_s]
+          next [] unless provider_slug
+
+          (provider_data[:models] || {}).values.map do |model_data|
+            Model::Info.new(models_dev_model_to_info(model_data, provider_slug, provider_key.to_s))
+          end
+        end
+        { models: models.reject { |model| model.provider.nil? || model.id.nil? }, fetched: true }
+      rescue StandardError => e
+        RubyLLM.logger.warn("Failed to fetch models.dev (#{e.class}: #{e.message}). Keeping existing.")
+        {
+          models: existing_models.select { |model| model.metadata[:source] == 'models.dev' },
+          fetched: false
+        }
+      end
+
+      def load_existing_models
+        existing_models = instance&.all
+        existing_models = read_from_json if existing_models.nil? || existing_models.empty?
+        existing_models
       end
 
-      def merge_models(provider_models, parsera_models)
-        parsera_by_key = index_by_key(parsera_models)
+      def log_provider_fetch(provider_fetch)
+        RubyLLM.logger.info "Fetching models from providers: #{provider_fetch[:configured_names].join(', ')}"
+        provider_fetch[:failed].each do |failure|
+          RubyLLM.logger.warn(
+            "Failed to fetch #{failure[:name]} models (#{failure[:error].class}: #{failure[:error].message}). " \
+            'Keeping existing.'
+          )
+        end
+      end
+
+      def log_models_dev_fetch(models_dev_fetch)
+        return if models_dev_fetch[:fetched]
+
+        RubyLLM.logger.warn('Using cached models.dev data due to fetch failure.')
+      end
+
+      def merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
+        existing_by_provider = existing_models.group_by(&:provider)
+        preserved_models = existing_by_provider
+                           .except(*provider_fetch[:fetched_providers])
+                           .values
+                           .flatten
+
+        provider_models = provider_fetch[:models] + preserved_models
+        models_dev_models = if models_dev_fetch[:fetched]
+                              models_dev_fetch[:models]
+                            else
+                              existing_models.select { |model| model.metadata[:source] == 'models.dev' }
+                            end
+
+        merge_models(provider_models, models_dev_models)
+      end
+
+      def merge_models(provider_models, models_dev_models)
+        models_dev_by_key = index_by_key(models_dev_models)
         provider_by_key = index_by_key(provider_models)
 
-        all_keys = parsera_by_key.keys | provider_by_key.keys
+        all_keys = models_dev_by_key.keys | provider_by_key.keys
 
         models = all_keys.map do |key|
-          parsera_model = find_parsera_model(key, parsera_by_key)
+          models_dev_model = find_models_dev_model(key, models_dev_by_key)
           provider_model = provider_by_key[key]
 
-          if parsera_model && provider_model
-            add_provider_metadata(parsera_model, provider_model)
-          elsif parsera_model
-            parsera_model
+          if models_dev_model && provider_model
+            add_provider_metadata(models_dev_model, provider_model)
+          elsif models_dev_model
+            models_dev_model
          else
            provider_model
          end
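The net effect is a refresh that degrades gracefully instead of failing wholesale. A sketch of the guarantee, assuming RubyLLM.models.refresh! remains the public entry point:

```ruby
# If, say, anthropic times out while openai succeeds during this call:
#   - openai entries are replaced with the freshly fetched list,
#   - anthropic entries are carried over from the previous cache
#     (their slug is absent from fetched_providers),
#   - models.dev data falls back to cached entries tagged source: 'models.dev'.
RubyLLM.models.refresh!
```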
@@ -125,18 +220,33 @@ module RubyLLM
         models.sort_by { |m| [m.provider, m.id] }
       end
 
-      def find_parsera_model(key, parsera_by_key)
+      def find_models_dev_model(key, models_dev_by_key)
         # Direct match
-        return parsera_by_key[key] if parsera_by_key[key]
+        return models_dev_by_key[key] if models_dev_by_key[key]
 
-        # VertexAI uses same models as Gemini
         provider, model_id = key.split(':', 2)
+        if provider == 'bedrock'
+          normalized_id = model_id.sub(/^[a-z]{2}\./, '')
+          context_override = nil
+          normalized_id = normalized_id.gsub(/:(\d+)k\b/) do
+            context_override = Regexp.last_match(1).to_i * 1000
+            ''
+          end
+          bedrock_model = models_dev_by_key["bedrock:#{normalized_id}"]
+          if bedrock_model
+            data = bedrock_model.to_h.merge(id: model_id)
+            data[:context_window] = context_override if context_override
+            return Model::Info.new(data)
+          end
+        end
+
+        # VertexAI uses same models as Gemini
         return unless provider == 'vertexai'
 
-        gemini_model = parsera_by_key["gemini:#{model_id}"]
+        gemini_model = models_dev_by_key["gemini:#{model_id}"]
         return unless gemini_model
 
-        # Return Gemini's Parsera data but with VertexAI as provider
+        # Return Gemini's models.dev data but with VertexAI as provider
         Model::Info.new(gemini_model.to_h.merge(provider: 'vertexai'))
       end
 
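find_models_dev_model now normalizes Bedrock ids before the lookup: a two-letter cross-region prefix is stripped, and a `:NNNk` suffix becomes a context-window override. The two regexes at work, with illustrative ids:

```ruby
# Strip a cross-region inference profile prefix such as "us." or "eu.":
'us.anthropic.claude-3-7-sonnet-20250219-v1:0'.sub(/^[a-z]{2}\./, '')
# => "anthropic.claude-3-7-sonnet-20250219-v1:0"

# Turn a ":200k" context marker into a numeric override while removing it from the id:
context_override = nil
id = 'anthropic.claude-v2:1:200k'.gsub(/:(\d+)k\b/) do
  context_override = Regexp.last_match(1).to_i * 1000
  ''
end
id               # => "anthropic.claude-v2:1"
context_override # => 200000
```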
@@ -146,11 +256,130 @@ module RubyLLM
         end
       end
 
-      def add_provider_metadata(parsera_model, provider_model)
-        data = parsera_model.to_h
+      def add_provider_metadata(models_dev_model, provider_model) # rubocop:disable Metrics/PerceivedComplexity
+        data = models_dev_model.to_h
+        data[:name] = provider_model.name if blank_value?(data[:name])
+        data[:family] = provider_model.family if blank_value?(data[:family])
+        data[:created_at] = provider_model.created_at if blank_value?(data[:created_at])
+        data[:context_window] = provider_model.context_window if blank_value?(data[:context_window])
+        data[:max_output_tokens] = provider_model.max_output_tokens if blank_value?(data[:max_output_tokens])
+        data[:modalities] = provider_model.modalities.to_h if blank_value?(data[:modalities])
+        data[:pricing] = provider_model.pricing.to_h if blank_value?(data[:pricing])
         data[:metadata] = provider_model.metadata.merge(data[:metadata] || {})
+        data[:capabilities] = (models_dev_model.capabilities + provider_model.capabilities).uniq
+        normalize_embedding_modalities(data)
         Model::Info.new(data)
       end
+
+      def normalize_embedding_modalities(data)
+        return unless data[:id].to_s.include?('embedding')
+
+        modalities = data[:modalities].to_h
+        modalities[:input] = ['text'] if modalities[:input].nil? || modalities[:input].empty?
+        modalities[:output] = ['embeddings']
+        data[:modalities] = modalities
+      end
+
+      def blank_value?(value)
+        return true if value.nil?
+        return value.empty? if value.is_a?(String) || value.is_a?(Array)
+
+        if value.is_a?(Hash)
+          return true if value.empty?
+
+          return value.values.all? { |nested| blank_value?(nested) }
+        end
+
+        false
+      end
+
+      def models_dev_model_to_info(model_data, provider_slug, provider_key)
+        modalities = normalize_models_dev_modalities(model_data[:modalities])
+        capabilities = models_dev_capabilities(model_data, modalities)
+
+        data = {
+          id: model_data[:id],
+          name: model_data[:name] || model_data[:id],
+          provider: provider_slug,
+          family: model_data[:family],
+          created_at: model_data[:release_date] || model_data[:last_updated],
+          context_window: model_data.dig(:limit, :context),
+          max_output_tokens: model_data.dig(:limit, :output),
+          knowledge_cutoff: normalize_models_dev_knowledge(model_data[:knowledge]),
+          modalities: modalities,
+          capabilities: capabilities,
+          pricing: models_dev_pricing(model_data[:cost]),
+          metadata: models_dev_metadata(model_data, provider_key)
+        }
+
+        normalize_embedding_modalities(data)
+        data
+      end
+
+      def models_dev_capabilities(model_data, modalities)
+        capabilities = []
+        capabilities << 'function_calling' if model_data[:tool_call]
+        capabilities << 'structured_output' if model_data[:structured_output]
+        capabilities << 'reasoning' if model_data[:reasoning]
+        capabilities << 'vision' if modalities[:input].intersect?(%w[image video pdf])
+        capabilities.uniq
+      end
+
+      def models_dev_pricing(cost)
+        return {} unless cost
+
+        text_standard = {
+          input_per_million: cost[:input],
+          output_per_million: cost[:output],
+          cached_input_per_million: cost[:cache_read],
+          reasoning_output_per_million: cost[:reasoning]
+        }.compact
+
+        audio_standard = {
+          input_per_million: cost[:input_audio],
+          output_per_million: cost[:output_audio]
+        }.compact
+
+        pricing = {}
+        pricing[:text_tokens] = { standard: text_standard } if text_standard.any?
+        pricing[:audio_tokens] = { standard: audio_standard } if audio_standard.any?
+        pricing
+      end
+
+      def models_dev_metadata(model_data, provider_key)
+        metadata = {
+          source: 'models.dev',
+          provider_id: provider_key,
+          open_weights: model_data[:open_weights],
+          attachment: model_data[:attachment],
+          temperature: model_data[:temperature],
+          last_updated: model_data[:last_updated],
+          status: model_data[:status],
+          interleaved: model_data[:interleaved],
+          cost: model_data[:cost],
+          limit: model_data[:limit],
+          knowledge: model_data[:knowledge]
+        }
+        metadata.compact
+      end
+
+      def normalize_models_dev_modalities(modalities)
+        normalized = { input: [], output: [] }
+        return normalized unless modalities
+
+        normalized[:input] = Array(modalities[:input]).compact
+        normalized[:output] = Array(modalities[:output]).compact
+        normalized
+      end
+
+      def normalize_models_dev_knowledge(value)
+        return if value.nil?
+        return value if value.is_a?(Date)
+
+        Date.parse(value.to_s)
+      rescue ArgumentError
+        nil
+      end
     end
 
     def initialize(models = nil)
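models_dev_pricing reshapes a models.dev cost entry (USD per million tokens) into RubyLLM's nested pricing layout, dropping absent keys. For example, with illustrative numbers:

```ruby
cost = { input: 3.0, output: 15.0, cache_read: 0.3 }
# models_dev_pricing(cost) returns:
# {
#   text_tokens: {
#     standard: {
#       input_per_million: 3.0,
#       output_per_million: 15.0,
#       cached_input_per_million: 0.3
#     }
#   }
# }
# No :input_audio/:output_audio keys, so no :audio_tokens section is emitted.
```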
@@ -217,11 +446,26 @@ module RubyLLM
 
     def find_with_provider(model_id, provider)
       resolved_id = Aliases.resolve(model_id, provider)
+      resolved_id = resolve_bedrock_region_id(resolved_id) if provider.to_s == 'bedrock'
       all.find { |m| m.id == model_id && m.provider == provider.to_s } ||
         all.find { |m| m.id == resolved_id && m.provider == provider.to_s } ||
         raise(ModelNotFoundError, "Unknown model: #{model_id} for provider: #{provider}")
     end
 
+    def resolve_bedrock_region_id(model_id)
+      region = RubyLLM.config.bedrock_region.to_s
+      return model_id if region.empty?
+
+      candidate_id = Providers::Bedrock::Models.with_region_prefix(model_id, region)
+      return model_id if candidate_id == model_id
+
+      candidate = all.find { |m| m.provider == 'bedrock' && m.id == candidate_id }
+      return model_id unless candidate
+
+      inference_types = Array(candidate.metadata[:inference_types] || candidate.metadata['inference_types'])
+      Providers::Bedrock::Models.normalize_inference_profile_id(model_id, inference_types, region)
+    end
+
     def find_without_provider(model_id)
       all.find { |m| m.id == model_id } ||
         all.find { |m| m.id == Aliases.resolve(model_id) } ||
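resolve_bedrock_region_id only upgrades an id to a region-prefixed inference profile when such a model actually exists in the catalog. A sketch of the flow, with an assumed alias and region:

```ruby
# Assumed configuration and alias; the exact ids depend on your catalog.
RubyLLM.configure { |config| config.bedrock_region = 'eu-west-1' }

chat = RubyLLM.chat(model: 'claude-sonnet-4', provider: 'bedrock')
# find_with_provider resolves the alias, then resolve_bedrock_region_id checks
# whether an "eu."-prefixed variant exists and consults the model's
# inference_types metadata; if not, the plain id is kept and lookup behaves
# exactly as in 1.9.x.
```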
data/lib/ruby_llm/models_schema.json

@@ -55,7 +55,7 @@
         "type": "array",
         "items": {
           "type": "string",
-          "enum": ["text", "image", "audio", "embeddings", "moderation"]
+          "enum": ["text", "image", "audio", "video", "embeddings", "moderation"]
         },
         "uniqueItems": true,
         "description": "Supported output modalities"
@@ -165,4 +165,4 @@
       }
     }
   }
-}
+}
data/lib/ruby_llm/provider.rb

@@ -37,7 +37,7 @@ module RubyLLM
       self.class.configuration_requirements
     end
 
-    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, &) # rubocop:disable Metrics/ParameterLists
       normalized_temperature = maybe_normalize_temperature(temperature, model)
 
       payload = Utils.deep_merge(
@@ -47,7 +47,8 @@ module RubyLLM
           temperature: normalized_temperature,
           model: model,
           stream: block_given?,
-          schema: schema
+          schema: schema,
+          thinking: thinking
         ),
         params
       )
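Provider#complete now threads an optional thinking object through to each provider's render_payload; adapters that do not support it accept `thinking: nil` and ignore it. A hypothetical direct call (applications normally go through Chat rather than Provider#complete):

```ruby
# `thinking` is whatever the Chat layer builds; the Anthropic adapter expects
# it to respond to #enabled? and #budget.
provider.complete(
  messages,
  tools: {},
  temperature: nil,
  model: model,
  thinking: thinking
)
```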
@@ -144,7 +145,7 @@ module RubyLLM
      end
 
      def capabilities
-        raise NotImplementedError
+        nil
      end
 
      def configuration_requirements
data/lib/ruby_llm/providers/anthropic/chat.rb

@@ -3,7 +3,7 @@
 module RubyLLM
   module Providers
     class Anthropic
-      # Chat methods of the OpenAI API integration
+      # Chat methods for the Anthropic API implementation
       module Chat
         module_function
 
@@ -11,11 +11,11 @@ module RubyLLM
           '/v1/messages'
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           system_messages, chat_messages = separate_messages(messages)
           system_content = build_system_content(system_messages)
 
-          build_base_payload(chat_messages, model, stream).tap do |payload|
+          build_base_payload(chat_messages, model, stream, thinking).tap do |payload|
             add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end
@@ -45,13 +45,18 @@ module RubyLLM
           end
         end
 
-        def build_base_payload(chat_messages, model, stream)
-          {
+        def build_base_payload(chat_messages, model, stream, thinking)
+          payload = {
             model: model.id,
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
             stream: stream,
             max_tokens: model.max_tokens || 4096
           }
+
+          thinking_payload = build_thinking_payload(thinking)
+          payload[:thinking] = thinking_payload if thinking_payload
+
+          payload
         end
 
         def add_optional_fields(payload, system_content:, tools:, temperature:)
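With thinking enabled, build_base_payload attaches Anthropic's extended-thinking block, and build_thinking_payload (later in this file) raises unless an integer budget is supplied. The resulting request body, abridged and with illustrative values:

```ruby
{
  model: 'claude-sonnet-4-5',  # illustrative model id
  stream: false,
  max_tokens: 4096,            # model.max_tokens fallback from the diff
  thinking: { type: 'enabled', budget_tokens: 2048 }
  # messages omitted for brevity
}
```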
@@ -65,9 +70,11 @@ module RubyLLM
           content_blocks = data['content'] || []
 
           text_content = extract_text_content(content_blocks)
+          thinking_content = extract_thinking_content(content_blocks)
+          thinking_signature = extract_thinking_signature(content_blocks)
           tool_use_blocks = Tools.find_tool_uses(content_blocks)
 
-          build_message(data, text_content, tool_use_blocks, response)
+          build_message(data, text_content, thinking_content, thinking_signature, tool_use_blocks, response)
         end
 
         def extract_text_content(blocks)
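Response parsing now pulls thinking blocks out of the content array alongside text. The shape of an Anthropic response body the new extractors consume, abridged:

```ruby
content_blocks = [
  { 'type' => 'thinking',
    'thinking' => 'Let me compare the two options first...',
    'signature' => 'EuYBCkQYAi...' },
  { 'type' => 'text', 'text' => 'Option B is cheaper overall.' }
]
# extract_text_content(content_blocks)       => "Option B is cheaper overall."
# extract_thinking_content(content_blocks)   => "Let me compare the two options first..."
# extract_thinking_signature(content_blocks) => "EuYBCkQYAi..."
```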
@@ -75,50 +82,158 @@ module RubyLLM
           text_blocks.map { |c| c['text'] }.join
         end
 
-        def build_message(data, content, tool_use_blocks, response)
+        def extract_thinking_content(blocks)
+          thinking_blocks = blocks.select { |c| c['type'] == 'thinking' }
+          thoughts = thinking_blocks.map { |c| c['thinking'] || c['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thinking_signature(blocks)
+          thinking_block = blocks.find { |c| c['type'] == 'thinking' } ||
+                           blocks.find { |c| c['type'] == 'redacted_thinking' }
+          thinking_block&.dig('signature') || thinking_block&.dig('data')
+        end
+
+        def build_message(data, content, thinking, thinking_signature, tool_use_blocks, response) # rubocop:disable Metrics/ParameterLists
           usage = data['usage'] || {}
           cached_tokens = usage['cache_read_input_tokens']
           cache_creation_tokens = usage['cache_creation_input_tokens']
           if cache_creation_tokens.nil? && usage['cache_creation'].is_a?(Hash)
             cache_creation_tokens = usage['cache_creation'].values.compact.sum
           end
+          thinking_tokens = usage.dig('output_tokens_details', 'thinking_tokens') ||
+                            usage.dig('output_tokens_details', 'reasoning_tokens') ||
+                            usage['thinking_tokens'] ||
+                            usage['reasoning_tokens']
 
           Message.new(
             role: :assistant,
             content: content,
+            thinking: Thinking.build(text: thinking, signature: thinking_signature),
             tool_calls: Tools.parse_tool_calls(tool_use_blocks),
             input_tokens: usage['input_tokens'],
             output_tokens: usage['output_tokens'],
             cached_tokens: cached_tokens,
             cache_creation_tokens: cache_creation_tokens,
+            thinking_tokens: thinking_tokens,
             model_id: data['model'],
             raw: response
           )
         end
 
-        def format_message(msg)
+        def format_message(msg, thinking: nil)
+          thinking_enabled = thinking&.enabled?
+
           if msg.tool_call?
-            Tools.format_tool_call(msg)
+            format_tool_call_with_thinking(msg, thinking_enabled)
           elsif msg.tool_result?
             Tools.format_tool_result(msg)
           else
-            format_basic_message(msg)
+            format_basic_message_with_thinking(msg, thinking_enabled)
           end
         end
 
-        def format_basic_message(msg)
+        def format_basic_message_with_thinking(msg, thinking_enabled)
+          content_blocks = []
+
+          if msg.role == :assistant && thinking_enabled
+            thinking_block = build_thinking_block(msg.thinking)
+            content_blocks << thinking_block if thinking_block
+          end
+
+          append_formatted_content(content_blocks, msg.content)
+
           {
             role: convert_role(msg.role),
-            content: Media.format_content(msg.content)
+            content: content_blocks
           }
         end
 
+        def format_tool_call_with_thinking(msg, thinking_enabled)
+          if msg.content.is_a?(RubyLLM::Content::Raw)
+            content_blocks = msg.content.value
+            content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+            content_blocks = prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+            return { role: 'assistant', content: content_blocks }
+          end
+
+          content_blocks = prepend_thinking_block([], msg, thinking_enabled)
+          content_blocks << Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+          msg.tool_calls.each_value do |tool_call|
+            content_blocks << {
+              type: 'tool_use',
+              id: tool_call.id,
+              name: tool_call.name,
+              input: tool_call.arguments
+            }
+          end
+
+          {
+            role: 'assistant',
+            content: content_blocks
+          }
+        end
+
+        def prepend_thinking_block(content_blocks, msg, thinking_enabled)
+          return content_blocks unless thinking_enabled
+
+          thinking_block = build_thinking_block(msg.thinking)
+          content_blocks.unshift(thinking_block) if thinking_block
+
+          content_blocks
+        end
+
+        def build_thinking_block(thinking)
+          return nil unless thinking
+
+          if thinking.text
+            {
+              type: 'thinking',
+              thinking: thinking.text,
+              signature: thinking.signature
+            }.compact
+          elsif thinking.signature
+            {
+              type: 'redacted_thinking',
+              data: thinking.signature
+            }
+          end
+        end
+
+        def append_formatted_content(content_blocks, content)
+          formatted_content = Media.format_content(content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          else
+            content_blocks << formatted_content
+          end
+        end
+
         def convert_role(role)
           case role
           when :tool, :user then 'user'
           else 'assistant'
           end
         end
+
+        def build_thinking_payload(thinking)
+          return nil unless thinking&.enabled?
+
+          budget = resolve_budget(thinking)
+          raise ArgumentError, 'Anthropic thinking requires a budget' if budget.nil?
+
+          {
+            type: 'enabled',
+            budget_tokens: budget
+          }
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
       end
     end
   end
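When replaying history to the API with thinking enabled, assistant turns must lead with their thinking block, which prepend_thinking_block guarantees. What format_message produces for an assistant turn that thought and then called a tool (illustrative values):

```ruby
{
  role: 'assistant',
  content: [
    { type: 'thinking',
      thinking: 'The user wants the Berlin forecast; call the weather tool.',
      signature: 'EuYBCkQYAi...' },
    { type: 'tool_use', id: 'toolu_01', name: 'weather', input: { city: 'Berlin' } }
  ]
}
```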
data/lib/ruby_llm/providers/anthropic/streaming.rb

@@ -12,10 +12,16 @@ module RubyLLM
         end
 
         def build_chunk(data)
+          delta_type = data.dig('delta', 'type')
+
           Chunk.new(
             role: :assistant,
             model_id: extract_model_id(data),
-            content: data.dig('delta', 'text'),
+            content: extract_content_delta(data, delta_type),
+            thinking: Thinking.build(
+              text: extract_thinking_delta(data, delta_type),
+              signature: extract_signature_delta(data, delta_type)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
             cached_tokens: extract_cached_tokens(data),
@@ -24,6 +30,24 @@ module RubyLLM
           )
         end
 
+        def extract_content_delta(data, delta_type)
+          return data.dig('delta', 'text') if delta_type == 'text_delta'
+
+          nil
+        end
+
+        def extract_thinking_delta(data, delta_type)
+          return data.dig('delta', 'thinking') if delta_type == 'thinking_delta'
+
+          nil
+        end
+
+        def extract_signature_delta(data, delta_type)
+          return data.dig('delta', 'signature') if delta_type == 'signature_delta'
+
+          nil
+        end
+
         def json_delta?(data)
           data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'input_json_delta'
         end
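Streaming now routes each content_block_delta by its delta type instead of assuming text. How the three delta types land on a Chunk, with abridged payloads (assuming the Thinking object exposes #text and #signature):

```ruby
# text_delta feeds Chunk#content:
{ 'type' => 'content_block_delta', 'delta' => { 'type' => 'text_delta', 'text' => 'Hel' } }

# thinking_delta feeds Chunk#thinking.text:
{ 'type' => 'content_block_delta', 'delta' => { 'type' => 'thinking_delta', 'thinking' => 'Hmm, ' } }

# signature_delta feeds Chunk#thinking.signature:
{ 'type' => 'content_block_delta', 'delta' => { 'type' => 'signature_delta', 'signature' => 'EuYB' } }
```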