ruby_llm 1.9.2 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -2
  3. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  4. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
  5. data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
  6. data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
  7. data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
  8. data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
  9. data/lib/ruby_llm/active_record/message_methods.rb +41 -8
  10. data/lib/ruby_llm/aliases.json +0 -12
  11. data/lib/ruby_llm/chat.rb +10 -7
  12. data/lib/ruby_llm/configuration.rb +1 -1
  13. data/lib/ruby_llm/message.rb +37 -11
  14. data/lib/ruby_llm/models.json +1059 -857
  15. data/lib/ruby_llm/models.rb +134 -12
  16. data/lib/ruby_llm/provider.rb +4 -3
  17. data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
  18. data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
  19. data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
  20. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
  21. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
  22. data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
  23. data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
  24. data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
  25. data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
  26. data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
  27. data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
  28. data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
  29. data/lib/ruby_llm/providers/openai/chat.rb +87 -3
  30. data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
  31. data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
  32. data/lib/ruby_llm/providers/openai.rb +1 -1
  33. data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
  34. data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
  35. data/lib/ruby_llm/providers/openrouter.rb +2 -0
  36. data/lib/ruby_llm/providers/vertexai.rb +5 -1
  37. data/lib/ruby_llm/stream_accumulator.rb +111 -14
  38. data/lib/ruby_llm/streaming.rb +54 -51
  39. data/lib/ruby_llm/thinking.rb +49 -0
  40. data/lib/ruby_llm/tokens.rb +47 -0
  41. data/lib/ruby_llm/tool_call.rb +6 -3
  42. data/lib/ruby_llm/version.rb +1 -1
  43. data/lib/tasks/models.rake +19 -12
  44. metadata +12 -5
data/lib/ruby_llm/models.rb
@@ -38,24 +38,53 @@ module RubyLLM
       end

       def refresh!(remote_only: false)
-        provider_models = fetch_from_providers(remote_only: remote_only)
-        models_dev_models = fetch_from_models_dev
-        merged_models = merge_models(provider_models, models_dev_models)
+        existing_models = load_existing_models
+
+        provider_fetch = fetch_provider_models(remote_only: remote_only)
+        log_provider_fetch(provider_fetch)
+
+        models_dev_fetch = fetch_models_dev_models(existing_models)
+        log_models_dev_fetch(models_dev_fetch)
+
+        merged_models = merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
         @instance = new(merged_models)
       end

-      def fetch_from_providers(remote_only: true)
+      def fetch_provider_models(remote_only: true) # rubocop:disable Metrics/PerceivedComplexity
         config = RubyLLM.config
+        provider_classes = remote_only ? Provider.remote_providers.values : Provider.providers.values
         configured_classes = if remote_only
                                Provider.configured_remote_providers(config)
                              else
                                Provider.configured_providers(config)
                              end
-        configured = configured_classes.map { |klass| klass.new(config) }
+        configured = configured_classes.select { |klass| provider_classes.include?(klass) }
+        result = {
+          models: [],
+          fetched_providers: [],
+          configured_names: configured.map(&:name),
+          failed: []
+        }

-        RubyLLM.logger.info "Fetching models from providers: #{configured.map(&:name).join(', ')}"
+        provider_classes.each do |provider_class|
+          next if remote_only && provider_class.local?
+          next unless provider_class.configured?(config)
+
+          begin
+            result[:models].concat(provider_class.new(config).list_models)
+            result[:fetched_providers] << provider_class.slug
+          rescue StandardError => e
+            result[:failed] << { name: provider_class.name, slug: provider_class.slug, error: e }
+          end
+        end
+
+        result[:fetched_providers].uniq!
+        result
+      end

-        configured.flat_map(&:list_models)
+      # Backwards-compatible wrapper used by specs.
+      def fetch_from_providers(remote_only: true)
+        fetch_provider_models(remote_only: remote_only)[:models]
       end

       def resolve(model_id, provider: nil, assume_exists: false, config: nil) # rubocop:disable Metrics/PerceivedComplexity
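A sketch of what the new refresh path yields when one provider is down. The provider names and the error are invented for illustration; the hash shape matches fetch_provider_models above:

  result = RubyLLM::Models.fetch_provider_models(remote_only: true)
  result[:configured_names]  # => ['OpenAI', 'Anthropic', 'Gemini']
  result[:fetched_providers] # => ['openai', 'anthropic']  (slugs that answered)
  result[:failed]            # => [{ name: 'Gemini', slug: 'gemini', error: e }]  (e is the rescued exception)
  result[:models]            # => Model::Info list from the providers that answered

  # refresh! merges this with the cached catalog: models for providers that
  # did not answer are carried over from the existing catalog instead of the
  # whole refresh aborting, as the 1.9.x flat_map did on the first error.
  RubyLLM::Models.refresh!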
@@ -103,7 +132,7 @@ module RubyLLM
         instance.respond_to?(method, include_private) || super
       end

-      def fetch_from_models_dev
+      def fetch_models_dev_models(existing_models) # rubocop:disable Metrics/PerceivedComplexity
         RubyLLM.logger.info 'Fetching models from models.dev API...'

         connection = Connection.basic do |f|
@@ -121,7 +150,52 @@ module RubyLLM
             Model::Info.new(models_dev_model_to_info(model_data, provider_slug, provider_key.to_s))
           end
         end
-        models.reject { |model| model.provider.nil? || model.id.nil? }
+        { models: models.reject { |model| model.provider.nil? || model.id.nil? }, fetched: true }
+      rescue StandardError => e
+        RubyLLM.logger.warn("Failed to fetch models.dev (#{e.class}: #{e.message}). Keeping existing.")
+        {
+          models: existing_models.select { |model| model.metadata[:source] == 'models.dev' },
+          fetched: false
+        }
+      end
+
+      def load_existing_models
+        existing_models = instance&.all
+        existing_models = read_from_json if existing_models.nil? || existing_models.empty?
+        existing_models
+      end
+
+      def log_provider_fetch(provider_fetch)
+        RubyLLM.logger.info "Fetching models from providers: #{provider_fetch[:configured_names].join(', ')}"
+        provider_fetch[:failed].each do |failure|
+          RubyLLM.logger.warn(
+            "Failed to fetch #{failure[:name]} models (#{failure[:error].class}: #{failure[:error].message}). " \
+            'Keeping existing.'
+          )
+        end
+      end
+
+      def log_models_dev_fetch(models_dev_fetch)
+        return if models_dev_fetch[:fetched]
+
+        RubyLLM.logger.warn('Using cached models.dev data due to fetch failure.')
+      end
+
+      def merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
+        existing_by_provider = existing_models.group_by(&:provider)
+        preserved_models = existing_by_provider
+                           .except(*provider_fetch[:fetched_providers])
+                           .values
+                           .flatten
+
+        provider_models = provider_fetch[:models] + preserved_models
+        models_dev_models = if models_dev_fetch[:fetched]
+                              models_dev_fetch[:models]
+                            else
+                              existing_models.select { |model| model.metadata[:source] == 'models.dev' }
+                            end
+
+        merge_models(provider_models, models_dev_models)
       end

       def merge_models(provider_models, models_dev_models)
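The preservation step in merge_with_existing is plain Hash manipulation. A minimal standalone sketch with toy hashes standing in for Model::Info objects (Ruby 3.0+ for Hash#except):

  existing = [{ provider: 'openai', id: 'a' }, { provider: 'gemini', id: 'b' }]
  fetched_providers = ['openai'] # gemini failed this run

  preserved = existing.group_by { |m| m[:provider] }
                      .except(*fetched_providers)
                      .values
                      .flatten
  # => [{ provider: 'gemini', id: 'b' }]
  # gemini's old entries survive the refresh; openai's are replaced by the fresh fetch.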
@@ -150,8 +224,23 @@ module RubyLLM
        # Direct match
        return models_dev_by_key[key] if models_dev_by_key[key]

-        # VertexAI uses same models as Gemini
        provider, model_id = key.split(':', 2)
+        if provider == 'bedrock'
+          normalized_id = model_id.sub(/^[a-z]{2}\./, '')
+          context_override = nil
+          normalized_id = normalized_id.gsub(/:(\d+)k\b/) do
+            context_override = Regexp.last_match(1).to_i * 1000
+            ''
+          end
+          bedrock_model = models_dev_by_key["bedrock:#{normalized_id}"]
+          if bedrock_model
+            data = bedrock_model.to_h.merge(id: model_id)
+            data[:context_window] = context_override if context_override
+            return Model::Info.new(data)
+          end
+        end
+
+        # VertexAI uses same models as Gemini
        return unless provider == 'vertexai'

        gemini_model = models_dev_by_key["gemini:#{model_id}"]
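What the Bedrock normalization does, traced with a made-up cross-region model id:

  model_id = 'eu.anthropic.claude-sonnet-4-v1:0:200k' # hypothetical Bedrock id
  normalized_id = model_id.sub(/^[a-z]{2}\./, '')     # strip the region prefix
  context_override = nil
  normalized_id = normalized_id.gsub(/:(\d+)k\b/) do  # peel off a ':200k' context suffix
    context_override = Regexp.last_match(1).to_i * 1000
    ''
  end
  normalized_id    # => 'anthropic.claude-sonnet-4-v1:0', used to look up models.dev data
  context_override # => 200000, applied as the returned model's context_window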
@@ -167,18 +256,48 @@ module RubyLLM
          end
        end

-      def add_provider_metadata(models_dev_model, provider_model)
+      def add_provider_metadata(models_dev_model, provider_model) # rubocop:disable Metrics/PerceivedComplexity
         data = models_dev_model.to_h
+        data[:name] = provider_model.name if blank_value?(data[:name])
+        data[:family] = provider_model.family if blank_value?(data[:family])
+        data[:created_at] = provider_model.created_at if blank_value?(data[:created_at])
+        data[:context_window] = provider_model.context_window if blank_value?(data[:context_window])
+        data[:max_output_tokens] = provider_model.max_output_tokens if blank_value?(data[:max_output_tokens])
+        data[:modalities] = provider_model.modalities.to_h if blank_value?(data[:modalities])
+        data[:pricing] = provider_model.pricing.to_h if blank_value?(data[:pricing])
         data[:metadata] = provider_model.metadata.merge(data[:metadata] || {})
         data[:capabilities] = (models_dev_model.capabilities + provider_model.capabilities).uniq
+        normalize_embedding_modalities(data)
         Model::Info.new(data)
       end

+      def normalize_embedding_modalities(data)
+        return unless data[:id].to_s.include?('embedding')
+
+        modalities = data[:modalities].to_h
+        modalities[:input] = ['text'] if modalities[:input].nil? || modalities[:input].empty?
+        modalities[:output] = ['embeddings']
+        data[:modalities] = modalities
+      end
+
+      def blank_value?(value)
+        return true if value.nil?
+        return value.empty? if value.is_a?(String) || value.is_a?(Array)
+
+        if value.is_a?(Hash)
+          return true if value.empty?
+
+          return value.values.all? { |nested| blank_value?(nested) }
+        end
+
+        false
+      end
+
       def models_dev_model_to_info(model_data, provider_slug, provider_key)
         modalities = normalize_models_dev_modalities(model_data[:modalities])
         capabilities = models_dev_capabilities(model_data, modalities)

-        {
+        data = {
           id: model_data[:id],
           name: model_data[:name] || model_data[:id],
           provider: provider_slug,
@@ -192,6 +311,9 @@ module RubyLLM
           pricing: models_dev_pricing(model_data[:cost]),
           metadata: models_dev_metadata(model_data, provider_key)
         }
+
+        normalize_embedding_modalities(data)
+        data
       end

       def models_dev_capabilities(model_data, modalities)
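blank_value? treats empty strings, arrays, and all-blank nested hashes as blank, so provider data only fills genuine gaps in the models.dev record. Illustrative behavior:

  blank_value?(nil)                        # => true
  blank_value?('')                         # => true
  blank_value?([])                         # => true
  blank_value?({})                         # => true
  blank_value?({ input: nil, output: [] }) # => true  (every nested value is blank)
  blank_value?({ input: ['text'] })        # => false
  blank_value?(0)                          # => false (numbers are never blank)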
data/lib/ruby_llm/provider.rb
@@ -37,7 +37,7 @@ module RubyLLM
      self.class.configuration_requirements
    end

-    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, &) # rubocop:disable Metrics/ParameterLists
      normalized_temperature = maybe_normalize_temperature(temperature, model)

      payload = Utils.deep_merge(
@@ -47,7 +47,8 @@ module RubyLLM
          temperature: normalized_temperature,
          model: model,
          stream: block_given?,
-          schema: schema
+          schema: schema,
+          thinking: thinking
        ),
        params
      )
@@ -144,7 +145,7 @@ module RubyLLM
    end

    def capabilities
-      raise NotImplementedError
+      nil
    end

    def configuration_requirements
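The new thinking: keyword threads from Provider#complete down into each provider's render_payload. A hedged sketch of a direct call; the diff only shows that the object must respond to #enabled? (and #budget for Anthropic/Bedrock), so ThinkingConfig here is a hypothetical stand-in, not the gem's public API:

  # Hypothetical config object satisfying the duck type the providers expect.
  ThinkingConfig = Struct.new(:budget) do
    def enabled? = true
  end

  provider.complete(
    messages,
    tools: {},
    temperature: nil,
    model: model,
    thinking: ThinkingConfig.new(8_192)
  ) { |chunk| print chunk.content }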
data/lib/ruby_llm/providers/anthropic/chat.rb
@@ -3,7 +3,7 @@
 module RubyLLM
   module Providers
     class Anthropic
-      # Chat methods of the OpenAI API integration
+      # Chat methods for the Anthropic API implementation
       module Chat
         module_function

@@ -11,11 +11,11 @@ module RubyLLM
          '/v1/messages'
        end

-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
          system_messages, chat_messages = separate_messages(messages)
          system_content = build_system_content(system_messages)

-          build_base_payload(chat_messages, model, stream).tap do |payload|
+          build_base_payload(chat_messages, model, stream, thinking).tap do |payload|
            add_optional_fields(payload, system_content:, tools:, temperature:)
          end
        end
@@ -45,13 +45,18 @@ module RubyLLM
            end
          end

-        def build_base_payload(chat_messages, model, stream)
-          {
+        def build_base_payload(chat_messages, model, stream, thinking)
+          payload = {
            model: model.id,
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
            stream: stream,
            max_tokens: model.max_tokens || 4096
          }
+
+          thinking_payload = build_thinking_payload(thinking)
+          payload[:thinking] = thinking_payload if thinking_payload
+
+          payload
        end

        def add_optional_fields(payload, system_content:, tools:, temperature:)
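For reference, with thinking enabled and a budget of 8192 tokens, the rendered Anthropic request body gains a thinking field alongside the usual keys. Example values, messages truncated for brevity:

  {
    model: 'claude-sonnet-4-20250514',                 # example model id
    messages: [{ role: 'user', content: [...] }],      # truncated
    stream: false,
    max_tokens: 4096,
    thinking: { type: 'enabled', budget_tokens: 8192 } # from build_thinking_payload
  }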
@@ -65,9 +70,11 @@ module RubyLLM
          content_blocks = data['content'] || []

          text_content = extract_text_content(content_blocks)
+          thinking_content = extract_thinking_content(content_blocks)
+          thinking_signature = extract_thinking_signature(content_blocks)
          tool_use_blocks = Tools.find_tool_uses(content_blocks)

-          build_message(data, text_content, tool_use_blocks, response)
+          build_message(data, text_content, thinking_content, thinking_signature, tool_use_blocks, response)
        end

        def extract_text_content(blocks)
@@ -75,50 +82,158 @@ module RubyLLM
          text_blocks.map { |c| c['text'] }.join
        end

-        def build_message(data, content, tool_use_blocks, response)
+        def extract_thinking_content(blocks)
+          thinking_blocks = blocks.select { |c| c['type'] == 'thinking' }
+          thoughts = thinking_blocks.map { |c| c['thinking'] || c['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thinking_signature(blocks)
+          thinking_block = blocks.find { |c| c['type'] == 'thinking' } ||
+                           blocks.find { |c| c['type'] == 'redacted_thinking' }
+          thinking_block&.dig('signature') || thinking_block&.dig('data')
+        end
+
+        def build_message(data, content, thinking, thinking_signature, tool_use_blocks, response) # rubocop:disable Metrics/ParameterLists
          usage = data['usage'] || {}
          cached_tokens = usage['cache_read_input_tokens']
          cache_creation_tokens = usage['cache_creation_input_tokens']
          if cache_creation_tokens.nil? && usage['cache_creation'].is_a?(Hash)
            cache_creation_tokens = usage['cache_creation'].values.compact.sum
          end
+          thinking_tokens = usage.dig('output_tokens_details', 'thinking_tokens') ||
+                            usage.dig('output_tokens_details', 'reasoning_tokens') ||
+                            usage['thinking_tokens'] ||
+                            usage['reasoning_tokens']

          Message.new(
            role: :assistant,
            content: content,
+            thinking: Thinking.build(text: thinking, signature: thinking_signature),
            tool_calls: Tools.parse_tool_calls(tool_use_blocks),
            input_tokens: usage['input_tokens'],
            output_tokens: usage['output_tokens'],
            cached_tokens: cached_tokens,
            cache_creation_tokens: cache_creation_tokens,
+            thinking_tokens: thinking_tokens,
            model_id: data['model'],
            raw: response
          )
        end

-        def format_message(msg)
+        def format_message(msg, thinking: nil)
+          thinking_enabled = thinking&.enabled?
+
          if msg.tool_call?
-            Tools.format_tool_call(msg)
+            format_tool_call_with_thinking(msg, thinking_enabled)
          elsif msg.tool_result?
            Tools.format_tool_result(msg)
          else
-            format_basic_message(msg)
+            format_basic_message_with_thinking(msg, thinking_enabled)
          end
        end

-        def format_basic_message(msg)
+        def format_basic_message_with_thinking(msg, thinking_enabled)
+          content_blocks = []
+
+          if msg.role == :assistant && thinking_enabled
+            thinking_block = build_thinking_block(msg.thinking)
+            content_blocks << thinking_block if thinking_block
+          end
+
+          append_formatted_content(content_blocks, msg.content)
+
          {
            role: convert_role(msg.role),
-            content: Media.format_content(msg.content)
+            content: content_blocks
          }
        end

+        def format_tool_call_with_thinking(msg, thinking_enabled)
+          if msg.content.is_a?(RubyLLM::Content::Raw)
+            content_blocks = msg.content.value
+            content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+            content_blocks = prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+            return { role: 'assistant', content: content_blocks }
+          end
+
+          content_blocks = prepend_thinking_block([], msg, thinking_enabled)
+          content_blocks << Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+          msg.tool_calls.each_value do |tool_call|
+            content_blocks << {
+              type: 'tool_use',
+              id: tool_call.id,
+              name: tool_call.name,
+              input: tool_call.arguments
+            }
+          end
+
+          {
+            role: 'assistant',
+            content: content_blocks
+          }
+        end
+
+        def prepend_thinking_block(content_blocks, msg, thinking_enabled)
+          return content_blocks unless thinking_enabled
+
+          thinking_block = build_thinking_block(msg.thinking)
+          content_blocks.unshift(thinking_block) if thinking_block
+
+          content_blocks
+        end
+
+        def build_thinking_block(thinking)
+          return nil unless thinking
+
+          if thinking.text
+            {
+              type: 'thinking',
+              thinking: thinking.text,
+              signature: thinking.signature
+            }.compact
+          elsif thinking.signature
+            {
+              type: 'redacted_thinking',
+              data: thinking.signature
+            }
+          end
+        end
+
+        def append_formatted_content(content_blocks, content)
+          formatted_content = Media.format_content(content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          else
+            content_blocks << formatted_content
+          end
+        end
+
        def convert_role(role)
          case role
          when :tool, :user then 'user'
          else 'assistant'
          end
        end
+
+        def build_thinking_payload(thinking)
+          return nil unless thinking&.enabled?
+
+          budget = resolve_budget(thinking)
+          raise ArgumentError, 'Anthropic thinking requires a budget' if budget.nil?
+
+          {
+            type: 'enabled',
+            budget_tokens: budget
+          }
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
      end
    end
  end
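When the conversation continues, build_thinking_block replays the assistant's prior thinking ahead of its visible content, so a later request carries something like the following (all values invented for illustration):

  {
    role: 'assistant',
    content: [
      # The signed thinking block must precede the text/tool_use blocks.
      { type: 'thinking', thinking: 'Check the conversion factor first...', signature: 'EuYBCkQYAi...' },
      { type: 'text', text: 'Roughly 42 kilometers.' }
    ]
  }

If only a signature survived (a redacted turn), a { type: 'redacted_thinking', data: ... } block is sent instead, per build_thinking_block above.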
data/lib/ruby_llm/providers/anthropic/streaming.rb
@@ -12,10 +12,16 @@ module RubyLLM
        end

        def build_chunk(data)
+          delta_type = data.dig('delta', 'type')
+
          Chunk.new(
            role: :assistant,
            model_id: extract_model_id(data),
-            content: data.dig('delta', 'text'),
+            content: extract_content_delta(data, delta_type),
+            thinking: Thinking.build(
+              text: extract_thinking_delta(data, delta_type),
+              signature: extract_signature_delta(data, delta_type)
+            ),
            input_tokens: extract_input_tokens(data),
            output_tokens: extract_output_tokens(data),
            cached_tokens: extract_cached_tokens(data),
@@ -24,6 +30,24 @@ module RubyLLM
          )
        end

+        def extract_content_delta(data, delta_type)
+          return data.dig('delta', 'text') if delta_type == 'text_delta'
+
+          nil
+        end
+
+        def extract_thinking_delta(data, delta_type)
+          return data.dig('delta', 'thinking') if delta_type == 'thinking_delta'
+
+          nil
+        end
+
+        def extract_signature_delta(data, delta_type)
+          return data.dig('delta', 'signature') if delta_type == 'signature_delta'
+
+          nil
+        end
+
        def json_delta?(data)
          data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'input_json_delta'
        end
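How the three delta types map onto a chunk, using abbreviated SSE event payloads with invented field values:

  thinking_event = { 'type' => 'content_block_delta',
                     'delta' => { 'type' => 'thinking_delta', 'thinking' => 'Hmm, ' } }
  text_event     = { 'type' => 'content_block_delta',
                     'delta' => { 'type' => 'text_delta', 'text' => 'Hello' } }

  extract_thinking_delta(thinking_event, 'thinking_delta') # => 'Hmm, '
  extract_content_delta(thinking_event, 'thinking_delta')  # => nil (thinking never leaks into content)
  extract_content_delta(text_event, 'text_delta')          # => 'Hello'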
data/lib/ruby_llm/providers/bedrock/chat.rb
@@ -16,46 +16,89 @@ module RubyLLM
          Anthropic::Chat.parse_completion_response response
        end

-        def format_message(msg)
+        def format_message(msg, thinking: nil)
+          thinking_enabled = thinking&.enabled?
+
          if msg.tool_call?
-            Anthropic::Tools.format_tool_call(msg)
+            format_tool_call_with_thinking(msg, thinking_enabled)
          elsif msg.tool_result?
            Anthropic::Tools.format_tool_result(msg)
          else
-            format_basic_message(msg)
+            format_basic_message_with_thinking(msg, thinking_enabled)
          end
        end

-        def format_basic_message(msg)
-          {
-            role: Anthropic::Chat.convert_role(msg.role),
-            content: Media.format_content(msg.content)
-          }
-        end
-
        private

        def completion_url
          "model/#{@model_id}/invoke"
        end

-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
          @model_id = model.id

          system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
          system_content = Anthropic::Chat.build_system_content(system_messages)

-          build_base_payload(chat_messages, model).tap do |payload|
+          build_base_payload(chat_messages, model, thinking).tap do |payload|
            Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:)
          end
        end

-        def build_base_payload(chat_messages, model)
-          {
+        def build_base_payload(chat_messages, model, thinking)
+          payload = {
            anthropic_version: 'bedrock-2023-05-31',
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
            max_tokens: model.max_tokens || 4096
          }
+
+          thinking_payload = Anthropic::Chat.build_thinking_payload(thinking)
+          payload[:thinking] = thinking_payload if thinking_payload
+
+          payload
+        end
+
+        def format_basic_message_with_thinking(msg, thinking_enabled)
+          content_blocks = []
+
+          if msg.role == :assistant && thinking_enabled
+            thinking_block = Anthropic::Chat.build_thinking_block(msg.thinking)
+            content_blocks << thinking_block if thinking_block
+          end
+
+          Anthropic::Chat.append_formatted_content(content_blocks, msg.content)
+
+          {
+            role: Anthropic::Chat.convert_role(msg.role),
+            content: content_blocks
+          }
+        end
+
+        def format_tool_call_with_thinking(msg, thinking_enabled)
+          if msg.content.is_a?(RubyLLM::Content::Raw)
+            content_blocks = msg.content.value
+            content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+            content_blocks = Anthropic::Chat.prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+            return { role: 'assistant', content: content_blocks }
+          end
+
+          content_blocks = Anthropic::Chat.prepend_thinking_block([], msg, thinking_enabled)
+          content_blocks << Anthropic::Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+          msg.tool_calls.each_value do |tool_call|
+            content_blocks << {
+              type: 'tool_use',
+              id: tool_call.id,
+              name: tool_call.name,
+              input: tool_call.arguments
+            }
+          end
+
+          {
+            role: 'assistant',
+            content: content_blocks
+          }
        end
      end
    end
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb
@@ -16,6 +16,31 @@ module RubyLLM
          extract_content_by_type(data)
        end

+        def extract_thinking_delta(data)
+          return nil unless data.is_a?(Hash)
+
+          if data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'thinking_delta'
+            return data.dig('delta', 'thinking')
+          end
+
+          if data['type'] == 'content_block_start' && data.dig('content_block', 'type') == 'thinking'
+            return data.dig('content_block', 'thinking') || data.dig('content_block', 'text')
+          end
+
+          nil
+        end
+
+        def extract_signature_delta(data)
+          return nil unless data.is_a?(Hash)
+
+          signature = extract_signature_from_delta(data)
+          return signature if signature
+
+          return nil unless data['type'] == 'content_block_start'
+
+          extract_signature_from_block(data['content_block'])
+        end
+
        def extract_tool_calls(data)
          data.dig('message', 'tool_calls') || data['tool_calls']
        end
@@ -47,6 +72,17 @@ module RubyLLM
          breakdown.values.compact.sum
        end

+        def extract_thinking_tokens(data)
+          data.dig('message', 'usage', 'thinking_tokens') ||
+            data.dig('message', 'usage', 'output_tokens_details', 'thinking_tokens') ||
+            data.dig('usage', 'thinking_tokens') ||
+            data.dig('usage', 'output_tokens_details', 'thinking_tokens') ||
+            data.dig('message', 'usage', 'reasoning_tokens') ||
+            data.dig('message', 'usage', 'output_tokens_details', 'reasoning_tokens') ||
+            data.dig('usage', 'reasoning_tokens') ||
+            data.dig('usage', 'output_tokens_details', 'reasoning_tokens')
+        end
+
        private

        def extract_content_by_type(data)
@@ -58,11 +94,32 @@ module RubyLLM
        end

        def extract_block_start_content(data)
-          data.dig('content_block', 'text').to_s
+          content_block = data['content_block'] || {}
+          return '' if %w[thinking redacted_thinking].include?(content_block['type'])
+
+          content_block['text'].to_s
        end

        def extract_delta_content(data)
-          data.dig('delta', 'text').to_s
+          delta = data['delta'] || {}
+          return '' if %w[thinking_delta signature_delta].include?(delta['type'])
+
+          delta['text'].to_s
+        end
+
+        def extract_signature_from_delta(data)
+          return unless data['type'] == 'content_block_delta'
+          return unless data.dig('delta', 'type') == 'signature_delta'
+
+          data.dig('delta', 'signature')
+        end
+
+        def extract_signature_from_block(content_block)
+          block = content_block || {}
+          return block['signature'] if block['type'] == 'thinking' && block['signature']
+          return block['data'] if block['type'] == 'redacted_thinking'
+
+          nil
        end
      end
    end
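A Bedrock stream can deliver the signature either as its own delta or inside the opening (possibly redacted) thinking block; both shapes resolve through extract_signature_delta. Example payloads with invented values:

  delta_event = { 'type' => 'content_block_delta',
                  'delta' => { 'type' => 'signature_delta', 'signature' => 'EuYBCk...' } }
  start_event = { 'type' => 'content_block_start',
                  'content_block' => { 'type' => 'redacted_thinking', 'data' => 'EuYBCk...' } }

  extract_signature_delta(delta_event) # => 'EuYBCk...' (from the signature delta)
  extract_signature_delta(start_event) # => 'EuYBCk...' (from the redacted block's data field)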