ruby_llm 1.9.2 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
- data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
- data/lib/ruby_llm/active_record/message_methods.rb +41 -8
- data/lib/ruby_llm/aliases.json +0 -12
- data/lib/ruby_llm/chat.rb +10 -7
- data/lib/ruby_llm/configuration.rb +1 -1
- data/lib/ruby_llm/message.rb +37 -11
- data/lib/ruby_llm/models.json +1059 -857
- data/lib/ruby_llm/models.rb +134 -12
- data/lib/ruby_llm/provider.rb +4 -3
- data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
- data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
- data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
- data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
- data/lib/ruby_llm/providers/openai/chat.rb +87 -3
- data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
- data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
- data/lib/ruby_llm/providers/openai.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
- data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
- data/lib/ruby_llm/providers/openrouter.rb +2 -0
- data/lib/ruby_llm/providers/vertexai.rb +5 -1
- data/lib/ruby_llm/stream_accumulator.rb +111 -14
- data/lib/ruby_llm/streaming.rb +54 -51
- data/lib/ruby_llm/thinking.rb +49 -0
- data/lib/ruby_llm/tokens.rb +47 -0
- data/lib/ruby_llm/tool_call.rb +6 -3
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models.rake +19 -12
- metadata +12 -5
data/lib/ruby_llm/models.rb
CHANGED
@@ -38,24 +38,53 @@ module RubyLLM
       end
 
       def refresh!(remote_only: false)
-
-
-
+        existing_models = load_existing_models
+
+        provider_fetch = fetch_provider_models(remote_only: remote_only)
+        log_provider_fetch(provider_fetch)
+
+        models_dev_fetch = fetch_models_dev_models(existing_models)
+        log_models_dev_fetch(models_dev_fetch)
+
+        merged_models = merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
         @instance = new(merged_models)
       end
 
-      def
+      def fetch_provider_models(remote_only: true) # rubocop:disable Metrics/PerceivedComplexity
         config = RubyLLM.config
+        provider_classes = remote_only ? Provider.remote_providers.values : Provider.providers.values
         configured_classes = if remote_only
                                Provider.configured_remote_providers(config)
                              else
                                Provider.configured_providers(config)
                              end
-        configured = configured_classes.
+        configured = configured_classes.select { |klass| provider_classes.include?(klass) }
+        result = {
+          models: [],
+          fetched_providers: [],
+          configured_names: configured.map(&:name),
+          failed: []
+        }
 
-
+        provider_classes.each do |provider_class|
+          next if remote_only && provider_class.local?
+          next unless provider_class.configured?(config)
+
+          begin
+            result[:models].concat(provider_class.new(config).list_models)
+            result[:fetched_providers] << provider_class.slug
+          rescue StandardError => e
+            result[:failed] << { name: provider_class.name, slug: provider_class.slug, error: e }
+          end
+        end
+
+        result[:fetched_providers].uniq!
+        result
+      end
 
-
+      # Backwards-compatible wrapper used by specs.
+      def fetch_from_providers(remote_only: true)
+        fetch_provider_models(remote_only: remote_only)[:models]
       end
 
       def resolve(model_id, provider: nil, assume_exists: false, config: nil) # rubocop:disable Metrics/PerceivedComplexity
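Net effect of this hunk: `refresh!` no longer works from a bare model list; `fetch_provider_models` now reports what it fetched and what failed so the later merge step can preserve cached entries. A rough sketch of the result shape (keys come from the code above; the values are invented for illustration):

    # Illustrative result of fetch_provider_models(remote_only: true).
    {
      models: [],                                  # Model::Info records from each Provider#list_models
      fetched_providers: ['openai', 'anthropic'],  # slugs whose list_models call succeeded
      configured_names: ['OpenAI', 'Anthropic'],   # names used in the "Fetching models" log line
      failed: [{ name: 'Gemini', slug: 'gemini', error: StandardError.new('timeout') }]
    }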
@@ -103,7 +132,7 @@ module RubyLLM
         instance.respond_to?(method, include_private) || super
       end
 
-      def
+      def fetch_models_dev_models(existing_models) # rubocop:disable Metrics/PerceivedComplexity
         RubyLLM.logger.info 'Fetching models from models.dev API...'
 
         connection = Connection.basic do |f|
@@ -121,7 +150,52 @@ module RubyLLM
             Model::Info.new(models_dev_model_to_info(model_data, provider_slug, provider_key.to_s))
           end
         end
-        models.reject { |model| model.provider.nil? || model.id.nil? }
+        { models: models.reject { |model| model.provider.nil? || model.id.nil? }, fetched: true }
+      rescue StandardError => e
+        RubyLLM.logger.warn("Failed to fetch models.dev (#{e.class}: #{e.message}). Keeping existing.")
+        {
+          models: existing_models.select { |model| model.metadata[:source] == 'models.dev' },
+          fetched: false
+        }
+      end
+
+      def load_existing_models
+        existing_models = instance&.all
+        existing_models = read_from_json if existing_models.nil? || existing_models.empty?
+        existing_models
+      end
+
+      def log_provider_fetch(provider_fetch)
+        RubyLLM.logger.info "Fetching models from providers: #{provider_fetch[:configured_names].join(', ')}"
+        provider_fetch[:failed].each do |failure|
+          RubyLLM.logger.warn(
+            "Failed to fetch #{failure[:name]} models (#{failure[:error].class}: #{failure[:error].message}). " \
+            'Keeping existing.'
+          )
+        end
+      end
+
+      def log_models_dev_fetch(models_dev_fetch)
+        return if models_dev_fetch[:fetched]
+
+        RubyLLM.logger.warn('Using cached models.dev data due to fetch failure.')
+      end
+
+      def merge_with_existing(existing_models, provider_fetch, models_dev_fetch)
+        existing_by_provider = existing_models.group_by(&:provider)
+        preserved_models = existing_by_provider
+                           .except(*provider_fetch[:fetched_providers])
+                           .values
+                           .flatten
+
+        provider_models = provider_fetch[:models] + preserved_models
+        models_dev_models = if models_dev_fetch[:fetched]
+                              models_dev_fetch[:models]
+                            else
+                              existing_models.select { |model| model.metadata[:source] == 'models.dev' }
+                            end
+
+        merge_models(provider_models, models_dev_models)
       end
 
       def merge_models(provider_models, models_dev_models)
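The practical consequence of `merge_with_existing` is that a provider whose fetch failed (or that simply wasn't configured) keeps its previously cached models instead of vanishing from the registry. A runnable sketch with stand-in records (ids are made up):

    require 'ostruct'

    # Stand-ins for Model::Info; only #provider and #id matter here.
    existing = [OpenStruct.new(provider: 'openai', id: 'gpt-4.1'),
                OpenStruct.new(provider: 'gemini', id: 'gemini-2.5-pro')]
    fetch    = { models: [OpenStruct.new(provider: 'openai', id: 'gpt-5')],
                 fetched_providers: ['openai'] }

    # Mirrors merge_with_existing: drop groups that were re-fetched, keep the rest.
    preserved = existing.group_by(&:provider)
                        .except(*fetch[:fetched_providers])
                        .values.flatten
    (fetch[:models] + preserved).map(&:id) # => ["gpt-5", "gemini-2.5-pro"]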
@@ -150,8 +224,23 @@ module RubyLLM
         # Direct match
         return models_dev_by_key[key] if models_dev_by_key[key]
 
-        # VertexAI uses same models as Gemini
         provider, model_id = key.split(':', 2)
+        if provider == 'bedrock'
+          normalized_id = model_id.sub(/^[a-z]{2}\./, '')
+          context_override = nil
+          normalized_id = normalized_id.gsub(/:(\d+)k\b/) do
+            context_override = Regexp.last_match(1).to_i * 1000
+            ''
+          end
+          bedrock_model = models_dev_by_key["bedrock:#{normalized_id}"]
+          if bedrock_model
+            data = bedrock_model.to_h.merge(id: model_id)
+            data[:context_window] = context_override if context_override
+            return Model::Info.new(data)
+          end
+        end
+
+        # VertexAI uses same models as Gemini
         return unless provider == 'vertexai'
 
         gemini_model = models_dev_by_key["gemini:#{model_id}"]
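The new Bedrock branch lets region-prefixed and context-suffixed Bedrock ids resolve to the same models.dev entry as the bare id, with a `:NNNk` suffix promoted to a context-window override. Tracing the two regexes on example ids (ids here are illustrative, not necessarily in the registry):

    'us.anthropic.claude-sonnet-4-20250514-v1:0'.sub(/^[a-z]{2}\./, '')
    # => "anthropic.claude-sonnet-4-20250514-v1:0"   (cross-region prefix stripped)

    context_override = nil
    'anthropic.claude-instant-v1:2:100k'.gsub(/:(\d+)k\b/) do
      context_override = Regexp.last_match(1).to_i * 1000
      ''
    end
    # => "anthropic.claude-instant-v1:2", with context_override == 100_000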
@@ -167,18 +256,48 @@ module RubyLLM
         end
       end
 
-      def add_provider_metadata(models_dev_model, provider_model)
+      def add_provider_metadata(models_dev_model, provider_model) # rubocop:disable Metrics/PerceivedComplexity
         data = models_dev_model.to_h
+        data[:name] = provider_model.name if blank_value?(data[:name])
+        data[:family] = provider_model.family if blank_value?(data[:family])
+        data[:created_at] = provider_model.created_at if blank_value?(data[:created_at])
+        data[:context_window] = provider_model.context_window if blank_value?(data[:context_window])
+        data[:max_output_tokens] = provider_model.max_output_tokens if blank_value?(data[:max_output_tokens])
+        data[:modalities] = provider_model.modalities.to_h if blank_value?(data[:modalities])
+        data[:pricing] = provider_model.pricing.to_h if blank_value?(data[:pricing])
         data[:metadata] = provider_model.metadata.merge(data[:metadata] || {})
         data[:capabilities] = (models_dev_model.capabilities + provider_model.capabilities).uniq
+        normalize_embedding_modalities(data)
         Model::Info.new(data)
       end
 
+      def normalize_embedding_modalities(data)
+        return unless data[:id].to_s.include?('embedding')
+
+        modalities = data[:modalities].to_h
+        modalities[:input] = ['text'] if modalities[:input].nil? || modalities[:input].empty?
+        modalities[:output] = ['embeddings']
+        data[:modalities] = modalities
+      end
+
+      def blank_value?(value)
+        return true if value.nil?
+        return value.empty? if value.is_a?(String) || value.is_a?(Array)
+
+        if value.is_a?(Hash)
+          return true if value.empty?
+
+          return value.values.all? { |nested| blank_value?(nested) }
+        end
+
+        false
+      end
+
       def models_dev_model_to_info(model_data, provider_slug, provider_key)
         modalities = normalize_models_dev_modalities(model_data[:modalities])
         capabilities = models_dev_capabilities(model_data, modalities)
 
-        {
+        data = {
           id: model_data[:id],
           name: model_data[:name] || model_data[:id],
           provider: provider_slug,
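`blank_value?` is what decides when provider data may overwrite models.dev data: a hash counts as blank when it is empty or when every nested value is recursively blank, so a pricing stanza full of nils gets replaced while a partially filled one is kept. For example (standalone copy of the predicate above):

    def blank_value?(value)
      return true if value.nil?
      return value.empty? if value.is_a?(String) || value.is_a?(Array)

      if value.is_a?(Hash)
        return true if value.empty?

        return value.values.all? { |nested| blank_value?(nested) }
      end

      false
    end

    blank_value?({ input: nil, output: [] })    # => true  (all nested values blank)
    blank_value?({ input: nil, output: 0.15 })  # => false (one real value suffices)
    blank_value?(0)                             # => false (numbers are never blank)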
@@ -192,6 +311,9 @@ module RubyLLM
           pricing: models_dev_pricing(model_data[:cost]),
           metadata: models_dev_metadata(model_data, provider_key)
         }
+
+        normalize_embedding_modalities(data)
+        data
       end
 
       def models_dev_capabilities(model_data, modalities)
data/lib/ruby_llm/provider.rb
CHANGED
@@ -37,7 +37,7 @@ module RubyLLM
       self.class.configuration_requirements
     end
 
-    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, &) # rubocop:disable Metrics/ParameterLists
       normalized_temperature = maybe_normalize_temperature(temperature, model)
 
       payload = Utils.deep_merge(
@@ -47,7 +47,8 @@ module RubyLLM
           temperature: normalized_temperature,
           model: model,
           stream: block_given?,
-          schema: schema
+          schema: schema,
+          thinking: thinking
         ),
         params
       )
@@ -144,7 +145,7 @@ module RubyLLM
       end
 
       def capabilities
-
+        nil
       end
 
       def configuration_requirements
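`complete` now threads a `thinking:` value into every provider payload. The `Thinking` class itself lives in the new lib/ruby_llm/thinking.rb, which this diff listing doesn't expand; judging only from the call sites visible here (`Thinking.build(text:, signature:)`, `thinking&.enabled?`, `thinking.budget`, `thinking.text`, `thinking.signature`), a compatible shape might look like the following hypothetical sketch; the shipped class may well differ:

    module RubyLLM
      # HYPOTHETICAL sketch inferred from call sites in this diff,
      # not the actual lib/ruby_llm/thinking.rb source.
      class Thinking
        attr_reader :text, :signature, :budget

        def initialize(text: nil, signature: nil, budget: nil)
          @text = text
          @signature = signature
          @budget = budget
        end

        # Call sites expect nil when there is nothing to carry.
        def self.build(text: nil, signature: nil)
          return nil if text.nil? && signature.nil?

          new(text: text, signature: signature)
        end

        # Stand-in predicate; the real implementation presumably encodes
        # whether thinking was requested for the chat.
        def enabled?
          true
        end
      end
    end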
data/lib/ruby_llm/providers/anthropic/chat.rb
CHANGED
@@ -3,7 +3,7 @@
 module RubyLLM
   module Providers
     class Anthropic
-      # Chat methods
+      # Chat methods for the Anthropic API implementation
       module Chat
         module_function
 
@@ -11,11 +11,11 @@ module RubyLLM
           '/v1/messages'
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           system_messages, chat_messages = separate_messages(messages)
           system_content = build_system_content(system_messages)
 
-          build_base_payload(chat_messages, model, stream).tap do |payload|
+          build_base_payload(chat_messages, model, stream, thinking).tap do |payload|
             add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end
@@ -45,13 +45,18 @@ module RubyLLM
           end
         end
 
-        def build_base_payload(chat_messages, model, stream)
-          {
+        def build_base_payload(chat_messages, model, stream, thinking)
+          payload = {
             model: model.id,
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
             stream: stream,
             max_tokens: model.max_tokens || 4096
           }
+
+          thinking_payload = build_thinking_payload(thinking)
+          payload[:thinking] = thinking_payload if thinking_payload
+
+          payload
         end
 
         def add_optional_fields(payload, system_content:, tools:, temperature:)
@@ -65,9 +70,11 @@ module RubyLLM
           content_blocks = data['content'] || []
 
           text_content = extract_text_content(content_blocks)
+          thinking_content = extract_thinking_content(content_blocks)
+          thinking_signature = extract_thinking_signature(content_blocks)
           tool_use_blocks = Tools.find_tool_uses(content_blocks)
 
-          build_message(data, text_content, tool_use_blocks, response)
+          build_message(data, text_content, thinking_content, thinking_signature, tool_use_blocks, response)
         end
 
         def extract_text_content(blocks)
@@ -75,50 +82,158 @@ module RubyLLM
           text_blocks.map { |c| c['text'] }.join
         end
 
-        def
+        def extract_thinking_content(blocks)
+          thinking_blocks = blocks.select { |c| c['type'] == 'thinking' }
+          thoughts = thinking_blocks.map { |c| c['thinking'] || c['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thinking_signature(blocks)
+          thinking_block = blocks.find { |c| c['type'] == 'thinking' } ||
+                           blocks.find { |c| c['type'] == 'redacted_thinking' }
+          thinking_block&.dig('signature') || thinking_block&.dig('data')
+        end
+
+        def build_message(data, content, thinking, thinking_signature, tool_use_blocks, response) # rubocop:disable Metrics/ParameterLists
           usage = data['usage'] || {}
           cached_tokens = usage['cache_read_input_tokens']
           cache_creation_tokens = usage['cache_creation_input_tokens']
           if cache_creation_tokens.nil? && usage['cache_creation'].is_a?(Hash)
             cache_creation_tokens = usage['cache_creation'].values.compact.sum
           end
+          thinking_tokens = usage.dig('output_tokens_details', 'thinking_tokens') ||
+                            usage.dig('output_tokens_details', 'reasoning_tokens') ||
+                            usage['thinking_tokens'] ||
+                            usage['reasoning_tokens']
 
           Message.new(
             role: :assistant,
             content: content,
+            thinking: Thinking.build(text: thinking, signature: thinking_signature),
             tool_calls: Tools.parse_tool_calls(tool_use_blocks),
             input_tokens: usage['input_tokens'],
             output_tokens: usage['output_tokens'],
             cached_tokens: cached_tokens,
             cache_creation_tokens: cache_creation_tokens,
+            thinking_tokens: thinking_tokens,
             model_id: data['model'],
             raw: response
           )
         end
 
-        def format_message(msg)
+        def format_message(msg, thinking: nil)
+          thinking_enabled = thinking&.enabled?
+
           if msg.tool_call?
-
+            format_tool_call_with_thinking(msg, thinking_enabled)
           elsif msg.tool_result?
             Tools.format_tool_result(msg)
           else
-
+            format_basic_message_with_thinking(msg, thinking_enabled)
           end
         end
 
-        def
+        def format_basic_message_with_thinking(msg, thinking_enabled)
+          content_blocks = []
+
+          if msg.role == :assistant && thinking_enabled
+            thinking_block = build_thinking_block(msg.thinking)
+            content_blocks << thinking_block if thinking_block
+          end
+
+          append_formatted_content(content_blocks, msg.content)
+
           {
             role: convert_role(msg.role),
-            content:
+            content: content_blocks
           }
         end
 
+        def format_tool_call_with_thinking(msg, thinking_enabled)
+          if msg.content.is_a?(RubyLLM::Content::Raw)
+            content_blocks = msg.content.value
+            content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+            content_blocks = prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+            return { role: 'assistant', content: content_blocks }
+          end
+
+          content_blocks = prepend_thinking_block([], msg, thinking_enabled)
+          content_blocks << Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+          msg.tool_calls.each_value do |tool_call|
+            content_blocks << {
+              type: 'tool_use',
+              id: tool_call.id,
+              name: tool_call.name,
+              input: tool_call.arguments
+            }
+          end
+
+          {
+            role: 'assistant',
+            content: content_blocks
+          }
+        end
+
+        def prepend_thinking_block(content_blocks, msg, thinking_enabled)
+          return content_blocks unless thinking_enabled
+
+          thinking_block = build_thinking_block(msg.thinking)
+          content_blocks.unshift(thinking_block) if thinking_block
+
+          content_blocks
+        end
+
+        def build_thinking_block(thinking)
+          return nil unless thinking
+
+          if thinking.text
+            {
+              type: 'thinking',
+              thinking: thinking.text,
+              signature: thinking.signature
+            }.compact
+          elsif thinking.signature
+            {
+              type: 'redacted_thinking',
+              data: thinking.signature
+            }
+          end
+        end
+
+        def append_formatted_content(content_blocks, content)
+          formatted_content = Media.format_content(content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          else
+            content_blocks << formatted_content
+          end
+        end
+
         def convert_role(role)
           case role
           when :tool, :user then 'user'
           else 'assistant'
           end
         end
+
+        def build_thinking_payload(thinking)
+          return nil unless thinking&.enabled?
+
+          budget = resolve_budget(thinking)
+          raise ArgumentError, 'Anthropic thinking requires a budget' if budget.nil?
+
+          {
+            type: 'enabled',
+            budget_tokens: budget
+          }
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
       end
     end
   end
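Put together, a thinking-enabled Anthropic request gains a top-level `thinking` object and replays prior signed thinking blocks ahead of the assistant text, matching Anthropic's extended-thinking wire format. A hand-assembled fragment of what `render_payload` would emit (model id and block contents are illustrative):

    {
      model: 'claude-sonnet-4-20250514',
      max_tokens: 4096,
      thinking: { type: 'enabled', budget_tokens: 2048 },   # from build_thinking_payload
      messages: [
        { role: 'assistant',
          content: [
            # build_thinking_block replays the signed thinking first...
            { type: 'thinking', thinking: 'Check the dates...', signature: 'EqQBC...' },
            # ...then the ordinary content blocks follow.
            { type: 'text', text: 'The meeting is on Tuesday.' }
          ] }
      ]
    }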
data/lib/ruby_llm/providers/anthropic/streaming.rb
CHANGED
@@ -12,10 +12,16 @@ module RubyLLM
         end
 
         def build_chunk(data)
+          delta_type = data.dig('delta', 'type')
+
           Chunk.new(
             role: :assistant,
             model_id: extract_model_id(data),
-            content: data
+            content: extract_content_delta(data, delta_type),
+            thinking: Thinking.build(
+              text: extract_thinking_delta(data, delta_type),
+              signature: extract_signature_delta(data, delta_type)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
             cached_tokens: extract_cached_tokens(data),
@@ -24,6 +30,24 @@ module RubyLLM
           )
         end
 
+        def extract_content_delta(data, delta_type)
+          return data.dig('delta', 'text') if delta_type == 'text_delta'
+
+          nil
+        end
+
+        def extract_thinking_delta(data, delta_type)
+          return data.dig('delta', 'thinking') if delta_type == 'thinking_delta'
+
+          nil
+        end
+
+        def extract_signature_delta(data, delta_type)
+          return data.dig('delta', 'signature') if delta_type == 'signature_delta'
+
+          nil
+        end
+
         def json_delta?(data)
           data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'input_json_delta'
         end
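The streaming branch maps each `delta.type` to exactly one field of the chunk, so a given event contributes either text, thinking, or a signature, never a mix. Hand-written event samples tracing the three extractors:

    text_evt  = { 'type' => 'content_block_delta',
                  'delta' => { 'type' => 'text_delta', 'text' => 'Hello' } }
    think_evt = { 'type' => 'content_block_delta',
                  'delta' => { 'type' => 'thinking_delta', 'thinking' => 'Hmm...' } }
    sig_evt   = { 'type' => 'content_block_delta',
                  'delta' => { 'type' => 'signature_delta', 'signature' => 'EqQBC...' } }

    # Applying the extractors above to each event:
    #   extract_content_delta(text_evt, 'text_delta')        # => "Hello"
    #   extract_thinking_delta(think_evt, 'thinking_delta')  # => "Hmm..."
    #   extract_signature_delta(sig_evt, 'signature_delta')  # => "EqQBC..."
    # and each returns nil for the other two event shapes.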
data/lib/ruby_llm/providers/bedrock/chat.rb
CHANGED
@@ -16,46 +16,89 @@ module RubyLLM
         Anthropic::Chat.parse_completion_response response
       end
 
-      def format_message(msg)
+      def format_message(msg, thinking: nil)
+        thinking_enabled = thinking&.enabled?
+
         if msg.tool_call?
-
+          format_tool_call_with_thinking(msg, thinking_enabled)
         elsif msg.tool_result?
           Anthropic::Tools.format_tool_result(msg)
         else
-
+          format_basic_message_with_thinking(msg, thinking_enabled)
         end
       end
 
-      def format_basic_message(msg)
-        {
-          role: Anthropic::Chat.convert_role(msg.role),
-          content: Media.format_content(msg.content)
-        }
-      end
-
       private
 
       def completion_url
         "model/#{@model_id}/invoke"
       end
 
-      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
         @model_id = model.id
 
         system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
         system_content = Anthropic::Chat.build_system_content(system_messages)
 
-        build_base_payload(chat_messages, model).tap do |payload|
+        build_base_payload(chat_messages, model, thinking).tap do |payload|
          Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:)
        end
      end
 
-      def build_base_payload(chat_messages, model)
-        {
+      def build_base_payload(chat_messages, model, thinking)
+        payload = {
           anthropic_version: 'bedrock-2023-05-31',
-          messages: chat_messages.map { |msg| format_message(msg) },
+          messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
           max_tokens: model.max_tokens || 4096
         }
+
+        thinking_payload = Anthropic::Chat.build_thinking_payload(thinking)
+        payload[:thinking] = thinking_payload if thinking_payload
+
+        payload
+      end
+
+      def format_basic_message_with_thinking(msg, thinking_enabled)
+        content_blocks = []
+
+        if msg.role == :assistant && thinking_enabled
+          thinking_block = Anthropic::Chat.build_thinking_block(msg.thinking)
+          content_blocks << thinking_block if thinking_block
+        end
+
+        Anthropic::Chat.append_formatted_content(content_blocks, msg.content)
+
+        {
+          role: Anthropic::Chat.convert_role(msg.role),
+          content: content_blocks
+        }
+      end
+
+      def format_tool_call_with_thinking(msg, thinking_enabled)
+        if msg.content.is_a?(RubyLLM::Content::Raw)
+          content_blocks = msg.content.value
+          content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+          content_blocks = Anthropic::Chat.prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+          return { role: 'assistant', content: content_blocks }
+        end
+
+        content_blocks = Anthropic::Chat.prepend_thinking_block([], msg, thinking_enabled)
+        content_blocks << Anthropic::Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+        msg.tool_calls.each_value do |tool_call|
+          content_blocks << {
+            type: 'tool_use',
+            id: tool_call.id,
+            name: tool_call.name,
+            input: tool_call.arguments
+          }
+        end
+
+        {
+          role: 'assistant',
+          content: content_blocks
+        }
      end
    end
  end
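Bedrock reuses the Anthropic helpers (`build_thinking_payload`, `build_thinking_block`, `prepend_thinking_block`), so the only payload differences are the `anthropic_version` pin and the missing `model`/`stream` keys; the model id rides in the invoke URL instead. Roughly (an illustrative body, not a captured request):

    # POSTed to "model/#{model_id}/invoke"
    {
      anthropic_version: 'bedrock-2023-05-31',
      max_tokens: 4096,
      thinking: { type: 'enabled', budget_tokens: 1024 },
      messages: [{ role: 'user', content: [{ type: 'text', text: 'Hi' }] }]
    }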
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb
CHANGED
@@ -16,6 +16,31 @@ module RubyLLM
         extract_content_by_type(data)
       end
 
+      def extract_thinking_delta(data)
+        return nil unless data.is_a?(Hash)
+
+        if data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'thinking_delta'
+          return data.dig('delta', 'thinking')
+        end
+
+        if data['type'] == 'content_block_start' && data.dig('content_block', 'type') == 'thinking'
+          return data.dig('content_block', 'thinking') || data.dig('content_block', 'text')
+        end
+
+        nil
+      end
+
+      def extract_signature_delta(data)
+        return nil unless data.is_a?(Hash)
+
+        signature = extract_signature_from_delta(data)
+        return signature if signature
+
+        return nil unless data['type'] == 'content_block_start'
+
+        extract_signature_from_block(data['content_block'])
+      end
+
       def extract_tool_calls(data)
         data.dig('message', 'tool_calls') || data['tool_calls']
       end
@@ -47,6 +72,17 @@ module RubyLLM
         breakdown.values.compact.sum
       end
 
+      def extract_thinking_tokens(data)
+        data.dig('message', 'usage', 'thinking_tokens') ||
+          data.dig('message', 'usage', 'output_tokens_details', 'thinking_tokens') ||
+          data.dig('usage', 'thinking_tokens') ||
+          data.dig('usage', 'output_tokens_details', 'thinking_tokens') ||
+          data.dig('message', 'usage', 'reasoning_tokens') ||
+          data.dig('message', 'usage', 'output_tokens_details', 'reasoning_tokens') ||
+          data.dig('usage', 'reasoning_tokens') ||
+          data.dig('usage', 'output_tokens_details', 'reasoning_tokens')
+      end
+
       private
 
       def extract_content_by_type(data)
@@ -58,11 +94,32 @@ module RubyLLM
       end
 
       def extract_block_start_content(data)
-        data
+        content_block = data['content_block'] || {}
+        return '' if %w[thinking redacted_thinking].include?(content_block['type'])
+
+        content_block['text'].to_s
       end
 
       def extract_delta_content(data)
-        data
+        delta = data['delta'] || {}
+        return '' if %w[thinking_delta signature_delta].include?(delta['type'])
+
+        delta['text'].to_s
+      end
+
+      def extract_signature_from_delta(data)
+        return unless data['type'] == 'content_block_delta'
+        return unless data.dig('delta', 'type') == 'signature_delta'
+
+        data.dig('delta', 'signature')
+      end
+
+      def extract_signature_from_block(content_block)
+        block = content_block || {}
+        return block['signature'] if block['type'] == 'thinking' && block['signature']
+        return block['data'] if block['type'] == 'redacted_thinking'
+
+        nil
      end
    end
  end
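`extract_thinking_tokens` probes both `message.usage` and top-level `usage`, under both the `thinking_tokens` and `reasoning_tokens` spellings and inside `output_tokens_details`, because Bedrock event shapes nest usage differently. A sketched event (hand-made) that only the second probe matches:

    evt = { 'message' => { 'usage' => {
      'input_tokens' => 12,
      'output_tokens' => 240,
      'output_tokens_details' => { 'thinking_tokens' => 180 }
    } } }

    evt.dig('message', 'usage', 'thinking_tokens')                            # => nil
    evt.dig('message', 'usage', 'output_tokens_details', 'thinking_tokens')   # => 180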