ruby_llm 1.9.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -2
  3. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  4. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
  5. data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
  6. data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
  7. data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +1 -1
  8. data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
  9. data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
  10. data/lib/ruby_llm/active_record/message_methods.rb +41 -8
  11. data/lib/ruby_llm/aliases.json +101 -21
  12. data/lib/ruby_llm/chat.rb +10 -7
  13. data/lib/ruby_llm/configuration.rb +1 -1
  14. data/lib/ruby_llm/message.rb +37 -11
  15. data/lib/ruby_llm/models.json +21119 -10230
  16. data/lib/ruby_llm/models.rb +271 -27
  17. data/lib/ruby_llm/models_schema.json +2 -2
  18. data/lib/ruby_llm/provider.rb +4 -3
  19. data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
  20. data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
  21. data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
  22. data/lib/ruby_llm/providers/bedrock/models.rb +21 -15
  23. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
  24. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
  25. data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
  26. data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
  27. data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
  28. data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
  29. data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
  30. data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
  31. data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
  32. data/lib/ruby_llm/providers/openai/chat.rb +87 -3
  33. data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
  34. data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
  35. data/lib/ruby_llm/providers/openai.rb +1 -1
  36. data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
  37. data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
  38. data/lib/ruby_llm/providers/openrouter.rb +2 -0
  39. data/lib/ruby_llm/providers/vertexai.rb +5 -1
  40. data/lib/ruby_llm/stream_accumulator.rb +111 -14
  41. data/lib/ruby_llm/streaming.rb +76 -54
  42. data/lib/ruby_llm/thinking.rb +49 -0
  43. data/lib/ruby_llm/tokens.rb +47 -0
  44. data/lib/ruby_llm/tool.rb +1 -1
  45. data/lib/ruby_llm/tool_call.rb +6 -3
  46. data/lib/ruby_llm/version.rb +1 -1
  47. data/lib/tasks/models.rake +20 -13
  48. metadata +12 -5
data/lib/ruby_llm/providers/bedrock/chat.rb
@@ -16,46 +16,89 @@ module RubyLLM
           Anthropic::Chat.parse_completion_response response
         end
 
-        def format_message(msg)
+        def format_message(msg, thinking: nil)
+          thinking_enabled = thinking&.enabled?
+
           if msg.tool_call?
-            Anthropic::Tools.format_tool_call(msg)
+            format_tool_call_with_thinking(msg, thinking_enabled)
           elsif msg.tool_result?
             Anthropic::Tools.format_tool_result(msg)
           else
-            format_basic_message(msg)
+            format_basic_message_with_thinking(msg, thinking_enabled)
           end
         end
 
-        def format_basic_message(msg)
-          {
-            role: Anthropic::Chat.convert_role(msg.role),
-            content: Media.format_content(msg.content)
-          }
-        end
-
         private
 
         def completion_url
           "model/#{@model_id}/invoke"
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
           @model_id = model.id
 
           system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
           system_content = Anthropic::Chat.build_system_content(system_messages)
 
-          build_base_payload(chat_messages, model).tap do |payload|
+          build_base_payload(chat_messages, model, thinking).tap do |payload|
             Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end
 
-        def build_base_payload(chat_messages, model)
-          {
+        def build_base_payload(chat_messages, model, thinking)
+          payload = {
            anthropic_version: 'bedrock-2023-05-31',
-            messages: chat_messages.map { |msg| format_message(msg) },
+            messages: chat_messages.map { |msg| format_message(msg, thinking: thinking) },
            max_tokens: model.max_tokens || 4096
          }
+
+          thinking_payload = Anthropic::Chat.build_thinking_payload(thinking)
+          payload[:thinking] = thinking_payload if thinking_payload
+
+          payload
+        end
+
+        def format_basic_message_with_thinking(msg, thinking_enabled)
+          content_blocks = []
+
+          if msg.role == :assistant && thinking_enabled
+            thinking_block = Anthropic::Chat.build_thinking_block(msg.thinking)
+            content_blocks << thinking_block if thinking_block
+          end
+
+          Anthropic::Chat.append_formatted_content(content_blocks, msg.content)
+
+          {
+            role: Anthropic::Chat.convert_role(msg.role),
+            content: content_blocks
+          }
+        end
+
+        def format_tool_call_with_thinking(msg, thinking_enabled)
+          if msg.content.is_a?(RubyLLM::Content::Raw)
+            content_blocks = msg.content.value
+            content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+            content_blocks = Anthropic::Chat.prepend_thinking_block(content_blocks, msg, thinking_enabled)
+
+            return { role: 'assistant', content: content_blocks }
+          end
+
+          content_blocks = Anthropic::Chat.prepend_thinking_block([], msg, thinking_enabled)
+          content_blocks << Anthropic::Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
+
+          msg.tool_calls.each_value do |tool_call|
+            content_blocks << {
+              type: 'tool_use',
+              id: tool_call.id,
+              name: tool_call.name,
+              input: tool_call.arguments
+            }
+          end
+
+          {
+            role: 'assistant',
+            content: content_blocks
+          }
         end
       end
     end
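Taken together, a thinking-enabled Bedrock request body would come out roughly like this. A minimal sketch, assuming build_thinking_payload emits Anthropic's documented extended-thinking parameter; the prompt and budget values are illustrative:

# Illustrative result of render_payload with thinking enabled (values are examples)
{
  anthropic_version: 'bedrock-2023-05-31',
  messages: [
    { role: 'user', content: [{ type: 'text', text: 'Why is the sky blue?' }] }
  ],
  max_tokens: 4096,
  thinking: { type: 'enabled', budget_tokens: 2048 } # assumed shape, per Anthropic's extended-thinking API
}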
data/lib/ruby_llm/providers/bedrock/models.rb
@@ -69,28 +69,34 @@ module RubyLLM
         end
 
         def model_id_with_region(model_id, model_data)
-          return model_id unless model_data['inferenceTypesSupported']&.include?('INFERENCE_PROFILE')
-          return model_id if model_data['inferenceTypesSupported']&.include?('ON_DEMAND')
+          normalize_inference_profile_id(
+            model_id,
+            model_data['inferenceTypesSupported'],
+            @config.bedrock_region
+          )
+        end
 
-          desired_region_prefix = inference_profile_region_prefix
+        def region_prefix(region)
+          region = region.to_s
+          return 'us' if region.empty?
 
-          # Return unchanged if model already has the correct region prefix
-          return model_id if model_id.start_with?("#{desired_region_prefix}.")
+          region[0, 2]
+        end
 
-          # Remove any existing region prefix (e.g., "us.", "eu.", "ap.")
-          clean_model_id = model_id.sub(/^[a-z]{2}\./, '')
+        def with_region_prefix(model_id, region)
+          desired_prefix = region_prefix(region)
+          return model_id if model_id.start_with?("#{desired_prefix}.")
 
-          # Apply the desired region prefix
-          "#{desired_region_prefix}.#{clean_model_id}"
+          clean_model_id = model_id.sub(/^[a-z]{2}\./, '')
+          "#{desired_prefix}.#{clean_model_id}"
         end
 
-        def inference_profile_region_prefix
-          # Extract region prefix from bedrock_region (e.g., "eu-west-3" -> "eu")
-          region = @config.bedrock_region.to_s
-          return 'us' if region.empty? # Default fallback
+        def normalize_inference_profile_id(model_id, inference_types, region)
+          types = Array(inference_types)
+          return model_id unless types.include?('INFERENCE_PROFILE')
+          return model_id if types.include?('ON_DEMAND')
 
-          # Take first two characters as the region prefix
-          region[0, 2]
+          with_region_prefix(model_id, region)
        end
      end
    end
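The refactor is behavior-preserving; it just splits the old method into testable pieces. A quick sketch of what normalize_inference_profile_id does (model IDs are illustrative):

# Inference-profile-only model: region prefix is normalized to the configured region
normalize_inference_profile_id('eu.anthropic.claude-sonnet-v1:0', ['INFERENCE_PROFILE'], 'us-east-1')
# => "us.anthropic.claude-sonnet-v1:0"

# Model that also supports ON_DEMAND: returned unchanged
normalize_inference_profile_id('anthropic.claude-haiku-v1:0', %w[INFERENCE_PROFILE ON_DEMAND], 'eu-west-3')
# => "anthropic.claude-haiku-v1:0"

# Empty region falls back to the 'us' prefix
normalize_inference_profile_id('eu.meta.llama3-v1:0', ['INFERENCE_PROFILE'], nil)
# => "us.meta.llama3-v1:0"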
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb
@@ -16,6 +16,31 @@ module RubyLLM
           extract_content_by_type(data)
         end
 
+        def extract_thinking_delta(data)
+          return nil unless data.is_a?(Hash)
+
+          if data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'thinking_delta'
+            return data.dig('delta', 'thinking')
+          end
+
+          if data['type'] == 'content_block_start' && data.dig('content_block', 'type') == 'thinking'
+            return data.dig('content_block', 'thinking') || data.dig('content_block', 'text')
+          end
+
+          nil
+        end
+
+        def extract_signature_delta(data)
+          return nil unless data.is_a?(Hash)
+
+          signature = extract_signature_from_delta(data)
+          return signature if signature
+
+          return nil unless data['type'] == 'content_block_start'
+
+          extract_signature_from_block(data['content_block'])
+        end
+
         def extract_tool_calls(data)
           data.dig('message', 'tool_calls') || data['tool_calls']
         end
@@ -47,6 +72,17 @@ module RubyLLM
          breakdown.values.compact.sum
         end
 
+        def extract_thinking_tokens(data)
+          data.dig('message', 'usage', 'thinking_tokens') ||
+            data.dig('message', 'usage', 'output_tokens_details', 'thinking_tokens') ||
+            data.dig('usage', 'thinking_tokens') ||
+            data.dig('usage', 'output_tokens_details', 'thinking_tokens') ||
+            data.dig('message', 'usage', 'reasoning_tokens') ||
+            data.dig('message', 'usage', 'output_tokens_details', 'reasoning_tokens') ||
+            data.dig('usage', 'reasoning_tokens') ||
+            data.dig('usage', 'output_tokens_details', 'reasoning_tokens')
+        end
+
         private
 
         def extract_content_by_type(data)
@@ -58,11 +94,32 @@ module RubyLLM
        end
 
         def extract_block_start_content(data)
-          data.dig('content_block', 'text').to_s
+          content_block = data['content_block'] || {}
+          return '' if %w[thinking redacted_thinking].include?(content_block['type'])
+
+          content_block['text'].to_s
         end
 
         def extract_delta_content(data)
-          data.dig('delta', 'text').to_s
+          delta = data['delta'] || {}
+          return '' if %w[thinking_delta signature_delta].include?(delta['type'])
+
+          delta['text'].to_s
+        end
+
+        def extract_signature_from_delta(data)
+          return unless data['type'] == 'content_block_delta'
+          return unless data.dig('delta', 'type') == 'signature_delta'
+
+          data.dig('delta', 'signature')
+        end
+
+        def extract_signature_from_block(content_block)
+          block = content_block || {}
+          return block['signature'] if block['type'] == 'thinking' && block['signature']
+          return block['data'] if block['type'] == 'redacted_thinking'
+
+          nil
        end
      end
    end
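To see how the new extractors split a stream, here are two abridged Anthropic-style events (field values illustrative) and what each method returns:

thinking_event = {
  'type'  => 'content_block_delta',
  'delta' => { 'type' => 'thinking_delta', 'thinking' => 'Comparing both options...' }
}
extract_thinking_delta(thinking_event) # => "Comparing both options..."
extract_delta_content(thinking_event)  # => "" (thinking deltas no longer leak into content)

signature_event = {
  'type'  => 'content_block_delta',
  'delta' => { 'type' => 'signature_delta', 'signature' => 'EqQBCg...' } # abridged
}
extract_signature_delta(signature_event) # => "EqQBCg..."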
data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb
@@ -57,10 +57,15 @@ module RubyLLM
             role: :assistant,
             model_id: extract_model_id(data),
             content: extract_streaming_content(data),
+            thinking: Thinking.build(
+              text: extract_thinking_delta(data),
+              signature: extract_signature_delta(data)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
             cached_tokens: extract_cached_tokens(data),
             cache_creation_tokens: extract_cache_creation_tokens(data),
+            thinking_tokens: extract_thinking_tokens(data),
             tool_calls: extract_tool_calls(data)
           }
         end
data/lib/ruby_llm/providers/gemini/chat.rb
@@ -14,7 +14,7 @@ module RubyLLM
           "models/#{@model}:generateContent"
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           @model = model.id
           payload = {
             contents: format_messages(messages),
@@ -24,11 +24,30 @@ module RubyLLM
          payload[:generationConfig][:temperature] = temperature unless temperature.nil?
 
           payload[:generationConfig].merge!(structured_output_config(schema, model)) if schema
+          payload[:generationConfig][:thinkingConfig] = build_thinking_config(model, thinking) if thinking&.enabled?
 
           payload[:tools] = format_tools(tools) if tools.any?
           payload
         end
 
+        def build_thinking_config(_model, thinking)
+          config = { includeThoughts: true }
+
+          config[:thinkingLevel] = resolve_effort_level(thinking) if thinking&.effort
+          config[:thinkingBudget] = resolve_budget(thinking) if thinking&.budget
+
+          config
+        end
+
+        def resolve_effort_level(thinking)
+          thinking.respond_to?(:effort) ? thinking.effort : thinking
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
+
         private
 
         def format_messages(messages)
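Concretely, the resulting thinkingConfig would look like one of these (values illustrative; thinkingLevel is only set when an effort is given, thinkingBudget only when the budget is an Integer):

build_thinking_config(model, thinking)
# => { includeThoughts: true, thinkingBudget: 1024 }   # budget-based request
# => { includeThoughts: true, thinkingLevel: 'high' }  # effort-based request (level value illustrative)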
@@ -56,20 +75,43 @@ module RubyLLM
          elsif msg.tool_result?
             format_tool_result(msg)
           else
-            Media.format_content(msg.content)
+            format_message_parts(msg)
           end
         end
 
+        def format_message_parts(msg)
+          parts = []
+
+          parts << build_thought_part(msg.thinking) if msg.role == :assistant && msg.thinking
+
+          content_parts = Media.format_content(msg.content)
+          parts.concat(content_parts.is_a?(Array) ? content_parts : [content_parts])
+          parts
+        end
+
+        def build_thought_part(thinking)
+          part = { thought: true }
+          part[:text] = thinking.text if thinking.text
+          part[:thoughtSignature] = thinking.signature if thinking.signature
+          part
+        end
+
         def parse_completion_response(response)
           data = response.body
+          parts = data.dig('candidates', 0, 'content', 'parts') || []
           tool_calls = extract_tool_calls(data)
 
           Message.new(
             role: :assistant,
-            content: parse_content(data),
+            content: extract_text_parts(parts) || parse_content(data),
+            thinking: Thinking.build(
+              text: extract_thought_parts(parts),
+              signature: extract_thought_signature(parts)
+            ),
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
             output_tokens: calculate_output_tokens(data),
+            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
             raw: response
           )
@@ -93,6 +135,30 @@ module RubyLLM
          build_response_content(parts)
         end
 
+        def extract_text_parts(parts)
+          text_parts = parts.reject { |p| p['thought'] }
+          content = text_parts.filter_map { |p| p['text'] }.join
+          content.empty? ? nil : content
+        end
+
+        def extract_thought_parts(parts)
+          thought_parts = parts.select { |p| p['thought'] }
+          thoughts = thought_parts.filter_map { |p| p['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thought_signature(parts)
+          parts.each do |part|
+            signature = part['thoughtSignature'] ||
+                        part['thought_signature'] ||
+                        part.dig('functionCall', 'thoughtSignature') ||
+                        part.dig('functionCall', 'thought_signature')
+            return signature if signature
+          end
+
+          nil
+        end
+
         def function_call?(candidate)
           parts = candidate.dig('content', 'parts')
           parts&.any? { |p| p['functionCall'] }
data/lib/ruby_llm/providers/gemini/streaming.rb
@@ -10,12 +10,19 @@ module RubyLLM
         end
 
         def build_chunk(data)
+          parts = data.dig('candidates', 0, 'content', 'parts') || []
+
           Chunk.new(
             role: :assistant,
             model_id: extract_model_id(data),
-            content: extract_content(data),
+            content: extract_text_content(parts),
+            thinking: Thinking.build(
+              text: extract_thought_content(parts),
+              signature: extract_thought_signature(parts)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
+            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             tool_calls: extract_tool_calls(data)
           )
         end
@@ -26,6 +33,30 @@ module RubyLLM
          data['modelVersion']
         end
 
+        def extract_text_content(parts)
+          text_parts = parts.reject { |p| p['thought'] }
+          text = text_parts.filter_map { |p| p['text'] }.join
+          text.empty? ? nil : text
+        end
+
+        def extract_thought_content(parts)
+          thought_parts = parts.select { |p| p['thought'] }
+          thoughts = thought_parts.filter_map { |p| p['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thought_signature(parts)
+          parts.each do |part|
+            signature = part['thoughtSignature'] ||
+                        part['thought_signature'] ||
+                        part.dig('functionCall', 'thoughtSignature') ||
+                        part.dig('functionCall', 'thought_signature')
+            return signature if signature
+          end
+
+          nil
+        end
+
         def extract_content(data)
           return nil unless data['candidates']&.any?
 
data/lib/ruby_llm/providers/gemini/tools.rb
@@ -13,7 +13,7 @@ module RubyLLM
           }]
         end
 
-        def format_tool_call(msg)
+        def format_tool_call(msg) # rubocop:disable Metrics/PerceivedComplexity
           parts = []
 
           if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)
@@ -21,13 +21,24 @@ module RubyLLM
            parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
           end
 
+          fallback_signature = msg.thinking&.signature
+          used_fallback = false
+
           msg.tool_calls.each_value do |tool_call|
-            parts << {
+            part = {
               functionCall: {
                 name: tool_call.name,
                 args: tool_call.arguments
               }
             }
+
+            signature = tool_call.thought_signature
+            if signature.nil? && fallback_signature && !used_fallback
+              signature = fallback_signature
+              used_fallback = true
+            end
+            part[:thoughtSignature] = signature if signature
+            parts << part
           end
 
           parts
@@ -61,11 +72,13 @@ module RubyLLM
            next unless function_data
 
             id = SecureRandom.uuid
+            thought_signature = part['thoughtSignature'] || part['thought_signature']
 
             result[id] = ToolCall.new(
               id:,
               name: function_data['name'],
-              arguments: function_data['args'] || {}
+              arguments: function_data['args'] || {},
+              thought_signature: thought_signature
             )
           end
 
data/lib/ruby_llm/providers/gpustack/chat.rb
@@ -14,7 +14,7 @@ module RubyLLM
             content: GPUStack::Media.format_content(msg.content),
             tool_calls: format_tool_calls(msg.tool_calls),
             tool_call_id: msg.tool_call_id
-          }.compact
+          }.compact.merge(OpenAI::Chat.format_thinking(msg))
           end
         end
 
data/lib/ruby_llm/providers/mistral/chat.rb
@@ -11,13 +11,70 @@ module RubyLLM
           role.to_s
         end
 
+        def format_messages(messages)
+          messages.map do |msg|
+            {
+              role: format_role(msg.role),
+              content: format_content_with_thinking(msg),
+              tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+              tool_call_id: msg.tool_call_id
+            }.compact
+          end
+        end
+
         # rubocop:disable Metrics/ParameterLists
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil)
           payload = super
           payload.delete(:stream_options)
+          payload.delete(:reasoning_effort)
+          warn_on_unsupported_thinking(model, thinking)
           payload
         end
         # rubocop:enable Metrics/ParameterLists
+
+        def format_content_with_thinking(msg)
+          formatted_content = OpenAI::Media.format_content(msg.content)
+          return formatted_content unless msg.role == :assistant && msg.thinking
+
+          content_blocks = build_thinking_blocks(msg.thinking)
+          append_formatted_content(content_blocks, formatted_content)
+
+          content_blocks
+        end
+
+        def warn_on_unsupported_thinking(model, thinking)
+          return unless thinking&.enabled?
+          return if model.id.to_s.include?('magistral')
+
+          RubyLLM.logger.warn(
+            'Mistral thinking is only supported on Magistral models. ' \
+            "Ignoring thinking settings for #{model.id}."
+          )
+        end
+
+        def build_thinking_blocks(thinking)
+          return [] unless thinking
+
+          if thinking.text
+            [{
+              type: 'thinking',
+              thinking: [{ type: 'text', text: thinking.text }],
+              signature: thinking.signature
+            }.compact]
+          elsif thinking.signature
+            [{ type: 'thinking', signature: thinking.signature }]
+          else
+            []
+          end
+        end
+
+        def append_formatted_content(content_blocks, formatted_content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          elsif formatted_content
+            content_blocks << { type: 'text', text: formatted_content }
+          end
+        end
      end
    end
  end
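Re-serialized for Mistral, an assistant turn with prior thinking would look roughly like this (text values illustrative; the signature key is dropped by .compact when nil):

{
  role: 'assistant',
  content: [
    { type: 'thinking', thinking: [{ type: 'text', text: 'Weighing both routes...' }] },
    { type: 'text', text: 'Take the A10; it is faster at this hour.' }
  ]
}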
data/lib/ruby_llm/providers/ollama/chat.rb
@@ -14,7 +14,7 @@ module RubyLLM
             content: Ollama::Media.format_content(msg.content),
             tool_calls: format_tool_calls(msg.tool_calls),
             tool_call_id: msg.tool_call_id
-          }.compact
+          }.compact.merge(OpenAI::Chat.format_thinking(msg))
           end
         end
 
data/lib/ruby_llm/providers/openai/capabilities.rb
@@ -224,8 +224,8 @@ module RubyLLM
         end
 
         def self.normalize_temperature(temperature, model_id)
-          if model_id.match?(/^(o\d|gpt-5)/)
-            RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, ignoring provided value"
+          if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
+            RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, setting that instead."
             1.0
           elsif model_id.match?(/-search/)
             RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"
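The added guards silence a spurious debug line: previously every call for an o-series or gpt-5 model logged the override, even when the caller had already passed 1.0 or no temperature at all. Assuming the unchanged else branch still returns the value as-is:

normalize_temperature(0.2, 'gpt-5')  # logs, returns 1.0
normalize_temperature(1.0, 'gpt-5')  # returns 1.0, no log (new in 1.10.0)
normalize_temperature(nil, 'o3')     # returns nil, no log (new in 1.10.0)
normalize_temperature(0.2, 'gpt-4o') # returns 0.2 unchanged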
@@ -235,6 +235,10 @@ module RubyLLM
          end
         end
 
+        def self.temperature_close_to_one?(temperature)
+          (temperature.to_f - 1.0).abs <= Float::EPSILON
+        end
+
         def modalities_for(model_id)
           modalities = {
             input: ['text'],