ruby_llm 1.9.2 → 1.10.0
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
- data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
- data/lib/ruby_llm/active_record/message_methods.rb +41 -8
- data/lib/ruby_llm/aliases.json +0 -12
- data/lib/ruby_llm/chat.rb +10 -7
- data/lib/ruby_llm/configuration.rb +1 -1
- data/lib/ruby_llm/message.rb +37 -11
- data/lib/ruby_llm/models.json +1059 -857
- data/lib/ruby_llm/models.rb +134 -12
- data/lib/ruby_llm/provider.rb +4 -3
- data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
- data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
- data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
- data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
- data/lib/ruby_llm/providers/openai/chat.rb +87 -3
- data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
- data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
- data/lib/ruby_llm/providers/openai.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
- data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
- data/lib/ruby_llm/providers/openrouter.rb +2 -0
- data/lib/ruby_llm/providers/vertexai.rb +5 -1
- data/lib/ruby_llm/stream_accumulator.rb +111 -14
- data/lib/ruby_llm/streaming.rb +54 -51
- data/lib/ruby_llm/thinking.rb +49 -0
- data/lib/ruby_llm/tokens.rb +47 -0
- data/lib/ruby_llm/tool_call.rb +6 -3
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models.rake +19 -12
- metadata +12 -5
@@ -57,10 +57,15 @@ module RubyLLM
         role: :assistant,
         model_id: extract_model_id(data),
         content: extract_streaming_content(data),
+        thinking: Thinking.build(
+          text: extract_thinking_delta(data),
+          signature: extract_signature_delta(data)
+        ),
         input_tokens: extract_input_tokens(data),
         output_tokens: extract_output_tokens(data),
         cached_tokens: extract_cached_tokens(data),
         cache_creation_tokens: extract_cache_creation_tokens(data),
+        thinking_tokens: extract_thinking_tokens(data),
         tool_calls: extract_tool_calls(data)
       }
     end

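The `Thinking` value object itself lives in the new `data/lib/ruby_llm/thinking.rb` (+49 lines, not expanded in this diff), but the call sites above constrain its shape: `Thinking.build` accepts `text:` and `signature:` keywords, and consumers read `text`, `signature`, `effort`, `budget`, and `enabled?`. A minimal sketch consistent with those call sites, not the shipped implementation:

    # Hypothetical sketch of RubyLLM::Thinking, inferred from the call
    # sites visible in this diff; the real thinking.rb may differ.
    module RubyLLM
      class Thinking
        attr_reader :text, :signature, :effort, :budget

        def initialize(text: nil, signature: nil, effort: nil, budget: nil)
          @text = text
          @signature = signature
          @effort = effort
          @budget = budget
        end

        # Returning nil when there is nothing to carry lets callers pass
        # the result straight into Message.new or Chunk.new.
        def self.build(text: nil, signature: nil)
          return nil if text.nil? && signature.nil?

          new(text: text, signature: signature)
        end

        # Request-side configs count as enabled when effort or budget is set.
        def enabled?
          !effort.nil? || !budget.nil?
        end
      end
    end
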
@@ -14,7 +14,7 @@ module RubyLLM
           "models/#{@model}:generateContent"
         end
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           @model = model.id
           payload = {
             contents: format_messages(messages),

@@ -24,11 +24,30 @@ module RubyLLM
           payload[:generationConfig][:temperature] = temperature unless temperature.nil?
 
           payload[:generationConfig].merge!(structured_output_config(schema, model)) if schema
+          payload[:generationConfig][:thinkingConfig] = build_thinking_config(model, thinking) if thinking&.enabled?
 
           payload[:tools] = format_tools(tools) if tools.any?
           payload
         end
 
+        def build_thinking_config(_model, thinking)
+          config = { includeThoughts: true }
+
+          config[:thinkingLevel] = resolve_effort_level(thinking) if thinking&.effort
+          config[:thinkingBudget] = resolve_budget(thinking) if thinking&.budget
+
+          config
+        end
+
+        def resolve_effort_level(thinking)
+          thinking.respond_to?(:effort) ? thinking.effort : thinking
+        end
+
+        def resolve_budget(thinking)
+          budget = thinking.respond_to?(:budget) ? thinking.budget : thinking
+          budget.is_a?(Integer) ? budget : nil
+        end
+
         private
 
         def format_messages(messages)

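With thinking enabled, the Gemini request gains a `thinkingConfig` block inside `generationConfig`. An illustrative payload fragment produced by `build_thinking_config` above (the budget value is made up for the example):

    # Illustrative: thinking enabled with an integer budget of 2048 tokens.
    payload[:generationConfig][:thinkingConfig]
    # => { includeThoughts: true, thinkingBudget: 2048 }
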
@@ -56,20 +75,43 @@ module RubyLLM
           elsif msg.tool_result?
             format_tool_result(msg)
           else
-
+            format_message_parts(msg)
           end
         end
 
+        def format_message_parts(msg)
+          parts = []
+
+          parts << build_thought_part(msg.thinking) if msg.role == :assistant && msg.thinking
+
+          content_parts = Media.format_content(msg.content)
+          parts.concat(content_parts.is_a?(Array) ? content_parts : [content_parts])
+          parts
+        end
+
+        def build_thought_part(thinking)
+          part = { thought: true }
+          part[:text] = thinking.text if thinking.text
+          part[:thoughtSignature] = thinking.signature if thinking.signature
+          part
+        end
+
         def parse_completion_response(response)
           data = response.body
+          parts = data.dig('candidates', 0, 'content', 'parts') || []
           tool_calls = extract_tool_calls(data)
 
           Message.new(
             role: :assistant,
-            content: parse_content(data),
+            content: extract_text_parts(parts) || parse_content(data),
+            thinking: Thinking.build(
+              text: extract_thought_parts(parts),
+              signature: extract_thought_signature(parts)
+            ),
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
             output_tokens: calculate_output_tokens(data),
+            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
             raw: response
           )

@@ -93,6 +135,30 @@ module RubyLLM
           build_response_content(parts)
         end
 
+        def extract_text_parts(parts)
+          text_parts = parts.reject { |p| p['thought'] }
+          content = text_parts.filter_map { |p| p['text'] }.join
+          content.empty? ? nil : content
+        end
+
+        def extract_thought_parts(parts)
+          thought_parts = parts.select { |p| p['thought'] }
+          thoughts = thought_parts.filter_map { |p| p['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thought_signature(parts)
+          parts.each do |part|
+            signature = part['thoughtSignature'] ||
+                        part['thought_signature'] ||
+                        part.dig('functionCall', 'thoughtSignature') ||
+                        part.dig('functionCall', 'thought_signature')
+            return signature if signature
+          end
+
+          nil
+        end
+
         def function_call?(candidate)
           parts = candidate.dig('content', 'parts')
           parts&.any? { |p| p['functionCall'] }

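These helpers split a Gemini `parts` array into visible text and thought summaries. Illustrative behavior on made-up data:

    # Illustrative parts array in the shape the Gemini API returns:
    parts = [
      { 'thought' => true, 'text' => 'First, compare the two options...' },
      { 'text' => 'Option B is the better choice.' }
    ]

    extract_text_parts(parts)        # => "Option B is the better choice."
    extract_thought_parts(parts)     # => "First, compare the two options..."
    extract_thought_signature(parts) # => nil (no signature present)
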
@@ -10,12 +10,19 @@ module RubyLLM
         end
 
         def build_chunk(data)
+          parts = data.dig('candidates', 0, 'content', 'parts') || []
+
           Chunk.new(
             role: :assistant,
             model_id: extract_model_id(data),
-            content:
+            content: extract_text_content(parts),
+            thinking: Thinking.build(
+              text: extract_thought_content(parts),
+              signature: extract_thought_signature(parts)
+            ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
+            thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             tool_calls: extract_tool_calls(data)
           )
         end

@@ -26,6 +33,30 @@ module RubyLLM
           data['modelVersion']
         end
 
+        def extract_text_content(parts)
+          text_parts = parts.reject { |p| p['thought'] }
+          text = text_parts.filter_map { |p| p['text'] }.join
+          text.empty? ? nil : text
+        end
+
+        def extract_thought_content(parts)
+          thought_parts = parts.select { |p| p['thought'] }
+          thoughts = thought_parts.filter_map { |p| p['text'] }.join
+          thoughts.empty? ? nil : thoughts
+        end
+
+        def extract_thought_signature(parts)
+          parts.each do |part|
+            signature = part['thoughtSignature'] ||
+                        part['thought_signature'] ||
+                        part.dig('functionCall', 'thoughtSignature') ||
+                        part.dig('functionCall', 'thought_signature')
+            return signature if signature
+          end
+
+          nil
+        end
+
         def extract_content(data)
           return nil unless data['candidates']&.any?
 

@@ -13,7 +13,7 @@ module RubyLLM
           }]
         end
 
-        def format_tool_call(msg)
+        def format_tool_call(msg) # rubocop:disable Metrics/PerceivedComplexity
           parts = []
 
           if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)

@@ -21,13 +21,24 @@ module RubyLLM
             parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
           end
 
+          fallback_signature = msg.thinking&.signature
+          used_fallback = false
+
           msg.tool_calls.each_value do |tool_call|
-
+            part = {
               functionCall: {
                 name: tool_call.name,
                 args: tool_call.arguments
               }
             }
+
+            signature = tool_call.thought_signature
+            if signature.nil? && fallback_signature && !used_fallback
+              signature = fallback_signature
+              used_fallback = true
+            end
+            part[:thoughtSignature] = signature if signature
+            parts << part
           end
 
           parts

@@ -61,11 +72,13 @@ module RubyLLM
           next unless function_data
 
           id = SecureRandom.uuid
+          thought_signature = part['thoughtSignature'] || part['thought_signature']
 
           result[id] = ToolCall.new(
             id:,
             name: function_data['name'],
-            arguments: function_data['args'] || {}
+            arguments: function_data['args'] || {},
+            thought_signature: thought_signature
           )
         end
 

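Per the fallback logic above, each tool call's own `thought_signature` takes precedence, and a message-level signature from `msg.thinking` is attached to at most one functionCall part: the first one lacking its own. Illustrative outcome with two tool calls and only a message-level signature:

    # Illustrative: only the first functionCall part receives the fallback.
    parts.map { |part| part[:thoughtSignature] }
    # => ["<message-level signature>", nil]
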
@@ -11,13 +11,70 @@ module RubyLLM
           role.to_s
         end
 
+        def format_messages(messages)
+          messages.map do |msg|
+            {
+              role: format_role(msg.role),
+              content: format_content_with_thinking(msg),
+              tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+              tool_call_id: msg.tool_call_id
+            }.compact
+          end
+        end
+
         # rubocop:disable Metrics/ParameterLists
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil)
           payload = super
           payload.delete(:stream_options)
+          payload.delete(:reasoning_effort)
+          warn_on_unsupported_thinking(model, thinking)
           payload
         end
         # rubocop:enable Metrics/ParameterLists
+
+        def format_content_with_thinking(msg)
+          formatted_content = OpenAI::Media.format_content(msg.content)
+          return formatted_content unless msg.role == :assistant && msg.thinking
+
+          content_blocks = build_thinking_blocks(msg.thinking)
+          append_formatted_content(content_blocks, formatted_content)
+
+          content_blocks
+        end
+
+        def warn_on_unsupported_thinking(model, thinking)
+          return unless thinking&.enabled?
+          return if model.id.to_s.include?('magistral')
+
+          RubyLLM.logger.warn(
+            'Mistral thinking is only supported on Magistral models. ' \
+            "Ignoring thinking settings for #{model.id}."
+          )
+        end
+
+        def build_thinking_blocks(thinking)
+          return [] unless thinking
+
+          if thinking.text
+            [{
+              type: 'thinking',
+              thinking: [{ type: 'text', text: thinking.text }],
+              signature: thinking.signature
+            }.compact]
+          elsif thinking.signature
+            [{ type: 'thinking', signature: thinking.signature }]
+          else
+            []
+          end
+        end
+
+        def append_formatted_content(content_blocks, formatted_content)
+          if formatted_content.is_a?(Array)
+            content_blocks.concat(formatted_content)
+          elsif formatted_content
+            content_blocks << { type: 'text', text: formatted_content }
+          end
+        end
       end
     end
   end

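On the wire, replayed Magistral thinking becomes a content block ahead of the visible text. An illustrative formatted assistant message (data made up for the example; note the `signature` key is dropped by `.compact` when absent):

    {
      role: 'assistant',
      content: [
        { type: 'thinking', thinking: [{ type: 'text', text: 'Weigh both routes...' }] },
        { type: 'text', text: 'Take the coastal route.' }
      ]
    }
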
@@ -224,8 +224,8 @@ module RubyLLM
       end
 
       def self.normalize_temperature(temperature, model_id)
-        if model_id.match?(/^(o\d|gpt-5)/)
-          RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0,
+        if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
+          RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, setting that instead."
           1.0
         elsif model_id.match?(/-search/)
           RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"

@@ -235,6 +235,10 @@ module RubyLLM
         end
       end
 
+      def self.temperature_close_to_one?(temperature)
+        (temperature.to_f - 1.0).abs <= Float::EPSILON
+      end
+
       def modalities_for(model_id)
         modalities = {
           input: ['text'],

@@ -11,7 +11,7 @@ module RubyLLM
 
       module_function
 
-      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists
        payload = {
          model: model.id,
          messages: format_messages(messages),

@@ -34,6 +34,9 @@ module RubyLLM
          }
        end
 
+        effort = resolve_effort(thinking)
+        payload[:reasoning_effort] = effort if effort
+
        payload[:stream_options] = { include_usage: true } if stream
        payload
      end

@@ -49,15 +52,21 @@ module RubyLLM
 
        usage = data['usage'] || {}
        cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+        thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+        content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
+        thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
+        thinking_signature = extract_thinking_signature(message_data)
 
        Message.new(
          role: :assistant,
-          content:
+          content: content,
+          thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
          tool_calls: parse_tool_calls(message_data['tool_calls']),
          input_tokens: usage['prompt_tokens'],
          output_tokens: usage['completion_tokens'],
          cached_tokens: cached_tokens,
          cache_creation_tokens: 0,
+          thinking_tokens: thinking_tokens,
          model_id: data['model'],
          raw: response
        )

@@ -70,7 +79,7 @@ module RubyLLM
          content: Media.format_content(msg.content),
          tool_calls: format_tool_calls(msg.tool_calls),
          tool_call_id: msg.tool_call_id
-        }.compact
+        }.compact.merge(format_thinking(msg))
      end
    end
 

@@ -82,6 +91,81 @@ module RubyLLM
          role.to_s
        end
      end
+
+      def resolve_effort(thinking)
+        return nil unless thinking
+
+        thinking.respond_to?(:effort) ? thinking.effort : thinking
+      end
+
+      def format_thinking(msg)
+        return {} unless msg.role == :assistant
+
+        thinking = msg.thinking
+        return {} unless thinking
+
+        payload = {}
+        if thinking.text
+          payload[:reasoning] = thinking.text
+          payload[:reasoning_content] = thinking.text
+        end
+        payload[:reasoning_signature] = thinking.signature if thinking.signature
+        payload
+      end
+
+      def extract_thinking_text(message_data)
+        candidate = message_data['reasoning_content'] || message_data['reasoning'] || message_data['thinking']
+        candidate.is_a?(String) ? candidate : nil
+      end
+
+      def extract_thinking_signature(message_data)
+        candidate = message_data['reasoning_signature'] || message_data['signature']
+        candidate.is_a?(String) ? candidate : nil
+      end
+
+      def extract_content_and_thinking(content)
+        return extract_think_tag_content(content) if content.is_a?(String)
+        return [content, nil] unless content.is_a?(Array)
+
+        text = extract_text_from_blocks(content)
+        thinking = extract_thinking_from_blocks(content)
+
+        [text.empty? ? nil : text, thinking.empty? ? nil : thinking]
+      end
+
+      def extract_text_from_blocks(blocks)
+        blocks.filter_map do |block|
+          block['text'] if block['type'] == 'text' && block['text'].is_a?(String)
+        end.join
+      end
+
+      def extract_thinking_from_blocks(blocks)
+        blocks.filter_map do |block|
+          next unless block['type'] == 'thinking'
+
+          extract_thinking_text_from_block(block)
+        end.join
+      end
+
+      def extract_thinking_text_from_block(block)
+        thinking_block = block['thinking']
+        return thinking_block if thinking_block.is_a?(String)
+
+        if thinking_block.is_a?(Array)
+          return thinking_block.filter_map { |item| item['text'] if item['type'] == 'text' }.join
+        end
+
+        block['text'] if block['text'].is_a?(String)
+      end
+
+      def extract_think_tag_content(text)
+        return [text, nil] unless text.include?('<think>')
+
+        thinking = text.scan(%r{<think>(.*?)</think>}m).join
+        content = text.gsub(%r{<think>.*?</think>}m, '').strip
+
+        [content.empty? ? nil : content, thinking.empty? ? nil : thinking]
+      end
    end
  end
end

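For providers that inline reasoning as `<think>` tags in a plain content string, `extract_think_tag_content` splits it out. Illustrative calls against the method above:

    extract_think_tag_content('<think>Check the units first.</think>12 km is 12,000 m.')
    # => ["12 km is 12,000 m.", "Check the units first."]

    extract_think_tag_content('No reasoning here.')
    # => ["No reasoning here.", nil]
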
@@ -14,16 +14,24 @@ module RubyLLM
      def build_chunk(data)
        usage = data['usage'] || {}
        cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+        delta = data.dig('choices', 0, 'delta') || {}
+        content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
+        content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
 
        Chunk.new(
          role: :assistant,
          model_id: data['model'],
-          content:
-
+          content: content,
+          thinking: Thinking.build(
+            text: thinking_from_blocks || delta['reasoning_content'] || delta['reasoning'],
+            signature: delta['reasoning_signature']
+          ),
+          tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
          input_tokens: usage['prompt_tokens'],
          output_tokens: usage['completion_tokens'],
          cached_tokens: cached_tokens,
-          cache_creation_tokens: 0
+          cache_creation_tokens: 0,
+          thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
        )
      end
 

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Normalizes temperature for OpenAI models with provider-specific requirements.
+      module Temperature
+        module_function
+
+        def normalize(temperature, model_id)
+          if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
+            RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, setting that instead."
+            1.0
+          elsif model_id.include?('-search')
+            RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"
+            nil
+          else
+            temperature
+          end
+        end
+
+        def temperature_close_to_one?(temperature)
+          (temperature.to_f - 1.0).abs <= Float::EPSILON
+        end
+      end
+    end
+  end
+end

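Based on the module above, normalization behaves roughly as follows (illustrative calls; the `-search` model id is hypothetical):

    temp = RubyLLM::Providers::OpenAI::Temperature

    temp.normalize(0.7, 'gpt-5')         # => 1.0 (forced, with a debug log)
    temp.normalize(1.0, 'gpt-5')         # => 1.0 (already compliant, no log)
    temp.normalize(0.7, 'gpt-4o-search') # => nil (parameter dropped)
    temp.normalize(0.7, 'gpt-4o')        # => 0.7 (unchanged)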