ruby_llm 1.9.1 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +1 -1
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
- data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
- data/lib/ruby_llm/active_record/message_methods.rb +41 -8
- data/lib/ruby_llm/aliases.json +101 -21
- data/lib/ruby_llm/chat.rb +10 -7
- data/lib/ruby_llm/configuration.rb +1 -1
- data/lib/ruby_llm/message.rb +37 -11
- data/lib/ruby_llm/models.json +21119 -10230
- data/lib/ruby_llm/models.rb +271 -27
- data/lib/ruby_llm/models_schema.json +2 -2
- data/lib/ruby_llm/provider.rb +4 -3
- data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
- data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
- data/lib/ruby_llm/providers/bedrock/models.rb +21 -15
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
- data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
- data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
- data/lib/ruby_llm/providers/openai/chat.rb +87 -3
- data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
- data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
- data/lib/ruby_llm/providers/openai.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
- data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
- data/lib/ruby_llm/providers/openrouter.rb +2 -0
- data/lib/ruby_llm/providers/vertexai.rb +5 -1
- data/lib/ruby_llm/stream_accumulator.rb +111 -14
- data/lib/ruby_llm/streaming.rb +76 -54
- data/lib/ruby_llm/thinking.rb +49 -0
- data/lib/ruby_llm/tokens.rb +47 -0
- data/lib/ruby_llm/tool.rb +1 -1
- data/lib/ruby_llm/tool_call.rb +6 -3
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models.rake +20 -13
- metadata +12 -5
data/lib/ruby_llm/providers/openai/chat.rb

@@ -11,7 +11,7 @@ module RubyLLM
 
         module_function
 
-        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists
           payload = {
             model: model.id,
             messages: format_messages(messages),
@@ -34,6 +34,9 @@ module RubyLLM
             }
           end
 
+          effort = resolve_effort(thinking)
+          payload[:reasoning_effort] = effort if effort
+
           payload[:stream_options] = { include_usage: true } if stream
           payload
         end
@@ -49,15 +52,21 @@ module RubyLLM
 
           usage = data['usage'] || {}
           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+          thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+          content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
+          thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
+          thinking_signature = extract_thinking_signature(message_data)
 
           Message.new(
             role: :assistant,
-            content:
+            content: content,
+            thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
             tool_calls: parse_tool_calls(message_data['tool_calls']),
             input_tokens: usage['prompt_tokens'],
             output_tokens: usage['completion_tokens'],
             cached_tokens: cached_tokens,
             cache_creation_tokens: 0,
+            thinking_tokens: thinking_tokens,
             model_id: data['model'],
             raw: response
           )
@@ -70,7 +79,7 @@ module RubyLLM
               content: Media.format_content(msg.content),
               tool_calls: format_tool_calls(msg.tool_calls),
               tool_call_id: msg.tool_call_id
-            }.compact
+            }.compact.merge(format_thinking(msg))
           end
         end
 
@@ -82,6 +91,81 @@ module RubyLLM
             role.to_s
           end
         end
+
+        def resolve_effort(thinking)
+          return nil unless thinking
+
+          thinking.respond_to?(:effort) ? thinking.effort : thinking
+        end
+
+        def format_thinking(msg)
+          return {} unless msg.role == :assistant
+
+          thinking = msg.thinking
+          return {} unless thinking
+
+          payload = {}
+          if thinking.text
+            payload[:reasoning] = thinking.text
+            payload[:reasoning_content] = thinking.text
+          end
+          payload[:reasoning_signature] = thinking.signature if thinking.signature
+          payload
+        end
+
+        def extract_thinking_text(message_data)
+          candidate = message_data['reasoning_content'] || message_data['reasoning'] || message_data['thinking']
+          candidate.is_a?(String) ? candidate : nil
+        end
+
+        def extract_thinking_signature(message_data)
+          candidate = message_data['reasoning_signature'] || message_data['signature']
+          candidate.is_a?(String) ? candidate : nil
+        end
+
+        def extract_content_and_thinking(content)
+          return extract_think_tag_content(content) if content.is_a?(String)
+          return [content, nil] unless content.is_a?(Array)
+
+          text = extract_text_from_blocks(content)
+          thinking = extract_thinking_from_blocks(content)
+
+          [text.empty? ? nil : text, thinking.empty? ? nil : thinking]
+        end
+
+        def extract_text_from_blocks(blocks)
+          blocks.filter_map do |block|
+            block['text'] if block['type'] == 'text' && block['text'].is_a?(String)
+          end.join
+        end
+
+        def extract_thinking_from_blocks(blocks)
+          blocks.filter_map do |block|
+            next unless block['type'] == 'thinking'
+
+            extract_thinking_text_from_block(block)
+          end.join
+        end
+
+        def extract_thinking_text_from_block(block)
+          thinking_block = block['thinking']
+          return thinking_block if thinking_block.is_a?(String)
+
+          if thinking_block.is_a?(Array)
+            return thinking_block.filter_map { |item| item['text'] if item['type'] == 'text' }.join
+          end
+
+          block['text'] if block['text'].is_a?(String)
+        end
+
+        def extract_think_tag_content(text)
+          return [text, nil] unless text.include?('<think>')
+
+          thinking = text.scan(%r{<think>(.*?)</think>}m).join
+          content = text.gsub(%r{<think>.*?</think>}m, '').strip
+
+          [content.empty? ? nil : content, thinking.empty? ? nil : thinking]
+        end
       end
     end
   end
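
The net effect of the OpenAI chat changes: a `thinking:` option is threaded through `render_payload` and surfaces as `reasoning_effort`, while assistant content is scanned for inline `<think>…</think>` segments or `thinking` blocks so the reasoning text lands on the parsed `Message` as `thinking`. A minimal sketch of the two helpers above, assuming the gem is installed and these `module_function` helpers remain callable as shown in the diff:

```ruby
require 'ruby_llm'

chat = RubyLLM::Providers::OpenAI::Chat

# Split inline <think> tags out of assistant content (per extract_think_tag_content above).
content, thinking = chat.extract_content_and_thinking(
  '<think>Check the units first.</think>The answer is 42.'
)
content  # => "The answer is 42."
thinking # => "Check the units first."

# A bare effort value (or an object responding to #effort) becomes reasoning_effort.
chat.resolve_effort(:low) # => :low
```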
data/lib/ruby_llm/providers/openai/streaming.rb

@@ -14,16 +14,24 @@ module RubyLLM
         def build_chunk(data)
           usage = data['usage'] || {}
           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+          delta = data.dig('choices', 0, 'delta') || {}
+          content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
+          content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
 
           Chunk.new(
             role: :assistant,
             model_id: data['model'],
-            content:
-
+            content: content,
+            thinking: Thinking.build(
+              text: thinking_from_blocks || delta['reasoning_content'] || delta['reasoning'],
+              signature: delta['reasoning_signature']
+            ),
+            tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
             input_tokens: usage['prompt_tokens'],
             output_tokens: usage['completion_tokens'],
             cached_tokens: cached_tokens,
-            cache_creation_tokens: 0
+            cache_creation_tokens: 0,
+            thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
           )
         end
 
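
For reference, these are the streamed delta keys the updated `build_chunk` now reads; the hash below is illustrative only, and which fields actually appear depends on the upstream model:

```ruby
# Shape of a streamed choices[0].delta understood by the updated build_chunk.
delta = {
  'content' => 'partial answer text',
  'reasoning_content' => 'partial reasoning text', # some providers send 'reasoning' instead
  'reasoning_signature' => 'sig_abc',
  'tool_calls' => nil
}
```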
data/lib/ruby_llm/providers/openai/temperature.rb (new file)

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Normalizes temperature for OpenAI models with provider-specific requirements.
+      module Temperature
+        module_function
+
+        def normalize(temperature, model_id)
+          if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
+            RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, setting that instead."
+            1.0
+          elsif model_id.include?('-search')
+            RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"
+            nil
+          else
+            temperature
+          end
+        end
+
+        def temperature_close_to_one?(temperature)
+          (temperature.to_f - 1.0).abs <= Float::EPSILON
+        end
+      end
+    end
+  end
+end
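
`Temperature.normalize` centralizes the model-specific temperature quirks: o-series and gpt-5 models are pinned to 1.0, `-search` models drop the parameter entirely, and everything else passes through. A small usage sketch, assuming the module stays addressable as `RubyLLM::Providers::OpenAI::Temperature`:

```ruby
require 'ruby_llm'

temp = RubyLLM::Providers::OpenAI::Temperature

temp.normalize(0.7, 'gpt-5-mini')            # => 1.0 (o*/gpt-5 models only accept the default)
temp.normalize(0.7, 'gpt-4o-search-preview') # => nil (parameter removed for -search models)
temp.normalize(0.7, 'gpt-4.1')               # => 0.7 (passed through unchanged)
```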
data/lib/ruby_llm/providers/openrouter/chat.rb (new file)

@@ -0,0 +1,154 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenRouter
+      # Chat methods of the OpenRouter API integration
+      module Chat
+        module_function
+
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists
+          payload = {
+            model: model.id,
+            messages: format_messages(messages),
+            stream: stream
+          }
+
+          payload[:temperature] = temperature unless temperature.nil?
+          payload[:tools] = tools.map { |_, tool| OpenAI::Tools.tool_for(tool) } if tools.any?
+
+          if schema
+            strict = schema[:strict] != false
+            payload[:response_format] = {
+              type: 'json_schema',
+              json_schema: {
+                name: 'response',
+                schema: schema,
+                strict: strict
+              }
+            }
+          end
+
+          reasoning = build_reasoning(thinking)
+          payload[:reasoning] = reasoning if reasoning
+
+          payload[:stream_options] = { include_usage: true } if stream
+          payload
+        end
+
+        def parse_completion_response(response)
+          data = response.body
+          return if data.empty?
+
+          raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
+
+          message_data = data.dig('choices', 0, 'message')
+          return unless message_data
+
+          usage = data['usage'] || {}
+          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+          thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+          thinking_text = extract_thinking_text(message_data)
+          thinking_signature = extract_thinking_signature(message_data)
+
+          Message.new(
+            role: :assistant,
+            content: message_data['content'],
+            thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
+            tool_calls: OpenAI::Tools.parse_tool_calls(message_data['tool_calls']),
+            input_tokens: usage['prompt_tokens'],
+            output_tokens: usage['completion_tokens'],
+            cached_tokens: cached_tokens,
+            cache_creation_tokens: 0,
+            thinking_tokens: thinking_tokens,
+            model_id: data['model'],
+            raw: response
+          )
+        end
+
+        def format_messages(messages)
+          messages.map do |msg|
+            {
+              role: format_role(msg.role),
+              content: OpenAI::Media.format_content(msg.content),
+              tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+              tool_call_id: msg.tool_call_id
+            }.compact.merge(format_thinking(msg))
+          end
+        end
+
+        def format_role(role)
+          case role
+          when :system
+            @config.openai_use_system_role ? 'system' : 'developer'
+          else
+            role.to_s
+          end
+        end
+
+        def build_reasoning(thinking)
+          return nil unless thinking&.enabled?
+
+          reasoning = {}
+          reasoning[:effort] = thinking.effort if thinking.respond_to?(:effort) && thinking.effort
+          reasoning[:max_tokens] = thinking.budget if thinking.respond_to?(:budget) && thinking.budget
+          reasoning[:enabled] = true if reasoning.empty?
+          reasoning
+        end
+
+        def format_thinking(msg)
+          thinking = msg.thinking
+          return {} unless thinking && msg.role == :assistant
+
+          details = []
+          if thinking.text
+            details << {
+              type: 'reasoning.text',
+              text: thinking.text,
+              signature: thinking.signature
+            }.compact
+          elsif thinking.signature
+            details << {
+              type: 'reasoning.encrypted',
+              data: thinking.signature
+            }
+          end
+
+          details.empty? ? {} : { reasoning_details: details }
+        end
+
+        def extract_thinking_text(message_data)
+          candidate = message_data['reasoning']
+          return candidate if candidate.is_a?(String)
+
+          details = message_data['reasoning_details']
+          return nil unless details.is_a?(Array)
+
+          text = details.filter_map do |detail|
+            case detail['type']
+            when 'reasoning.text'
+              detail['text']
+            when 'reasoning.summary'
+              detail['summary']
+            end
+          end.join
+
+          text.empty? ? nil : text
+        end
+
+        def extract_thinking_signature(message_data)
+          details = message_data['reasoning_details']
+          return nil unless details.is_a?(Array)
+
+          signature = details.filter_map do |detail|
+            detail['signature'] if detail['signature'].is_a?(String)
+          end.first
+          return signature if signature
+
+          encrypted = details.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
+          encrypted&.dig('data')
+        end
+      end
+    end
+  end
+end
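
`build_reasoning` translates the generic thinking option into OpenRouter's `reasoning` request object: an effort maps to `effort`, a token budget to `max_tokens`, and a bare "on" switch to `enabled: true`. A sketch using a hypothetical stand-in for the thinking option (the real object comes from the new `ruby_llm/thinking.rb`, which is not shown in this diff):

```ruby
require 'ruby_llm'

# Hypothetical stand-in for the thinking option; assumed to respond to
# #enabled?, #effort and #budget like the real RubyLLM thinking config.
FakeThinking = Struct.new(:effort, :budget, keyword_init: true) do
  def enabled? = true
end

chat = RubyLLM::Providers::OpenRouter::Chat

chat.build_reasoning(FakeThinking.new(effort: :high)) # => { effort: :high }
chat.build_reasoning(FakeThinking.new(budget: 2048))  # => { max_tokens: 2048 }
chat.build_reasoning(FakeThinking.new)                # => { enabled: true }
chat.build_reasoning(nil)                             # => nil (reasoning key omitted)
```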
data/lib/ruby_llm/providers/openrouter/streaming.rb (new file)

@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenRouter
+      # Streaming methods of the OpenRouter API integration
+      module Streaming
+        module_function
+
+        def stream_url
+          completion_url
+        end
+
+        def build_chunk(data)
+          usage = data['usage'] || {}
+          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+          delta = data.dig('choices', 0, 'delta') || {}
+
+          Chunk.new(
+            role: :assistant,
+            model_id: data['model'],
+            content: delta['content'],
+            thinking: Thinking.build(
+              text: extract_thinking_text(delta),
+              signature: extract_thinking_signature(delta)
+            ),
+            tool_calls: OpenAI::Tools.parse_tool_calls(delta['tool_calls'], parse_arguments: false),
+            input_tokens: usage['prompt_tokens'],
+            output_tokens: usage['completion_tokens'],
+            cached_tokens: cached_tokens,
+            cache_creation_tokens: 0,
+            thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
+          )
+        end
+
+        def parse_streaming_error(data)
+          OpenAI::Streaming.parse_streaming_error(data)
+        end
+
+        def extract_thinking_text(delta)
+          candidate = delta['reasoning']
+          return candidate if candidate.is_a?(String)
+
+          details = delta['reasoning_details']
+          return nil unless details.is_a?(Array)
+
+          text = details.filter_map do |detail|
+            case detail['type']
+            when 'reasoning.text'
+              detail['text']
+            when 'reasoning.summary'
+              detail['summary']
+            end
+          end.join
+
+          text.empty? ? nil : text
+        end
+
+        def extract_thinking_signature(delta)
+          details = delta['reasoning_details']
+          return nil unless details.is_a?(Array)
+
+          signature = details.filter_map do |detail|
+            detail['signature'] if detail['signature'].is_a?(String)
+          end.first
+          return signature if signature
+
+          encrypted = details.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
+          encrypted&.dig('data')
+        end
+      end
+    end
+  end
+end
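
On the streaming side, reasoning can arrive either as a plain `reasoning` string on the delta or as structured `reasoning_details` entries; text and summary entries are concatenated and the first signature (or an encrypted blob) is kept. A sketch of the two extractors, assuming they remain callable as module functions:

```ruby
require 'ruby_llm'

streaming = RubyLLM::Providers::OpenRouter::Streaming

delta = {
  'reasoning_details' => [
    { 'type' => 'reasoning.text', 'text' => 'Compare both routes. ', 'signature' => 'sig_abc' },
    { 'type' => 'reasoning.summary', 'summary' => 'Route B is shorter.' }
  ]
}

streaming.extract_thinking_text(delta)      # => "Compare both routes. Route B is shorter."
streaming.extract_thinking_signature(delta) # => "sig_abc"
```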
data/lib/ruby_llm/providers/vertexai.rb

@@ -16,7 +16,11 @@ module RubyLLM
       end
 
       def api_base
-        "https://#{@config.vertexai_location}-aiplatform.googleapis.com/v1beta1"
+        if @config.vertexai_location.to_s == 'global'
+          'https://aiplatform.googleapis.com/v1beta1'
+        else
+          "https://#{@config.vertexai_location}-aiplatform.googleapis.com/v1beta1"
+        end
       end
 
       def headers
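
The Vertex AI change makes the `global` location resolve to the region-less endpoint instead of prefixing the host with the location. For example (the configuration accessor is assumed to be the usual `RubyLLM.configure` block):

```ruby
RubyLLM.configure do |config|
  config.vertexai_location = 'global'
  # api_base => https://aiplatform.googleapis.com/v1beta1
  # (a regional location such as 'us-central1' still yields
  #  https://us-central1-aiplatform.googleapis.com/v1beta1)
end
```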
data/lib/ruby_llm/stream_accumulator.rb

@@ -7,11 +7,16 @@ module RubyLLM
 
     def initialize
       @content = +''
+      @thinking_text = +''
+      @thinking_signature = nil
       @tool_calls = {}
       @input_tokens = nil
       @output_tokens = nil
       @cached_tokens = nil
       @cache_creation_tokens = nil
+      @thinking_tokens = nil
+      @inside_think_tag = false
+      @pending_think_tag = +''
       @latest_tool_call_id = nil
     end
 
@@ -19,12 +24,8 @@ module RubyLLM
       RubyLLM.logger.debug chunk.inspect if RubyLLM.config.log_stream_debug
       @model_id ||= chunk.model_id
 
-      if chunk.tool_call?
-        accumulate_tool_calls chunk.tool_calls
-      else
-        @content << (chunk.content || '')
-      end
-
+      handle_chunk_content(chunk)
+      append_thinking_from_chunk(chunk)
       count_tokens chunk
       RubyLLM.logger.debug inspect if RubyLLM.config.log_stream_debug
     end
@@ -33,12 +34,19 @@ module RubyLLM
       Message.new(
         role: :assistant,
         content: content.empty? ? nil : content,
+        thinking: Thinking.build(
+          text: @thinking_text.empty? ? nil : @thinking_text,
+          signature: @thinking_signature
+        ),
+        tokens: Tokens.build(
+          input: @input_tokens,
+          output: @output_tokens,
+          cached: @cached_tokens,
+          cache_creation: @cache_creation_tokens,
+          thinking: @thinking_tokens
+        ),
         model_id: model_id,
         tool_calls: tool_calls_from_stream,
-        input_tokens: @input_tokens,
-        output_tokens: @output_tokens,
-        cached_tokens: @cached_tokens,
-        cache_creation_tokens: @cache_creation_tokens,
         raw: response
       )
     end
@@ -58,12 +66,13 @@ module RubyLLM
         ToolCall.new(
           id: tc.id,
           name: tc.name,
-          arguments: arguments
+          arguments: arguments,
+          thought_signature: tc.thought_signature
         )
       end
     end
 
-    def accumulate_tool_calls(new_tool_calls)
+    def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedComplexity
       RubyLLM.logger.debug "Accumulating tool calls: #{new_tool_calls}" if RubyLLM.config.log_stream_debug
       new_tool_calls.each_value do |tool_call|
         if tool_call.id
@@ -72,12 +81,18 @@ module RubyLLM
           @tool_calls[tool_call.id] = ToolCall.new(
             id: tool_call_id,
             name: tool_call.name,
-            arguments: tool_call_arguments
+            arguments: tool_call_arguments,
+            thought_signature: tool_call.thought_signature
           )
           @latest_tool_call_id = tool_call.id
         else
           existing = @tool_calls[@latest_tool_call_id]
-          existing.arguments << tool_call.arguments if existing
+          if existing
+            existing.arguments << tool_call.arguments
+            if tool_call.thought_signature && existing.thought_signature.nil?
+              existing.thought_signature = tool_call.thought_signature
+            end
+          end
         end
       end
     end
@@ -96,6 +111,88 @@ module RubyLLM
       @output_tokens = chunk.output_tokens if chunk.output_tokens
       @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
       @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
+      @thinking_tokens = chunk.thinking_tokens if chunk.thinking_tokens
+    end
+
+    def handle_chunk_content(chunk)
+      return accumulate_tool_calls(chunk.tool_calls) if chunk.tool_call?
+
+      content_text = chunk.content || ''
+      if content_text.is_a?(String)
+        append_text_with_thinking(content_text)
+      else
+        @content << content_text.to_s
+      end
+    end
+
+    def append_text_with_thinking(text)
+      content_chunk, thinking_chunk = extract_think_tags(text)
+      @content << content_chunk
+      @thinking_text << thinking_chunk if thinking_chunk
+    end
+
+    def append_thinking_from_chunk(chunk)
+      thinking = chunk.thinking
+      return unless thinking
+
+      @thinking_text << thinking.text.to_s if thinking.text
+      @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
+    end
+
+    def extract_think_tags(text)
+      start_tag = '<think>'
+      end_tag = '</think>'
+      remaining = @pending_think_tag + text
+      @pending_think_tag = +''
+
+      output = +''
+      thinking = +''
+
+      until remaining.empty?
+        remaining = if @inside_think_tag
+                      consume_think_content(remaining, end_tag, thinking)
+                    else
+                      consume_non_think_content(remaining, start_tag, output)
+                    end
+      end
+
+      [output, thinking.empty? ? nil : thinking]
+    end
+
+    def consume_think_content(remaining, end_tag, thinking)
+      end_index = remaining.index(end_tag)
+      if end_index
+        thinking << remaining.slice(0, end_index)
+        @inside_think_tag = false
+        remaining.slice((end_index + end_tag.length)..) || +''
+      else
+        suffix_len = longest_suffix_prefix(remaining, end_tag)
+        thinking << remaining.slice(0, remaining.length - suffix_len)
+        @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+        +''
+      end
+    end
+
+    def consume_non_think_content(remaining, start_tag, output)
+      start_index = remaining.index(start_tag)
+      if start_index
+        output << remaining.slice(0, start_index)
+        @inside_think_tag = true
+        remaining.slice((start_index + start_tag.length)..) || +''
+      else
+        suffix_len = longest_suffix_prefix(remaining, start_tag)
+        output << remaining.slice(0, remaining.length - suffix_len)
+        @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+        +''
+      end
+    end
+
+    def longest_suffix_prefix(text, tag)
+      max = [text.length, tag.length - 1].min
+      max.downto(1) do |len|
+        return len if text.end_with?(tag[0, len])
+      end
+      0
     end
   end
 end
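
The accumulator now carries thinking state across chunks: `<think>` tags that arrive split between streamed fragments are buffered in `@pending_think_tag` until they can be classified, and the extracted reasoning ends up on the final message via `Thinking.build`. A rough illustration of the buffering, poking at the (likely private) helpers with `send` purely for demonstration and assuming the class keeps its `RubyLLM::StreamAccumulator` name:

```ruby
require 'ruby_llm'

acc = RubyLLM::StreamAccumulator.new

# The opening <think> tag arrives split across two streamed fragments;
# the partial tag is held in @pending_think_tag until the next fragment.
acc.send(:append_text_with_thinking, 'Answer: <thi')
acc.send(:append_text_with_thinking, 'nk>check the units</think>42')

acc.instance_variable_get(:@content)       # => "Answer: 42"
acc.instance_variable_get(:@thinking_text) # => "check the units"
```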