ruby_llm 1.9.2 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
- data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
- data/lib/ruby_llm/active_record/message_methods.rb +41 -8
- data/lib/ruby_llm/aliases.json +0 -12
- data/lib/ruby_llm/chat.rb +10 -7
- data/lib/ruby_llm/configuration.rb +1 -1
- data/lib/ruby_llm/message.rb +37 -11
- data/lib/ruby_llm/models.json +1059 -857
- data/lib/ruby_llm/models.rb +134 -12
- data/lib/ruby_llm/provider.rb +4 -3
- data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
- data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
- data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
- data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
- data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
- data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
- data/lib/ruby_llm/providers/openai/chat.rb +87 -3
- data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
- data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
- data/lib/ruby_llm/providers/openai.rb +1 -1
- data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
- data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
- data/lib/ruby_llm/providers/openrouter.rb +2 -0
- data/lib/ruby_llm/providers/vertexai.rb +5 -1
- data/lib/ruby_llm/stream_accumulator.rb +111 -14
- data/lib/ruby_llm/streaming.rb +54 -51
- data/lib/ruby_llm/thinking.rb +49 -0
- data/lib/ruby_llm/tokens.rb +47 -0
- data/lib/ruby_llm/tool_call.rb +6 -3
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models.rake +19 -12
- metadata +12 -5
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Providers
|
|
5
|
+
class OpenRouter
|
|
6
|
+
# Chat methods of the OpenRouter API integration
|
|
7
|
+
module Chat
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# Builds the request body for an OpenRouter chat completion.
#
# Optional keys are only added when their source value is present, in this
# order: temperature, tools, response_format, reasoning, stream_options.
#
# @param messages [Array] conversation messages, rendered via format_messages
# @param tools [Hash] tool definitions; the values are rendered via OpenAI::Tools.tool_for
# @param temperature [Numeric, nil] omitted from the payload when nil
# @param model [#id] model whose id is sent
# @param stream [Boolean] when truthy, usage reporting is requested for the stream
# @param schema [Hash, nil] JSON schema; strict unless schema[:strict] is false
# @param thinking [Object, nil] thinking config, translated by build_reasoning
# @return [Hash] the request payload
def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists
  request = { model: model.id, messages: format_messages(messages), stream: stream }

  request[:temperature] = temperature unless temperature.nil?
  request[:tools] = tools.map { |_name, definition| OpenAI::Tools.tool_for(definition) } if tools.any?

  if schema
    request[:response_format] = {
      type: 'json_schema',
      json_schema: { name: 'response', schema: schema, strict: schema[:strict] != false }
    }
  end

  reasoning_options = build_reasoning(thinking)
  request[:reasoning] = reasoning_options if reasoning_options

  request[:stream_options] = { include_usage: true } if stream
  request
end
|
|
38
|
+
|
|
39
|
+
def parse_completion_response(response)
|
|
40
|
+
data = response.body
|
|
41
|
+
return if data.empty?
|
|
42
|
+
|
|
43
|
+
raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
|
|
44
|
+
|
|
45
|
+
message_data = data.dig('choices', 0, 'message')
|
|
46
|
+
return unless message_data
|
|
47
|
+
|
|
48
|
+
usage = data['usage'] || {}
|
|
49
|
+
cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
|
|
50
|
+
thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
|
|
51
|
+
thinking_text = extract_thinking_text(message_data)
|
|
52
|
+
thinking_signature = extract_thinking_signature(message_data)
|
|
53
|
+
|
|
54
|
+
Message.new(
|
|
55
|
+
role: :assistant,
|
|
56
|
+
content: message_data['content'],
|
|
57
|
+
thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
|
|
58
|
+
tool_calls: OpenAI::Tools.parse_tool_calls(message_data['tool_calls']),
|
|
59
|
+
input_tokens: usage['prompt_tokens'],
|
|
60
|
+
output_tokens: usage['completion_tokens'],
|
|
61
|
+
cached_tokens: cached_tokens,
|
|
62
|
+
cache_creation_tokens: 0,
|
|
63
|
+
thinking_tokens: thinking_tokens,
|
|
64
|
+
model_id: data['model'],
|
|
65
|
+
raw: response
|
|
66
|
+
)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Renders conversation messages as request hashes.
#
# Nil-valued base fields are dropped before the reasoning details returned by
# format_thinking are merged in.
#
# @param messages [Array] objects responding to role, content, tool_calls, tool_call_id
# @return [Array<Hash>]
def format_messages(messages)
  messages.map do |message|
    base = {
      role: format_role(message.role),
      content: OpenAI::Media.format_content(message.content),
      tool_calls: OpenAI::Tools.format_tool_calls(message.tool_calls),
      tool_call_id: message.tool_call_id
    }
    present = base.compact
    present.merge(format_thinking(message))
  end
end
|
|
79
|
+
|
|
80
|
+
# Maps a message role to the role string sent in the request.
#
# :system becomes 'system' or 'developer' depending on
# @config.openai_use_system_role; every other role is sent as role.to_s.
#
# @param role [Symbol, #to_s]
# @return [String]
def format_role(role)
  return role.to_s unless role.equal?(:system)

  @config.openai_use_system_role ? 'system' : 'developer'
end
|
|
88
|
+
|
|
89
|
+
# Translates a thinking config into the request's reasoning options.
#
# @param thinking [#enabled?, nil] config; effort and budget are read only if it responds to them
# @return [Hash, nil] nil when thinking is absent or disabled; otherwise effort and/or
#   max_tokens, or { enabled: true } when neither value is set
def build_reasoning(thinking)
  return nil unless thinking&.enabled?

  effort = thinking.effort if thinking.respond_to?(:effort)
  budget = thinking.budget if thinking.respond_to?(:budget)

  options = { effort: effort, max_tokens: budget }.select { |_key, value| value }
  options.empty? ? { enabled: true } : options
end
|
|
98
|
+
|
|
99
|
+
# Renders an assistant message's thinking as a reasoning_details entry.
#
# Thinking text produces a 'reasoning.text' detail (with its signature when
# present); a signature without text produces a 'reasoning.encrypted' detail.
#
# @param msg [#thinking, #role]
# @return [Hash] { reasoning_details: [...] }, or {} when there is nothing to send
def format_thinking(msg)
  thinking = msg.thinking
  return {} unless thinking && msg.role == :assistant

  text = thinking.text
  detail =
    if text
      { type: 'reasoning.text', text: text, signature: thinking.signature }.compact
    elsif thinking.signature
      { type: 'reasoning.encrypted', data: thinking.signature }
    end

  detail ? { reasoning_details: [detail] } : {}
end
|
|
119
|
+
|
|
120
|
+
# Extracts the reasoning text from a completion message.
#
# A String under 'reasoning' is returned as-is. Otherwise the text of every
# 'reasoning.text' entry and the summary of every 'reasoning.summary' entry in
# 'reasoning_details' are concatenated in order.
#
# @param message_data [Hash] the parsed message payload
# @return [String, nil] nil when no reasoning text is present
def extract_thinking_text(message_data)
  candidate = message_data['reasoning']
  return candidate if candidate.is_a?(String)

  details = message_data['reasoning_details']
  return nil unless details.is_a?(Array)

  text = details.filter_map do |detail|
    # Skip malformed entries (nil, numbers, ...) instead of raising on detail[...].
    next unless detail.is_a?(Hash)

    case detail['type']
    when 'reasoning.text' then detail['text']
    when 'reasoning.summary' then detail['summary']
    end
  end.join

  text.empty? ? nil : text
end
|
|
138
|
+
|
|
139
|
+
# Extracts the reasoning signature from a completion message.
#
# The first String 'signature' in 'reasoning_details' wins; failing that, the
# String 'data' of the first 'reasoning.encrypted' entry is used.
#
# @param message_data [Hash] the parsed message payload
# @return [String, nil]
def extract_thinking_signature(message_data)
  details = message_data['reasoning_details']
  return nil unless details.is_a?(Array)

  # Only Hash entries can be indexed by key; anything else is ignored rather than raising.
  entries = details.grep(Hash)

  signed = entries.find { |detail| detail['signature'].is_a?(String) }
  return signed['signature'] if signed

  encrypted = entries.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
  encrypted&.dig('data')
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Providers
|
|
5
|
+
class OpenRouter
|
|
6
|
+
# Streaming methods of the OpenRouter API integration
|
|
7
|
+
module Streaming
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# Streaming requests go to the same endpoint that completion_url returns.
def stream_url = completion_url
|
|
13
|
+
|
|
14
|
+
# Builds a Chunk from one parsed streaming event.
#
# Content, reasoning and tool calls are read from the first choice's delta;
# token counts are read from 'usage' and are nil when the event has none.
#
# @param data [Hash] parsed event payload
# @return [Chunk]
def build_chunk(data)
  token_usage = data['usage'] || {}
  cached = token_usage.dig('prompt_tokens_details', 'cached_tokens')
  choice_delta = data.dig('choices', 0, 'delta') || {}

  reasoning = Thinking.build(
    text: extract_thinking_text(choice_delta),
    signature: extract_thinking_signature(choice_delta)
  )
  # Arguments are left unparsed here (parse_arguments: false).
  calls = OpenAI::Tools.parse_tool_calls(choice_delta['tool_calls'], parse_arguments: false)

  Chunk.new(
    role: :assistant,
    model_id: data['model'],
    content: choice_delta['content'],
    thinking: reasoning,
    tool_calls: calls,
    input_tokens: token_usage['prompt_tokens'],
    output_tokens: token_usage['completion_tokens'],
    cached_tokens: cached,
    cache_creation_tokens: 0,
    thinking_tokens: token_usage.dig('completion_tokens_details', 'reasoning_tokens')
  )
end
|
|
35
|
+
|
|
36
|
+
# Delegates to OpenAI::Streaming.parse_streaming_error and returns its result unchanged.
def parse_streaming_error(data) = OpenAI::Streaming.parse_streaming_error(data)
|
|
39
|
+
|
|
40
|
+
# Extracts the reasoning text carried by one streaming delta.
#
# A String under 'reasoning' is returned as-is. Otherwise the text of every
# 'reasoning.text' entry and the summary of every 'reasoning.summary' entry in
# 'reasoning_details' are concatenated in order.
#
# @param delta [Hash] the parsed delta payload
# @return [String, nil] nil when the delta has no reasoning text
def extract_thinking_text(delta)
  candidate = delta['reasoning']
  return candidate if candidate.is_a?(String)

  details = delta['reasoning_details']
  return nil unless details.is_a?(Array)

  text = details.filter_map do |detail|
    # A malformed entry (nil, number, ...) must not abort the whole stream.
    next unless detail.is_a?(Hash)

    case detail['type']
    when 'reasoning.text' then detail['text']
    when 'reasoning.summary' then detail['summary']
    end
  end.join

  text.empty? ? nil : text
end
|
|
58
|
+
|
|
59
|
+
# Extracts the reasoning signature carried by one streaming delta.
#
# The first String 'signature' in 'reasoning_details' wins; failing that, the
# String 'data' of the first 'reasoning.encrypted' entry is used.
#
# @param delta [Hash] the parsed delta payload
# @return [String, nil]
def extract_thinking_signature(delta)
  details = delta['reasoning_details']
  return nil unless details.is_a?(Array)

  # Only Hash entries can be indexed by key; anything else is ignored rather than raising.
  entries = details.grep(Hash)

  signed = entries.find { |detail| detail['signature'].is_a?(String) }
  return signed['signature'] if signed

  encrypted = entries.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
  encrypted&.dig('data')
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -16,7 +16,11 @@ module RubyLLM
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
# Base URL of the Vertex AI v1beta1 API.
#
# The 'global' location uses the un-prefixed host; every other location is
# used as a host prefix.
#
# @return [String]
def api_base
  location = @config.vertexai_location
  return 'https://aiplatform.googleapis.com/v1beta1' if location.to_s == 'global'

  "https://#{location}-aiplatform.googleapis.com/v1beta1"
end
|
|
21
25
|
|
|
22
26
|
def headers
|
|
@@ -7,11 +7,16 @@ module RubyLLM
|
|
|
7
7
|
|
|
8
8
|
# Starts with empty text buffers, no tool calls and no token counts.
def initialize
  # Answer text and thinking text, each grown as chunks arrive.
  @content = ''.dup
  @thinking_text = ''.dup
  @thinking_signature = nil
  @tool_calls = {}
  # Token counters stay nil until a chunk supplies a value.
  @input_tokens, @output_tokens, @cached_tokens, @cache_creation_tokens, @thinking_tokens = nil
  # State for <think>...</think> tags that may be split across chunks.
  @inside_think_tag = false
  @pending_think_tag = ''.dup
  @latest_tool_call_id = nil
end
|
|
17
22
|
|
|
@@ -19,12 +24,8 @@ module RubyLLM
|
|
|
19
24
|
RubyLLM.logger.debug chunk.inspect if RubyLLM.config.log_stream_debug
|
|
20
25
|
@model_id ||= chunk.model_id
|
|
21
26
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
else
|
|
25
|
-
@content << (chunk.content || '')
|
|
26
|
-
end
|
|
27
|
-
|
|
27
|
+
handle_chunk_content(chunk)
|
|
28
|
+
append_thinking_from_chunk(chunk)
|
|
28
29
|
count_tokens chunk
|
|
29
30
|
RubyLLM.logger.debug inspect if RubyLLM.config.log_stream_debug
|
|
30
31
|
end
|
|
@@ -33,12 +34,19 @@ module RubyLLM
|
|
|
33
34
|
Message.new(
|
|
34
35
|
role: :assistant,
|
|
35
36
|
content: content.empty? ? nil : content,
|
|
37
|
+
thinking: Thinking.build(
|
|
38
|
+
text: @thinking_text.empty? ? nil : @thinking_text,
|
|
39
|
+
signature: @thinking_signature
|
|
40
|
+
),
|
|
41
|
+
tokens: Tokens.build(
|
|
42
|
+
input: @input_tokens,
|
|
43
|
+
output: @output_tokens,
|
|
44
|
+
cached: @cached_tokens,
|
|
45
|
+
cache_creation: @cache_creation_tokens,
|
|
46
|
+
thinking: @thinking_tokens
|
|
47
|
+
),
|
|
36
48
|
model_id: model_id,
|
|
37
49
|
tool_calls: tool_calls_from_stream,
|
|
38
|
-
input_tokens: @input_tokens,
|
|
39
|
-
output_tokens: @output_tokens,
|
|
40
|
-
cached_tokens: @cached_tokens,
|
|
41
|
-
cache_creation_tokens: @cache_creation_tokens,
|
|
42
50
|
raw: response
|
|
43
51
|
)
|
|
44
52
|
end
|
|
@@ -58,12 +66,13 @@ module RubyLLM
|
|
|
58
66
|
ToolCall.new(
|
|
59
67
|
id: tc.id,
|
|
60
68
|
name: tc.name,
|
|
61
|
-
arguments: arguments
|
|
69
|
+
arguments: arguments,
|
|
70
|
+
thought_signature: tc.thought_signature
|
|
62
71
|
)
|
|
63
72
|
end
|
|
64
73
|
end
|
|
65
74
|
|
|
66
|
-
def accumulate_tool_calls(new_tool_calls)
|
|
75
|
+
def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedComplexity
|
|
67
76
|
RubyLLM.logger.debug "Accumulating tool calls: #{new_tool_calls}" if RubyLLM.config.log_stream_debug
|
|
68
77
|
new_tool_calls.each_value do |tool_call|
|
|
69
78
|
if tool_call.id
|
|
@@ -72,12 +81,18 @@ module RubyLLM
|
|
|
72
81
|
@tool_calls[tool_call.id] = ToolCall.new(
|
|
73
82
|
id: tool_call_id,
|
|
74
83
|
name: tool_call.name,
|
|
75
|
-
arguments: tool_call_arguments
|
|
84
|
+
arguments: tool_call_arguments,
|
|
85
|
+
thought_signature: tool_call.thought_signature
|
|
76
86
|
)
|
|
77
87
|
@latest_tool_call_id = tool_call.id
|
|
78
88
|
else
|
|
79
89
|
existing = @tool_calls[@latest_tool_call_id]
|
|
80
|
-
|
|
90
|
+
if existing
|
|
91
|
+
existing.arguments << tool_call.arguments
|
|
92
|
+
if tool_call.thought_signature && existing.thought_signature.nil?
|
|
93
|
+
existing.thought_signature = tool_call.thought_signature
|
|
94
|
+
end
|
|
95
|
+
end
|
|
81
96
|
end
|
|
82
97
|
end
|
|
83
98
|
end
|
|
@@ -96,6 +111,88 @@ module RubyLLM
|
|
|
96
111
|
@output_tokens = chunk.output_tokens if chunk.output_tokens
|
|
97
112
|
@cached_tokens = chunk.cached_tokens if chunk.cached_tokens
|
|
98
113
|
@cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
|
|
114
|
+
@thinking_tokens = chunk.thinking_tokens if chunk.thinking_tokens
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Routes one chunk's payload: tool calls are accumulated; String content (nil
# counts as '') goes through think-tag splitting; any other content is
# appended to @content via to_s.
def handle_chunk_content(chunk)
  if chunk.tool_call?
    accumulate_tool_calls(chunk.tool_calls)
  else
    piece = chunk.content || ''
    case piece
    when String then append_text_with_thinking(piece)
    else @content << piece.to_s
    end
  end
end
|
|
127
|
+
|
|
128
|
+
# Splits text via extract_think_tags and appends the visible part to @content
# and the thinking part, when there is one, to @thinking_text.
def append_text_with_thinking(text)
  visible, hidden = extract_think_tags(text)
  @content << visible
  @thinking_text << hidden if hidden
end
|
|
133
|
+
|
|
134
|
+
# Folds a chunk's thinking into the accumulator: its text is appended to
# @thinking_text, and its signature is kept only if none is stored yet.
#
# @return the stored signature, or nil when the chunk has no thinking
def append_thinking_from_chunk(chunk)
  fragment = chunk.thinking
  return unless fragment

  text = fragment.text
  @thinking_text << text.to_s if text

  # The first signature seen wins; later ones are ignored.
  @thinking_signature = fragment.signature unless @thinking_signature
  @thinking_signature
end
|
|
141
|
+
|
|
142
|
+
# Splits text into its visible part and the part enclosed in
# <think>...</think>, carrying state across calls.
#
# Any partial tag held back from the previous call is prepended first.
# @inside_think_tag selects which consumer handles the remaining input.
#
# @param text [String]
# @return [Array(String, String|nil)] visible text, and thinking text or nil when there is none
def extract_think_tags(text)
  open_tag = '<think>'
  close_tag = '</think>'

  buffer = @pending_think_tag + text
  @pending_think_tag = +''

  visible = +''
  hidden = +''

  while buffer.length.positive?
    if @inside_think_tag
      buffer = consume_think_content(buffer, close_tag, hidden)
    else
      buffer = consume_non_think_content(buffer, open_tag, visible)
    end
  end

  [visible, hidden.empty? ? nil : hidden]
end
|
|
161
|
+
|
|
162
|
+
# Consumes input while inside a think block.
#
# With the end tag present, everything before it is appended to thinking, the
# inside flag is cleared and the text after the tag is returned. Otherwise all
# input is appended to thinking except a trailing partial end tag, which is
# stored in @pending_think_tag.
#
# @return [String] input still to be processed ('' when fully consumed)
def consume_think_content(remaining, end_tag, thinking)
  close_at = remaining.index(end_tag)

  unless close_at
    held = longest_suffix_prefix(remaining, end_tag)
    thinking << remaining[0, remaining.length - held]
    @pending_think_tag = remaining[-held, held]
    return +''
  end

  thinking << remaining[0, close_at]
  @inside_think_tag = false
  remaining[(close_at + end_tag.length)..] || +''
end
|
|
175
|
+
|
|
176
|
+
# Consumes input while outside a think block.
#
# With the start tag present, everything before it is appended to output, the
# inside flag is set and the text after the tag is returned. Otherwise all
# input is appended to output except a trailing partial start tag, which is
# stored in @pending_think_tag.
#
# @return [String] input still to be processed ('' when fully consumed)
def consume_non_think_content(remaining, start_tag, output)
  open_at = remaining.index(start_tag)

  unless open_at
    held = longest_suffix_prefix(remaining, start_tag)
    output << remaining[0, remaining.length - held]
    @pending_think_tag = remaining[-held, held]
    return +''
  end

  output << remaining[0, open_at]
  @inside_think_tag = true
  remaining[(open_at + start_tag.length)..] || +''
end
|
|
189
|
+
|
|
190
|
+
# Length of the longest proper prefix of tag that text ends with (0 if none).
# The whole tag is never counted: the candidate length is capped at
# tag.length - 1.
def longest_suffix_prefix(text, tag)
  longest = [text.length, tag.length - 1].min
  longest.downto(1).find { |len| text.end_with?(tag[0, len]) } || 0
end
|
|
100
197
|
end
|
|
101
198
|
end
|
data/lib/ruby_llm/streaming.rb
CHANGED
|
@@ -29,7 +29,7 @@ module RubyLLM
|
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
# Returns the on_data handler built by build_on_data_handler. For each
# non-nil payload it builds a chunk and passes it to the caller's block.
def handle_stream(&block)
  build_on_data_handler do |payload|
    next unless payload

    block.call(build_chunk(payload))
  end
end
|
|
@@ -40,19 +40,15 @@ module RubyLLM
|
|
|
40
40
|
Faraday::VERSION.start_with?('1')
|
|
41
41
|
end
|
|
42
42
|
|
|
43
|
-
def
|
|
43
|
+
# Builds the Faraday on_data callback for a streaming request.
#
# A fresh event-stream parser and error buffer are created per call and are
# captured by the two callbacks handed to FaradayHandlers.build.
#
# @yield [data] passed through to process_stream_chunk
def build_on_data_handler(&handler)
  error_buffer = +''
  sse_parser = EventStreamParser::Parser.new

  on_chunk = ->(chunk, env) { process_stream_chunk(chunk, sse_parser, env, &handler) }
  on_failure = ->(chunk, env) { handle_failed_response(chunk, error_buffer, env) }

  FaradayHandlers.build(faraday_v1: faraday_1?, on_chunk: on_chunk, on_failed_response: on_failure)
end
|
|
57
53
|
|
|
58
54
|
def process_stream_chunk(chunk, parser, env, &)
|
|
@@ -67,22 +63,6 @@ module RubyLLM
|
|
|
67
63
|
end
|
|
68
64
|
end
|
|
69
65
|
|
|
70
|
-
def legacy_stream_processor(parser, &block)
|
|
71
|
-
proc do |chunk, _size|
|
|
72
|
-
process_stream_chunk(chunk, parser, nil, &block)
|
|
73
|
-
end
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
def stream_processor(parser, buffer, &block)
|
|
77
|
-
proc do |chunk, _bytes, env|
|
|
78
|
-
if env&.status == 200
|
|
79
|
-
process_stream_chunk(chunk, parser, env, &block)
|
|
80
|
-
else
|
|
81
|
-
handle_failed_response(chunk, buffer, env)
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
|
|
86
66
|
def error_chunk?(chunk)
|
|
87
67
|
chunk.start_with?('event: error')
|
|
88
68
|
end
|
|
@@ -92,30 +72,18 @@ module RubyLLM
|
|
|
92
72
|
end
|
|
93
73
|
|
|
94
74
|
# Treats the whole chunk as a JSON error document and hands it to parse_error_from_json.
def handle_json_error_chunk(chunk, env) = parse_error_from_json(chunk, env, 'Failed to parse JSON error chunk')
|
|
102
77
|
|
|
103
78
|
# Handles an SSE error chunk by handing its data payload to parse_error_from_json.
#
# The payload is taken from the first line starting with 'data:', with or
# without a space after the colon and with LF or CRLF line endings. A chunk
# with no data line yields an empty payload instead of raising NoMethodError.
#
# @param chunk [String] raw SSE chunk
# @param env [Object, nil] Faraday env for the response, if available
def handle_error_chunk(chunk, env)
  data_line = chunk.each_line(chomp: true).find { |line| line.start_with?('data:') }
  error_data = data_line.to_s.delete_prefix('data:').lstrip
  parse_error_from_json(error_data, env, 'Failed to parse error chunk')
end
|
|
112
82
|
|
|
113
83
|
# Accumulates the body of a failed response.
#
# Each chunk is appended to buffer; once the buffer parses as JSON the parsed
# error is handed to handle_parsed_error. Until then the chunk is only logged
# at debug level.
def handle_failed_response(chunk, buffer, env)
  handle_parsed_error(JSON.parse(buffer << chunk), env)
rescue JSON::ParserError
  RubyLLM.logger.debug("Accumulating error chunk: #{chunk}")
end
|
|
@@ -135,20 +103,13 @@ module RubyLLM
|
|
|
135
103
|
parsed = JSON.parse(data)
|
|
136
104
|
return parsed unless parsed.is_a?(Hash) && parsed.key?('error')
|
|
137
105
|
|
|
138
|
-
|
|
139
|
-
error_response = build_stream_error_response(parsed, env, status)
|
|
140
|
-
ErrorMiddleware.parse_error(provider: self, response: error_response)
|
|
106
|
+
handle_parsed_error(parsed, env)
|
|
141
107
|
rescue JSON::ParserError => e
|
|
142
108
|
RubyLLM.logger.debug "Failed to parse data chunk: #{e.message}"
|
|
143
109
|
end
|
|
144
110
|
|
|
145
111
|
# Hands an error event's data to parse_error_from_json.
def handle_error_event(data, env) = parse_error_from_json(data, env, 'Failed to parse error event')
|
|
153
114
|
|
|
154
115
|
def parse_streaming_error(data)
|
|
@@ -159,6 +120,19 @@ module RubyLLM
|
|
|
159
120
|
[500, "Failed to parse error: #{data}"]
|
|
160
121
|
end
|
|
161
122
|
|
|
123
|
+
# Turns already-parsed error data into an error response and passes it to
# ErrorMiddleware.parse_error with this object as the provider.
#
# Only the status from parse_streaming_error is used; its message is ignored.
def handle_parsed_error(parsed_data, env)
  status, = parse_streaming_error(parsed_data.to_json)

  ErrorMiddleware.parse_error(
    provider: self,
    response: build_stream_error_response(parsed_data, env, status)
  )
end
|
|
128
|
+
|
|
129
|
+
# Parses a JSON error payload and hands it to handle_parsed_error.
#
# Invalid JSON is not raised; it is logged at debug level, prefixed with
# error_message.
#
# @param data [String] JSON text
# @param env [Object, nil] Faraday env, passed through
# @param error_message [String] log prefix used when parsing fails
def parse_error_from_json(data, env, error_message)
  handle_parsed_error(JSON.parse(data), env)
rescue JSON::ParserError => parse_failure
  RubyLLM.logger.debug("#{error_message}: #{parse_failure.message}")
end
|
|
135
|
+
|
|
162
136
|
def build_stream_error_response(parsed_data, env, status)
|
|
163
137
|
error_status = status || env&.status || 500
|
|
164
138
|
|
|
@@ -168,5 +142,34 @@ module RubyLLM
|
|
|
168
142
|
env.merge(body: parsed_data, status: error_status)
|
|
169
143
|
end
|
|
170
144
|
end
|
|
145
|
+
|
|
146
|
+
# Builds Faraday on_data handlers for different major versions.
|
|
147
|
+
module FaradayHandlers
  module_function

  # Picks the on_data proc matching the Faraday major version in use.
  #
  # @param faraday_v1 [Boolean] true when running under Faraday 1.x
  # @param on_chunk [#call] called with (chunk, env)
  # @param on_failed_response [#call] called with (chunk, env); used by the v2 handler only
  # @return [Proc]
  def build(faraday_v1:, on_chunk:, on_failed_response:)
    return v1_on_data(on_chunk) if faraday_v1

    v2_on_data(on_chunk, on_failed_response)
  end

  # The v1 handler receives (chunk, size) and has no env, so on_chunk is called with nil.
  def v1_on_data(on_chunk)
    proc { |chunk, _size| on_chunk.call(chunk, nil) }
  end

  # The v2 handler receives (chunk, bytes, env). Only a 200 status goes to
  # on_chunk; anything else, including a missing env, goes to on_failed_response.
  def v2_on_data(on_chunk, on_failed_response)
    proc do |chunk, _bytes, env|
      target = env&.status == 200 ? on_chunk : on_failed_response
      target.call(chunk, env)
    end
  end
end
|
|
171
174
|
end
|
|
172
175
|
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
# Represents provider thinking output.
|
|
5
|
+
class Thinking
  attr_reader :text, :signature

  class << self
    # Normalizing constructor: empty strings count as absent.
    #
    # @return [Thinking, nil] nil when neither text nor signature remains
    def build(text: nil, signature: nil)
      text, signature = [text, signature].map do |value|
        value.is_a?(String) && value.empty? ? nil : value
      end
      return nil if text.nil? && signature.nil?

      new(text: text, signature: signature)
    end
  end

  # @param text [String, nil] thinking text
  # @param signature [String, nil] signature accompanying the thinking
  def initialize(text: nil, signature: nil)
    @text = text
    @signature = signature
  end

  # Pretty-prints the text in full but never the signature value itself:
  # a present signature is shown as '[REDACTED]'.
  def pretty_print(printer)
    shown_signature = signature ? '[REDACTED]' : nil

    printer.object_group(self) do
      printer.breakable
      printer.text 'text='
      printer.pp text
      printer.comma_breakable
      printer.text 'signature='
      printer.pp shown_signature
    end
  end
end
|
|
33
|
+
|
|
34
|
+
class Thinking
  # Normalized config for thinking across providers.
  class Config
    attr_reader :effort, :budget

    # @param effort [String, Symbol, nil] a Symbol is stored as its String form
    # @param budget [Object, nil] stored unchanged
    def initialize(effort: nil, budget: nil)
      @effort =
        case effort
        when Symbol then effort.to_s
        else effort
        end
      @budget = budget
    end

    # True when an effort or a budget has been set.
    def enabled?
      !(effort.nil? && budget.nil?)
    end
  end
end
|
|
49
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
# Represents token usage for a response.
|
|
5
|
+
class Tokens
  attr_reader :input, :output, :cached, :cache_creation, :thinking

  # rubocop:disable Metrics/ParameterLists
  # `reasoning` is accepted as another name for `thinking`; `thinking` wins
  # when both are given.
  def initialize(input: nil, output: nil, cached: nil, cache_creation: nil, thinking: nil, reasoning: nil)
    @input = input
    @output = output
    @cached = cached
    @cache_creation = cache_creation
    @thinking = thinking || reasoning
  end
  # rubocop:enable Metrics/ParameterLists

  # rubocop:disable Metrics/ParameterLists
  # Like .new, but returns nil when every count is nil.
  def self.build(input: nil, output: nil, cached: nil, cache_creation: nil, thinking: nil, reasoning: nil)
    counts = {
      input: input,
      output: output,
      cached: cached,
      cache_creation: cache_creation,
      thinking: thinking,
      reasoning: reasoning
    }
    return nil if counts.values.compact.empty?

    new(**counts)
  end
  # rubocop:enable Metrics/ParameterLists

  # Counts keyed by their *_tokens names; nil counts are left out.
  def to_h
    usage = {
      input_tokens: input,
      output_tokens: output,
      cached_tokens: cached,
      cache_creation_tokens: cache_creation,
      thinking_tokens: thinking
    }
    usage.reject { |_name, count| count.nil? }
  end

  # Another name for #thinking.
  def reasoning = thinking
end
|
|
47
|
+
end
|