ruby-pi 0.1.3 → 0.1.6
This diff shows the content of publicly available package versions as released to their respective public registries. It is provided for informational purposes only and reflects the changes between those published versions.
- checksums.yaml +4 -4
- data/CHANGELOG.md +62 -0
- data/README.md +77 -29
- data/lib/ruby_pi/agent/core.rb +59 -4
- data/lib/ruby_pi/agent/events.rb +17 -3
- data/lib/ruby_pi/agent/loop.rb +103 -18
- data/lib/ruby_pi/agent/result.rb +46 -7
- data/lib/ruby_pi/agent/state.rb +12 -0
- data/lib/ruby_pi/configuration.rb +28 -7
- data/lib/ruby_pi/context/compaction.rb +54 -4
- data/lib/ruby_pi/context/transform.rb +67 -3
- data/lib/ruby_pi/errors.rb +19 -1
- data/lib/ruby_pi/llm/anthropic.rb +243 -67
- data/lib/ruby_pi/llm/base_provider.rb +84 -47
- data/lib/ruby_pi/llm/fallback.rb +106 -1
- data/lib/ruby_pi/llm/gemini.rb +258 -53
- data/lib/ruby_pi/llm/openai.rb +208 -53
- data/lib/ruby_pi/llm/stream_event.rb +13 -3
- data/lib/ruby_pi/llm/tool_call.rb +26 -3
- data/lib/ruby_pi/tools/executor.rb +139 -21
- data/lib/ruby_pi/tools/registry.rb +26 -16
- data/lib/ruby_pi/version.rb +1 -1
- data/lib/ruby_pi.rb +2 -1
- metadata +6 -40
data/lib/ruby_pi/llm/fallback.rb
CHANGED
@@ -16,6 +16,12 @@ module RubyPi
     # Authentication errors are NOT retried with the fallback since they
     # indicate a configuration problem rather than a transient failure.
     #
+    # Issue #23: When streaming, the Fallback now buffers deltas from the
+    # primary provider. If the primary fails mid-stream, the buffered deltas
+    # are discarded and the fallback provider streams fresh from the start.
+    # This prevents the consumer from seeing partial output from the primary
+    # concatenated with the complete output from the fallback.
+    #
     # @example Setting up a fallback chain
     #   primary = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
     #   backup = RubyPi::LLM.model(:openai, "gpt-4o")
@@ -55,6 +61,28 @@ module RubyPi
         :fallback
       end
 
+      # Overrides BaseProvider#complete to skip the outer retry wrapper.
+      #
+      # Without this override, Fallback inherits BaseProvider#complete which
+      # wraps perform_complete in a retry loop. Since perform_complete calls
+      # @primary.complete (which has its own retry loop) and @fallback.complete
+      # (also with retries), the retry layers compose multiplicatively:
+      #   outer_retries x (primary_retries + fallback_retries)
+      # With default max_retries=3, that's 4 x (4 + 4) = 32 total attempts
+      # instead of the expected 4 + 4 = 8.
+      #
+      # This override calls perform_complete directly — no outer retry loop.
+      # Each inner provider handles its own retries independently.
+      #
+      # @param messages [Array<Hash>] conversation messages
+      # @param tools [Array<Hash>] tool/function definitions
+      # @param stream [Boolean] whether to enable streaming mode
+      # @yield [event] yields StreamEvent objects when streaming
+      # @return [RubyPi::LLM::Response]
+      def complete(messages:, tools: [], stream: false, &block)
+        perform_complete(messages: messages, tools: tools, stream: stream, &block)
+      end
+
       private
 
       # Attempts the completion with the primary provider. If it fails with
@@ -62,12 +90,36 @@ module RubyPi
       # the request is retried with the fallback provider. Authentication errors
       # propagate immediately since they indicate misconfiguration.
       #
+      # Each inner provider handles its own retries via BaseProvider#complete,
+      # so this method does NOT add an additional retry layer.
+      #
+      # Issue #23 + Issue #12: When streaming with a block, events are
+      # delivered to the consumer in real-time (no buffering). If the
+      # primary fails mid-stream, a :fallback_start event is emitted
+      # so the consumer can clear partial state, then the fallback
+      # provider streams directly to the consumer.
+      #
      # @param messages [Array<Hash>] conversation messages
      # @param tools [Array<Hash>] tool definitions
      # @param stream [Boolean] streaming mode flag
      # @yield [event] optional block for streaming events
      # @return [RubyPi::LLM::Response]
      def perform_complete(messages:, tools:, stream:, &block)
+        if stream && block_given?
+          perform_complete_with_streaming_fallback(messages: messages, tools: tools, &block)
+        else
+          perform_complete_without_streaming(messages: messages, tools: tools, stream: stream, &block)
+        end
+      end
+
+      # Non-streaming fallback — simple try primary, rescue, try fallback.
+      #
+      # @param messages [Array<Hash>] conversation messages
+      # @param tools [Array<Hash>] tool definitions
+      # @param stream [Boolean] streaming mode flag
+      # @yield [event] optional block for streaming events
+      # @return [RubyPi::LLM::Response]
+      def perform_complete_without_streaming(messages:, tools:, stream:, &block)
        @primary.complete(messages: messages, tools: tools, stream: stream, &block)
      rescue RubyPi::AuthenticationError
        # Configuration errors should not trigger fallback
@@ -77,12 +129,65 @@ module RubyPi
        @fallback.complete(messages: messages, tools: tools, stream: stream, &block)
      end
 
+      # Streaming fallback with real-time event delivery.
+      #
+      # Issue #23 + Issue #12: Stream events directly to the consumer in
+      # real-time (no buffering on the happy path). If the primary provider
+      # fails mid-stream, emit a :fallback_start event so the consumer can
+      # reset any partial state, then stream from the fallback provider.
+      #
+      # This preserves the streaming UX: consumers see tokens as they arrive
+      # instead of waiting for the entire response to complete. The tradeoff
+      # is that on primary failure, the consumer receives a :fallback_start
+      # signal and is responsible for clearing partial output.
+      #
+      # @param messages [Array<Hash>] conversation messages
+      # @param tools [Array<Hash>] tool definitions
+      # @yield [event] the consumer's streaming block
+      # @return [RubyPi::LLM::Response]
+      def perform_complete_with_streaming_fallback(messages:, tools:, &block)
+        begin
+          # Stream primary events directly to the consumer for real-time UX.
+          # No buffering — tokens appear immediately as they arrive.
+          response = @primary.complete(
+            messages: messages,
+            tools: tools,
+            stream: true,
+            &block
+          )
+
+          response
+        rescue RubyPi::AuthenticationError
+          # Configuration errors should not trigger fallback
+          raise
+        rescue RubyPi::Error => e
+          log_fallback(e)
+
+          # Signal the consumer that the primary failed mid-stream and a
+          # fallback provider is taking over. Consumers should use this event
+          # to clear any partial output from the failed primary.
+          block.call(StreamEvent.new(type: :fallback_start, data: {
+            failed_provider: @primary.provider_name,
+            error: e.message,
+            fallback_provider: @fallback.provider_name
+          }))
+
+          # Stream directly from the fallback to the consumer's block.
+          @fallback.complete(
+            messages: messages,
+            tools: tools,
+            stream: true,
+            &block
+          )
+        end
+      end
+
      # Logs the fallback event if a logger is configured.
      #
      # @param error [Exception] the error that triggered the fallback
      # @return [void]
      def log_fallback(error)
-        logger =
+        logger = @config.logger
        return unless logger
 
        logger.warn(
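For consumers of the new streaming fallback path, the practical contract is: append text deltas as they arrive, and discard anything already rendered when a :fallback_start event appears. The sketch below illustrates that contract under stated assumptions: the Fallback constructor keywords (primary:, fallback:) and the StreamEvent #type/#data readers are not shown in this diff and are assumed here purely for illustration.

  primary = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
  backup  = RubyPi::LLM.model(:openai, "gpt-4o")
  llm     = RubyPi::LLM::Fallback.new(primary: primary, fallback: backup) # assumed constructor shape

  output = +""
  response = llm.complete(messages: [{ role: :user, content: "Explain fallback providers in one sentence." }], stream: true) do |event|
    case event.type # StreamEvent reader methods assumed
    when :text_delta
      output << event.data.to_s # tokens arrive in real time from whichever provider is active
    when :fallback_start
      # The primary failed mid-stream; the fallback re-streams from the start,
      # so partial output from the failed primary must be cleared.
      output.clear
    end
  end

Non-streaming callers are unaffected: without a block, perform_complete_without_streaming keeps the original try-primary-then-fallback behavior.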
data/lib/ruby_pi/llm/gemini.rb
CHANGED
@@ -6,6 +6,8 @@
 # the Gemini REST API for both synchronous and streaming completions, including
 # tool/function calling support.
 
+require "securerandom"
+
 module RubyPi
   module LLM
     # Google Gemini provider implementation. Communicates with the Gemini
@@ -33,7 +35,7 @@ module RubyPi
      # @param options [Hash] additional options passed to BaseProvider
      def initialize(model: nil, api_key: nil, **options)
        super(**options)
-        config =
+        config = @config
        @model = model || config.default_gemini_model
        @api_key = api_key || config.gemini_api_key
      end
@@ -77,10 +79,33 @@ module RubyPi
      # @param tools [Array<Hash>] tool definitions
      # @return [Hash] the request body
      def build_request_body(messages, tools)
+        # Separate system messages from conversation messages. Gemini requires
+        # system instructions via a dedicated `systemInstruction` field — they
+        # cannot appear as entries in `contents`. The Loop prepends a
+        # { role: :system } message; we extract it here.
+        system_parts = []
+        conversation_messages = []
+
+        messages.each do |msg|
+          role = (msg[:role] || msg["role"]).to_s
+          if role == "system"
+            system_parts << (msg[:content] || msg["content"]).to_s
+          else
+            conversation_messages << msg
+          end
+        end
+
        body = {
-          contents:
+          contents: conversation_messages.map { |msg| format_message(msg) }
        }
 
+        # Inject system instruction when system messages are present
+        unless system_parts.empty?
+          body[:systemInstruction] = {
+            parts: system_parts.map { |text| { text: text } }
+          }
+        end
+
        unless tools.empty?
          body[:tools] = [{
            functionDeclarations: tools.map { |t| format_tool(t) }
@@ -92,21 +117,116 @@ module RubyPi
 
      # Converts a normalized message hash to Gemini's content format.
      #
+      # Critically, an assistant message that carries `tool_calls` (set by
+      # the agent loop after a tool-using turn) must be rendered with one
+      # `functionCall` part per tool call. Without those parts, Gemini
+      # rejects any subsequent `functionResponse` on the next turn because
+      # the response has nothing to correlate against. Earlier versions
+      # dropped `tool_calls` here, breaking multi-turn tool use.
+      #
      # @param message [Hash] a message with :role and :content keys
      # @return [Hash] Gemini-formatted content object
      def format_message(message)
        role = message[:role]&.to_s || message["role"]&.to_s || "user"
-        content = message[:content] || message["content"]
+        content = message[:content] || message["content"]
+
+        # Tool-role messages carry function-call results. When the tool name
+        # is present, send as a Gemini functionResponse so the model can
+        # correlate the result with its earlier functionCall. System messages
+        # should have been extracted by build_request_body before reaching
+        # this method.
+        tool_name = message[:name] || message["name"]
+        if role == "tool" && tool_name
+          # Gemini's functionResponse expects a structured `response` object.
+          # Tool results are pre-serialized by the loop as either a JSON
+          # string (success) or an "Error: ..." string (failure). Try to
+          # parse JSON so the model receives structured data; fall back to
+          # wrapping the raw string under :result for plain-text content.
+          response_payload = parse_tool_response(content)
+          return {
+            role: "user",
+            parts: [{
+              functionResponse: {
+                name: tool_name.to_s,
+                response: response_payload
+              }
+            }]
+          }
+        end
 
-        #
-
+        # Assistant messages may carry `tool_calls` from a prior turn. Each
+        # one must be emitted as a `functionCall` part on the model turn so
+        # that the next turn's `functionResponse` has something to bind to.
+        if role == "assistant"
+          parts = []
+          text = content.to_s
+          parts << { text: text } unless text.empty?
+
+          tool_calls = message[:tool_calls] || message["tool_calls"]
+          if tool_calls.is_a?(Array)
+            tool_calls.each do |tc|
+              tc_name = (tc[:name] || tc["name"]).to_s
+              tc_args = tc[:arguments] || tc["arguments"] || {}
+              tc_args = parse_tool_arguments(tc_args)
+              parts << { functionCall: { name: tc_name, args: tc_args } }
+            end
+          end
+
+          # Gemini rejects an empty parts array on a model turn. If the
+          # assistant truly had no content and no tool_calls, fall back to
+          # an empty text part.
+          parts << { text: "" } if parts.empty?
+
+          return { role: "model", parts: parts }
+        end
 
        {
-          role:
+          role: role,
          parts: [{ text: content.to_s }]
        }
      end
 
+      # Best-effort parse of a tool-result string into a structured object
+      # for Gemini's `functionResponse.response`. JSON content is returned
+      # as-is (wrapped in a hash if it parsed to a non-hash); non-JSON
+      # content (e.g., "Error: ...") is wrapped under :result.
+      #
+      # @param content [String, Hash, nil]
+      # @return [Hash]
+      def parse_tool_response(content)
+        return { result: "" } if content.nil?
+        return content if content.is_a?(Hash)
+
+        str = content.to_s
+        return { result: str } if str.strip.empty?
+
+        begin
+          parsed = JSON.parse(str)
+          parsed.is_a?(Hash) ? parsed : { result: parsed }
+        rescue JSON::ParserError
+          { result: str }
+        end
+      end
+
+      # Coerce a tool_call.arguments value (Hash, JSON string, or other)
+      # into a Hash suitable for Gemini's `functionCall.args`. Malformed
+      # or non-Hash values become an empty hash so the request is still
+      # well-formed.
+      #
+      # @param args [Hash, String, nil]
+      # @return [Hash]
+      def parse_tool_arguments(args)
+        return args if args.is_a?(Hash)
+        return {} unless args.is_a?(String) && !args.strip.empty?
+
+        begin
+          parsed = JSON.parse(args)
+          parsed.is_a?(Hash) ? parsed : {}
+        rescue JSON::ParserError
+          {}
+        end
+      end
+
      # Converts a tool definition to Gemini's function declaration format.
      # Accepts either a RubyPi::Tools::Definition or a plain Hash.
      #
@@ -126,17 +246,37 @@ module RubyPi
        declaration
      end
 
+      # Returns the default HTTP headers for Gemini API requests.
+      #
+      # Issue #13: The API key is now sent via the `x-goog-api-key` header
+      # instead of being interpolated into the URL query string. This prevents
+      # the key from leaking into debug logs, backtraces, and HTTP intermediary
+      # logs (proxies, load balancers, etc.).
+      #
+      # @return [Hash] headers hash
+      def default_headers
+        {
+          "x-goog-api-key" => @api_key.to_s
+        }
+      end
+
      # Executes a standard (non-streaming) request to the Gemini API.
      #
+      # Issue #13: Removed API key from the URL query string. The key is now
+      # sent via the `x-goog-api-key` header (set in default_headers) to
+      # avoid leaking credentials into logs and backtraces.
+      #
      # @param body [Hash] the request body
      # @return [RubyPi::LLM::Response]
      def perform_standard_request(body)
-        conn = build_connection(base_url: BASE_URL)
-        url = "/#{API_VERSION}/models/#{@model}:generateContent
+        conn = build_connection(base_url: BASE_URL, headers: default_headers)
+        url = "/#{API_VERSION}/models/#{@model}:generateContent"
 
-        response =
-        … (removed lines not captured in this view)
+        response = with_transport_errors do
+          conn.post(url) do |req|
+            req.headers["Content-Type"] = "application/json"
+            req.body = JSON.generate(body)
+          end
        end
 
        handle_error_response(response) unless response.success?
@@ -145,57 +285,120 @@ module RubyPi
 
      # Executes a streaming request to the Gemini API, yielding events.
      #
+      # Issue #13: Removed API key from the URL query string. The key is now
+      # sent via the `x-goog-api-key` header (set in default_headers).
+      #
      # @param body [Hash] the request body
      # @yield [event] StreamEvent objects
      # @return [RubyPi::LLM::Response] final aggregated response
      def perform_streaming_request(body, &block)
-        conn = build_connection(base_url: BASE_URL)
-        url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?
+        conn = build_connection(base_url: BASE_URL, headers: default_headers)
+        url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?alt=sse"
 
        accumulated_text = +""
        accumulated_tool_calls = []
        usage_data = {}
-        … (removed lines not captured in this view)
-              block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
+        finish_reason = nil
+
+        # Buffer for incomplete SSE lines across on_data chunks. Faraday's
+        # on_data callback delivers raw bytes as they arrive from the network,
+        # which may split SSE events mid-line. We accumulate a line buffer and
+        # process complete lines incrementally so that deltas reach the caller
+        # as soon as each SSE event is fully received.
+        sse_buffer = +""
+        response_status = nil
+        error_body = +""
+
+        response = with_transport_errors do
+          conn.post(url) do |req|
+            req.headers["Content-Type"] = "application/json"
+            req.body = JSON.generate(body)
+
+            # Use Faraday's on_data callback for real incremental streaming.
+            # Without this, Faraday buffers the entire response body before
+            # returning — no deltas reach the caller until the model finishes
+            # generating (fake streaming).
+            req.options.on_data = proc do |chunk, _overall_received_bytes, env|
+              response_status ||= env&.status
+
+              # If the HTTP status indicates an error, accumulate the body for
+              # the error handler instead of parsing it as SSE events.
+              if response_status && response_status >= 400
+                error_body << chunk
+                next
              end
-            end
 
-              … (removed lines not captured in this view)
+              sse_buffer << chunk
+              # Process all complete lines in the buffer
+              while (line_end = sse_buffer.index("\n"))
+                line = sse_buffer.slice!(0, line_end + 1).strip
+                next if line.empty?
+                next unless line.start_with?("data: ")
+
+                data_str = line.sub(/\Adata: /, "")
+                next if data_str == "[DONE]"
+
+                begin
+                  data = JSON.parse(data_str)
+                rescue JSON::ParserError
+                  next
+                end
+
+                # Process this SSE event
+                candidates = data.dig("candidates") || []
+                candidate = candidates.first
+                next unless candidate
+
+                parts = candidate.dig("content", "parts") || []
+                parts.each do |part|
+                  if part.key?("text")
+                    text_chunk = part["text"]
+                    accumulated_text << text_chunk
+                    block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
+                  elsif part.key?("functionCall")
+                    fc = part["functionCall"]
+                    tool_call = ToolCall.new(
+                      # Generate a globally-unique ID per tool call. A simple
+                      # length-based counter ("gemini_0", "gemini_1") collides
+                      # across turns since each response restarts numbering at
+                      # 0, breaking any caller that uses ID as a hash key for
+                      # observability or result correlation.
+                      id: "gemini_#{SecureRandom.hex(8)}",
+                      name: fc["name"],
+                      arguments: fc["args"] || {}
+                    )
+                    accumulated_tool_calls << tool_call
+                    block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
+                  end
+                end
+
+                # Parse the actual finish reason from the streaming response
+                # instead of hardcoding "stop". Gemini sends finishReason in
+                # the candidate object (e.g., "STOP", "MAX_TOKENS", "SAFETY").
+                if candidate["finishReason"]
+                  finish_reason = candidate["finishReason"].downcase
+                end
+
+                # Capture usage metadata if present
+                if data.key?("usageMetadata")
+                  meta = data["usageMetadata"]
+                  usage_data = {
+                    prompt_tokens: meta["promptTokenCount"],
+                    completion_tokens: meta["candidatesTokenCount"],
+                    total_tokens: meta["totalTokenCount"]
+                  }
+                end
+              end
            end
+          end # conn.post
+        end # with_transport_errors
+
+        # When on_data is active, the response body was consumed by the
+        # callback. Pass the accumulated error_body so ApiError carries the
+        # full server message instead of an empty body.
+        unless response.success?
+          error_body_str = error_body.empty? ? response.body : error_body
+          handle_error_response(response, override_body: error_body_str)
        end
 
        # Signal completion
@@ -205,7 +408,7 @@ module RubyPi
          content: accumulated_text.empty? ? nil : accumulated_text,
          tool_calls: accumulated_tool_calls,
          usage: usage_data,
-          finish_reason: "stop"
+          finish_reason: finish_reason || "stop"
        )
      end
 
@@ -227,7 +430,9 @@ module RubyPi
        elsif part.key?("functionCall")
          fc = part["functionCall"]
          tool_calls << ToolCall.new(
-            …
+            # See note in perform_streaming_request: per-response counters
+            # collide across turns, so we generate a globally-unique ID.
+            id: "gemini_#{SecureRandom.hex(8)}",
            name: fc["name"],
            arguments: fc["args"] || {}
          )
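Taken together, the build_request_body and format_message changes mean a multi-turn tool conversation now round-trips cleanly through Gemini's content schema. The sketch below shows the rough mapping for one tool round trip; the incoming message shape (role/content/tool_calls/name keys) is inferred from what format_message reads rather than shown in this diff, and the get_weather tool is purely hypothetical.

  # Hypothetical conversation as the agent loop might hand it to the provider
  # (the loop's exact output is not shown in this diff).
  messages = [
    { role: :system, content: "You are a weather assistant." },
    { role: :user, content: "What's the weather in Lisbon?" },
    { role: :assistant, content: "", tool_calls: [
      { name: "get_weather", arguments: { "city" => "Lisbon" } }
    ] },
    { role: :tool, name: "get_weather", content: '{"temp_c": 21}' }
  ]

  # build_request_body(messages, []) should now produce, roughly:
  # {
  #   contents: [
  #     { role: "user",  parts: [{ text: "What's the weather in Lisbon?" }] },
  #     { role: "model", parts: [{ functionCall: { name: "get_weather", args: { "city" => "Lisbon" } } }] },
  #     { role: "user",  parts: [{ functionResponse: { name: "get_weather", response: { "temp_c" => 21 } } }] }
  #   ],
  #   systemInstruction: { parts: [{ text: "You are a weather assistant." }] }
  # }

Note that the system message never appears in contents, the assistant turn carries a functionCall part even though its text is empty, and the JSON tool result is parsed back into a structured object for functionResponse.response.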