ruby-pi 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +51 -0
- data/README.md +77 -29
- data/lib/ruby_pi/agent/core.rb +59 -4
- data/lib/ruby_pi/agent/events.rb +17 -3
- data/lib/ruby_pi/agent/loop.rb +103 -18
- data/lib/ruby_pi/agent/result.rb +46 -7
- data/lib/ruby_pi/agent/state.rb +12 -0
- data/lib/ruby_pi/configuration.rb +28 -7
- data/lib/ruby_pi/context/compaction.rb +17 -2
- data/lib/ruby_pi/context/transform.rb +67 -3
- data/lib/ruby_pi/errors.rb +19 -1
- data/lib/ruby_pi/llm/anthropic.rb +231 -59
- data/lib/ruby_pi/llm/base_provider.rb +44 -46
- data/lib/ruby_pi/llm/fallback.rb +106 -1
- data/lib/ruby_pi/llm/gemini.rb +161 -41
- data/lib/ruby_pi/llm/openai.rb +173 -42
- data/lib/ruby_pi/llm/stream_event.rb +13 -3
- data/lib/ruby_pi/llm/tool_call.rb +26 -3
- data/lib/ruby_pi/tools/executor.rb +130 -21
- data/lib/ruby_pi/tools/registry.rb +26 -16
- data/lib/ruby_pi/version.rb +1 -1
- data/lib/ruby_pi.rb +2 -1
- metadata +5 -39
data/lib/ruby_pi/llm/fallback.rb
CHANGED
|
@@ -16,6 +16,12 @@ module RubyPi
|
|
|
16
16
|
# Authentication errors are NOT retried with the fallback since they
|
|
17
17
|
# indicate a configuration problem rather than a transient failure.
|
|
18
18
|
#
|
|
19
|
+
# Issue #23: When streaming, the Fallback now buffers deltas from the
|
|
20
|
+
# primary provider. If the primary fails mid-stream, the buffered deltas
|
|
21
|
+
# are discarded and the fallback provider streams fresh from the start.
|
|
22
|
+
# This prevents the consumer from seeing partial output from the primary
|
|
23
|
+
# concatenated with the complete output from the fallback.
|
|
24
|
+
#
|
|
19
25
|
# @example Setting up a fallback chain
|
|
20
26
|
# primary = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
|
|
21
27
|
# backup = RubyPi::LLM.model(:openai, "gpt-4o")
|
|
@@ -55,6 +61,28 @@ module RubyPi
|
|
|
55
61
|
:fallback
|
|
56
62
|
end
|
|
57
63
|
|
|
64
|
+
# Overrides BaseProvider#complete to skip the outer retry wrapper.
|
|
65
|
+
#
|
|
66
|
+
# Without this override, Fallback inherits BaseProvider#complete which
|
|
67
|
+
# wraps perform_complete in a retry loop. Since perform_complete calls
|
|
68
|
+
# @primary.complete (which has its own retry loop) and @fallback.complete
|
|
69
|
+
# (also with retries), the retry layers compose multiplicatively:
|
|
70
|
+
# outer_retries x (primary_retries + fallback_retries)
|
|
71
|
+
# With default max_retries=3, that's 4 x (4 + 4) = 32 total attempts
|
|
72
|
+
# instead of the expected 4 + 4 = 8.
|
|
73
|
+
#
|
|
74
|
+
# This override calls perform_complete directly — no outer retry loop.
|
|
75
|
+
# Each inner provider handles its own retries independently.
|
|
76
|
+
#
|
|
77
|
+
# @param messages [Array<Hash>] conversation messages
|
|
78
|
+
# @param tools [Array<Hash>] tool/function definitions
|
|
79
|
+
# @param stream [Boolean] whether to enable streaming mode
|
|
80
|
+
# @yield [event] yields StreamEvent objects when streaming
|
|
81
|
+
# @return [RubyPi::LLM::Response]
|
|
82
|
+
def complete(messages:, tools: [], stream: false, &block)
|
|
83
|
+
perform_complete(messages: messages, tools: tools, stream: stream, &block)
|
|
84
|
+
end
|
|
85
|
+
|
|
58
86
|
private
|
|
59
87
|
|
|
60
88
|
# Attempts the completion with the primary provider. If it fails with
|
|
@@ -62,12 +90,36 @@ module RubyPi
|
|
|
62
90
|
# the request is retried with the fallback provider. Authentication errors
|
|
63
91
|
# propagate immediately since they indicate misconfiguration.
|
|
64
92
|
#
|
|
93
|
+
# Each inner provider handles its own retries via BaseProvider#complete,
|
|
94
|
+
# so this method does NOT add an additional retry layer.
|
|
95
|
+
#
|
|
96
|
+
# Issue #23 + Issue #12: When streaming with a block, events are
|
|
97
|
+
# delivered to the consumer in real-time (no buffering). If the
|
|
98
|
+
# primary fails mid-stream, a :fallback_start event is emitted
|
|
99
|
+
# so the consumer can clear partial state, then the fallback
|
|
100
|
+
# provider streams directly to the consumer.
|
|
101
|
+
#
|
|
65
102
|
# @param messages [Array<Hash>] conversation messages
|
|
66
103
|
# @param tools [Array<Hash>] tool definitions
|
|
67
104
|
# @param stream [Boolean] streaming mode flag
|
|
68
105
|
# @yield [event] optional block for streaming events
|
|
69
106
|
# @return [RubyPi::LLM::Response]
|
|
70
107
|
def perform_complete(messages:, tools:, stream:, &block)
|
|
108
|
+
if stream && block_given?
|
|
109
|
+
perform_complete_with_streaming_fallback(messages: messages, tools: tools, &block)
|
|
110
|
+
else
|
|
111
|
+
perform_complete_without_streaming(messages: messages, tools: tools, stream: stream, &block)
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Non-streaming fallback — simple try primary, rescue, try fallback.
|
|
116
|
+
#
|
|
117
|
+
# @param messages [Array<Hash>] conversation messages
|
|
118
|
+
# @param tools [Array<Hash>] tool definitions
|
|
119
|
+
# @param stream [Boolean] streaming mode flag
|
|
120
|
+
# @yield [event] optional block for streaming events
|
|
121
|
+
# @return [RubyPi::LLM::Response]
|
|
122
|
+
def perform_complete_without_streaming(messages:, tools:, stream:, &block)
|
|
71
123
|
@primary.complete(messages: messages, tools: tools, stream: stream, &block)
|
|
72
124
|
rescue RubyPi::AuthenticationError
|
|
73
125
|
# Configuration errors should not trigger fallback
|
|
@@ -77,12 +129,65 @@ module RubyPi
|
|
|
77
129
|
@fallback.complete(messages: messages, tools: tools, stream: stream, &block)
|
|
78
130
|
end
|
|
79
131
|
|
|
132
|
+
# Streaming fallback with real-time event delivery.
|
|
133
|
+
#
|
|
134
|
+
# Issue #23 + Issue #12: Stream events directly to the consumer in
|
|
135
|
+
# real-time (no buffering on the happy path). If the primary provider
|
|
136
|
+
# fails mid-stream, emit a :fallback_start event so the consumer can
|
|
137
|
+
# reset any partial state, then stream from the fallback provider.
|
|
138
|
+
#
|
|
139
|
+
# This preserves the streaming UX: consumers see tokens as they arrive
|
|
140
|
+
# instead of waiting for the entire response to complete. The tradeoff
|
|
141
|
+
# is that on primary failure, the consumer receives a :fallback_start
|
|
142
|
+
# signal and is responsible for clearing partial output.
|
|
143
|
+
#
|
|
144
|
+
# @param messages [Array<Hash>] conversation messages
|
|
145
|
+
# @param tools [Array<Hash>] tool definitions
|
|
146
|
+
# @yield [event] the consumer's streaming block
|
|
147
|
+
# @return [RubyPi::LLM::Response]
|
|
148
|
+
def perform_complete_with_streaming_fallback(messages:, tools:, &block)
|
|
149
|
+
begin
|
|
150
|
+
# Stream primary events directly to the consumer for real-time UX.
|
|
151
|
+
# No buffering — tokens appear immediately as they arrive.
|
|
152
|
+
response = @primary.complete(
|
|
153
|
+
messages: messages,
|
|
154
|
+
tools: tools,
|
|
155
|
+
stream: true,
|
|
156
|
+
&block
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
response
|
|
160
|
+
rescue RubyPi::AuthenticationError
|
|
161
|
+
# Configuration errors should not trigger fallback
|
|
162
|
+
raise
|
|
163
|
+
rescue RubyPi::Error => e
|
|
164
|
+
log_fallback(e)
|
|
165
|
+
|
|
166
|
+
# Signal the consumer that the primary failed mid-stream and a
|
|
167
|
+
# fallback provider is taking over. Consumers should use this event
|
|
168
|
+
# to clear any partial output from the failed primary.
|
|
169
|
+
block.call(StreamEvent.new(type: :fallback_start, data: {
|
|
170
|
+
failed_provider: @primary.provider_name,
|
|
171
|
+
error: e.message,
|
|
172
|
+
fallback_provider: @fallback.provider_name
|
|
173
|
+
}))
|
|
174
|
+
|
|
175
|
+
# Stream directly from the fallback to the consumer's block.
|
|
176
|
+
@fallback.complete(
|
|
177
|
+
messages: messages,
|
|
178
|
+
tools: tools,
|
|
179
|
+
stream: true,
|
|
180
|
+
&block
|
|
181
|
+
)
|
|
182
|
+
end
|
|
183
|
+
end
|
|
184
|
+
|
|
80
185
|
# Logs the fallback event if a logger is configured.
|
|
81
186
|
#
|
|
82
187
|
# @param error [Exception] the error that triggered the fallback
|
|
83
188
|
# @return [void]
|
|
84
189
|
def log_fallback(error)
|
|
85
|
-
logger = RubyPi.configuration.logger
|
|
190
|
+
logger = @config.logger
|
|
86
191
|
return unless logger
|
|
87
192
|
|
|
88
193
|
logger.warn(
|
data/lib/ruby_pi/llm/gemini.rb
CHANGED
|
@@ -33,7 +33,7 @@ module RubyPi
|
|
|
33
33
|
# @param options [Hash] additional options passed to BaseProvider
|
|
34
34
|
def initialize(model: nil, api_key: nil, **options)
|
|
35
35
|
super(**options)
|
|
36
|
-
config = RubyPi.configuration
|
|
36
|
+
config = @config
|
|
37
37
|
@model = model || config.default_gemini_model
|
|
38
38
|
@api_key = api_key || config.gemini_api_key
|
|
39
39
|
end
|
|
@@ -77,10 +77,33 @@ module RubyPi
|
|
|
77
77
|
# @param tools [Array<Hash>] tool definitions
|
|
78
78
|
# @return [Hash] the request body
|
|
79
79
|
def build_request_body(messages, tools)
|
|
80
|
+
# Separate system messages from conversation messages. Gemini requires
|
|
81
|
+
# system instructions via a dedicated `systemInstruction` field — they
|
|
82
|
+
# cannot appear as entries in `contents`. The Loop prepends a
|
|
83
|
+
# { role: :system } message; we extract it here.
|
|
84
|
+
system_parts = []
|
|
85
|
+
conversation_messages = []
|
|
86
|
+
|
|
87
|
+
messages.each do |msg|
|
|
88
|
+
role = (msg[:role] || msg["role"]).to_s
|
|
89
|
+
if role == "system"
|
|
90
|
+
system_parts << (msg[:content] || msg["content"]).to_s
|
|
91
|
+
else
|
|
92
|
+
conversation_messages << msg
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
80
96
|
body = {
|
|
81
|
-
contents: messages.map { |msg| format_message(msg) }
|
|
97
|
+
contents: conversation_messages.map { |msg| format_message(msg) }
|
|
82
98
|
}
|
|
83
99
|
|
|
100
|
+
# Inject system instruction when system messages are present
|
|
101
|
+
unless system_parts.empty?
|
|
102
|
+
body[:systemInstruction] = {
|
|
103
|
+
parts: system_parts.map { |text| { text: text } }
|
|
104
|
+
}
|
|
105
|
+
end
|
|
106
|
+
|
|
84
107
|
unless tools.empty?
|
|
85
108
|
body[:tools] = [{
|
|
86
109
|
functionDeclarations: tools.map { |t| format_tool(t) }
|
|
@@ -98,8 +121,31 @@ module RubyPi
|
|
|
98
121
|
role = message[:role]&.to_s || message["role"]&.to_s || "user"
|
|
99
122
|
content = message[:content] || message["content"] || ""
|
|
100
123
|
|
|
101
|
-
# Gemini uses "user" and "model" roles
|
|
102
|
-
|
|
124
|
+
# Gemini uses "user" and "model" roles. Map tool results to "user"
|
|
125
|
+
# role with a functionResponse part when we have the metadata, or
|
|
126
|
+
# plain text otherwise. System messages should have been extracted
|
|
127
|
+
# by build_request_body before reaching this method.
|
|
128
|
+
gemini_role = case role
|
|
129
|
+
when "assistant" then "model"
|
|
130
|
+
when "tool" then "user"
|
|
131
|
+
else role
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# Tool-role messages carry function call results. When tool_call_id
|
|
135
|
+
# and name are present, send as a Gemini functionResponse so the
|
|
136
|
+
# model can correlate the result with its earlier functionCall.
|
|
137
|
+
tool_name = message[:name] || message["name"]
|
|
138
|
+
if role == "tool" && tool_name
|
|
139
|
+
return {
|
|
140
|
+
role: "user",
|
|
141
|
+
parts: [{
|
|
142
|
+
functionResponse: {
|
|
143
|
+
name: tool_name.to_s,
|
|
144
|
+
response: { result: content.to_s }
|
|
145
|
+
}
|
|
146
|
+
}]
|
|
147
|
+
}
|
|
148
|
+
end
|
|
103
149
|
|
|
104
150
|
{
|
|
105
151
|
role: gemini_role,
|
|
@@ -126,13 +172,31 @@ module RubyPi
|
|
|
126
172
|
declaration
|
|
127
173
|
end
|
|
128
174
|
|
|
175
|
+
# Returns the default HTTP headers for Gemini API requests.
|
|
176
|
+
#
|
|
177
|
+
# Issue #13: The API key is now sent via the `x-goog-api-key` header
|
|
178
|
+
# instead of being interpolated into the URL query string. This prevents
|
|
179
|
+
# the key from leaking into debug logs, backtraces, and HTTP intermediary
|
|
180
|
+
# logs (proxies, load balancers, etc.).
|
|
181
|
+
#
|
|
182
|
+
# @return [Hash] headers hash
|
|
183
|
+
def default_headers
|
|
184
|
+
{
|
|
185
|
+
"x-goog-api-key" => @api_key.to_s
|
|
186
|
+
}
|
|
187
|
+
end
|
|
188
|
+
|
|
129
189
|
# Executes a standard (non-streaming) request to the Gemini API.
|
|
130
190
|
#
|
|
191
|
+
# Issue #13: Removed API key from the URL query string. The key is now
|
|
192
|
+
# sent via the `x-goog-api-key` header (set in default_headers) to
|
|
193
|
+
# avoid leaking credentials into logs and backtraces.
|
|
194
|
+
#
|
|
131
195
|
# @param body [Hash] the request body
|
|
132
196
|
# @return [RubyPi::LLM::Response]
|
|
133
197
|
def perform_standard_request(body)
|
|
134
|
-
conn = build_connection(base_url: BASE_URL)
|
|
135
|
-
url = "/#{API_VERSION}/models/#{@model}:generateContent?key=#{@api_key}"
|
|
198
|
+
conn = build_connection(base_url: BASE_URL, headers: default_headers)
|
|
199
|
+
url = "/#{API_VERSION}/models/#{@model}:generateContent"
|
|
136
200
|
|
|
137
201
|
response = conn.post(url) do |req|
|
|
138
202
|
req.headers["Content-Type"] = "application/json"
|
|
@@ -145,57 +209,113 @@ module RubyPi
|
|
|
145
209
|
|
|
146
210
|
# Executes a streaming request to the Gemini API, yielding events.
|
|
147
211
|
#
|
|
212
|
+
# Issue #13: Removed API key from the URL query string. The key is now
|
|
213
|
+
# sent via the `x-goog-api-key` header (set in default_headers).
|
|
214
|
+
#
|
|
148
215
|
# @param body [Hash] the request body
|
|
149
216
|
# @yield [event] StreamEvent objects
|
|
150
217
|
# @return [RubyPi::LLM::Response] final aggregated response
|
|
151
218
|
def perform_streaming_request(body, &block)
|
|
152
|
-
conn = build_connection(base_url: BASE_URL)
|
|
153
|
-
url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?alt=sse&key=#{@api_key}"
|
|
219
|
+
conn = build_connection(base_url: BASE_URL, headers: default_headers)
|
|
220
|
+
url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?alt=sse"
|
|
154
221
|
|
|
155
222
|
accumulated_text = +""
|
|
156
223
|
accumulated_tool_calls = []
|
|
157
224
|
usage_data = {}
|
|
225
|
+
finish_reason = nil
|
|
226
|
+
|
|
227
|
+
# Buffer for incomplete SSE lines across on_data chunks. Faraday's
|
|
228
|
+
# on_data callback delivers raw bytes as they arrive from the network,
|
|
229
|
+
# which may split SSE events mid-line. We accumulate a line buffer and
|
|
230
|
+
# process complete lines incrementally so that deltas reach the caller
|
|
231
|
+
# as soon as each SSE event is fully received.
|
|
232
|
+
sse_buffer = +""
|
|
233
|
+
response_status = nil
|
|
234
|
+
error_body = +""
|
|
158
235
|
|
|
159
236
|
response = conn.post(url) do |req|
|
|
160
237
|
req.headers["Content-Type"] = "application/json"
|
|
161
238
|
req.body = JSON.generate(body)
|
|
162
|
-
end
|
|
163
239
|
|
|
164
|
-
|
|
240
|
+
# Use Faraday's on_data callback for real incremental streaming.
|
|
241
|
+
# Without this, Faraday buffers the entire response body before
|
|
242
|
+
# returning — no deltas reach the caller until the model finishes
|
|
243
|
+
# generating (fake streaming).
|
|
244
|
+
req.options.on_data = proc do |chunk, _overall_received_bytes, env|
|
|
245
|
+
response_status ||= env&.status
|
|
246
|
+
|
|
247
|
+
# If the HTTP status indicates an error, accumulate the body for
|
|
248
|
+
# the error handler instead of parsing it as SSE events.
|
|
249
|
+
if response_status && response_status >= 400
|
|
250
|
+
error_body << chunk
|
|
251
|
+
next
|
|
252
|
+
end
|
|
165
253
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
254
|
+
sse_buffer << chunk
|
|
255
|
+
# Process all complete lines in the buffer
|
|
256
|
+
while (line_end = sse_buffer.index("\n"))
|
|
257
|
+
line = sse_buffer.slice!(0, line_end + 1).strip
|
|
258
|
+
next if line.empty?
|
|
259
|
+
next unless line.start_with?("data: ")
|
|
260
|
+
|
|
261
|
+
data_str = line.sub(/\Adata: /, "")
|
|
262
|
+
next if data_str == "[DONE]"
|
|
263
|
+
|
|
264
|
+
begin
|
|
265
|
+
data = JSON.parse(data_str)
|
|
266
|
+
rescue JSON::ParserError
|
|
267
|
+
next
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
# Process this SSE event
|
|
271
|
+
candidates = data.dig("candidates") || []
|
|
272
|
+
candidate = candidates.first
|
|
273
|
+
next unless candidate
|
|
274
|
+
|
|
275
|
+
parts = candidate.dig("content", "parts") || []
|
|
276
|
+
parts.each do |part|
|
|
277
|
+
if part.key?("text")
|
|
278
|
+
text_chunk = part["text"]
|
|
279
|
+
accumulated_text << text_chunk
|
|
280
|
+
block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
|
|
281
|
+
elsif part.key?("functionCall")
|
|
282
|
+
fc = part["functionCall"]
|
|
283
|
+
tool_call = ToolCall.new(
|
|
284
|
+
id: "gemini_#{accumulated_tool_calls.length}",
|
|
285
|
+
name: fc["name"],
|
|
286
|
+
arguments: fc["args"] || {}
|
|
287
|
+
)
|
|
288
|
+
accumulated_tool_calls << tool_call
|
|
289
|
+
block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
|
|
290
|
+
end
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
# Parse the actual finish reason from the streaming response
|
|
294
|
+
# instead of hardcoding "stop". Gemini sends finishReason in
|
|
295
|
+
# the candidate object (e.g., "STOP", "MAX_TOKENS", "SAFETY").
|
|
296
|
+
if candidate["finishReason"]
|
|
297
|
+
finish_reason = candidate["finishReason"].downcase
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
# Capture usage metadata if present
|
|
301
|
+
if data.key?("usageMetadata")
|
|
302
|
+
meta = data["usageMetadata"]
|
|
303
|
+
usage_data = {
|
|
304
|
+
prompt_tokens: meta["promptTokenCount"],
|
|
305
|
+
completion_tokens: meta["candidatesTokenCount"],
|
|
306
|
+
total_tokens: meta["totalTokenCount"]
|
|
307
|
+
}
|
|
308
|
+
end
|
|
187
309
|
end
|
|
188
310
|
end
|
|
311
|
+
end
|
|
189
312
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
total_tokens: meta["totalTokenCount"]
|
|
197
|
-
}
|
|
198
|
-
end
|
|
313
|
+
# When on_data is active, the response body was consumed by the
|
|
314
|
+
# callback. Pass the accumulated error_body so ApiError carries the
|
|
315
|
+
# full server message instead of an empty body.
|
|
316
|
+
unless response.success?
|
|
317
|
+
error_body_str = error_body.empty? ? response.body : error_body
|
|
318
|
+
handle_error_response(response, override_body: error_body_str)
|
|
199
319
|
end
|
|
200
320
|
|
|
201
321
|
# Signal completion
|
|
@@ -205,7 +325,7 @@ module RubyPi
|
|
|
205
325
|
content: accumulated_text.empty? ? nil : accumulated_text,
|
|
206
326
|
tool_calls: accumulated_tool_calls,
|
|
207
327
|
usage: usage_data,
|
|
208
|
-
finish_reason: "stop"
|
|
328
|
+
finish_reason: finish_reason || "stop"
|
|
209
329
|
)
|
|
210
330
|
end
|
|
211
331
|
|