ruby-pi 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,12 @@ module RubyPi
16
16
  # Authentication errors are NOT retried with the fallback since they
17
17
  # indicate a configuration problem rather than a transient failure.
18
18
  #
19
+ # Issue #23: When streaming, events from the primary provider reach the
20
+ # consumer in real time (no buffering). If the primary fails mid-stream,
21
+ # a :fallback_start event is emitted and the fallback provider streams
22
+ # fresh from the start. The consumer clears its partial state on that event
23
+ # so primary output is not concatenated with the fallback's complete output.
24
+ #
19
25
  # @example Setting up a fallback chain
20
26
  # primary = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
21
27
  # backup = RubyPi::LLM.model(:openai, "gpt-4o")
@@ -55,6 +61,28 @@ module RubyPi
55
61
  :fallback
56
62
  end
57
63
 
64
+ # Overrides BaseProvider#complete to skip the outer retry wrapper.
65
+ #
66
+ # Without this override, Fallback inherits BaseProvider#complete which
67
+ # wraps perform_complete in a retry loop. Since perform_complete calls
68
+ # @primary.complete (which has its own retry loop) and @fallback.complete
69
+ # (also with retries), the retry layers compose multiplicatively:
70
+ # outer_retries x (primary_retries + fallback_retries)
71
+ # With default max_retries=3, that's 4 x (4 + 4) = 32 total attempts
72
+ # instead of the expected 4 + 4 = 8.
73
+ #
74
+ # This override calls perform_complete directly — no outer retry loop.
75
+ # Each inner provider handles its own retries independently.
76
+ #
77
+ # @param messages [Array<Hash>] conversation messages
78
+ # @param tools [Array<Hash>] tool/function definitions
79
+ # @param stream [Boolean] whether to enable streaming mode
80
+ # @yield [event] yields StreamEvent objects when streaming
81
+ # @return [RubyPi::LLM::Response]
82
+ def complete(messages:, tools: [], stream: false, &block)
83
+ perform_complete(messages: messages, tools: tools, stream: stream, &block)
84
+ end
85
+
58
86
  private
59
87
 
60
88
  # Attempts the completion with the primary provider. If it fails with
@@ -62,12 +90,36 @@ module RubyPi
62
90
  # the request is retried with the fallback provider. Authentication errors
63
91
  # propagate immediately since they indicate misconfiguration.
64
92
  #
93
+ # Each inner provider handles its own retries via BaseProvider#complete,
94
+ # so this method does NOT add an additional retry layer.
95
+ #
96
+ # Issue #23 + Issue #12: When streaming with a block, events are
97
+ # delivered to the consumer in real-time (no buffering). If the
98
+ # primary fails mid-stream, a :fallback_start event is emitted
99
+ # so the consumer can clear partial state, then the fallback
100
+ # provider streams directly to the consumer.
101
+ #
65
102
  # @param messages [Array<Hash>] conversation messages
66
103
  # @param tools [Array<Hash>] tool definitions
67
104
  # @param stream [Boolean] streaming mode flag
68
105
  # @yield [event] optional block for streaming events
69
106
  # @return [RubyPi::LLM::Response]
70
107
  def perform_complete(messages:, tools:, stream:, &block)
108
+ if stream && block_given?
109
+ perform_complete_with_streaming_fallback(messages: messages, tools: tools, &block)
110
+ else
111
+ perform_complete_without_streaming(messages: messages, tools: tools, stream: stream, &block)
112
+ end
113
+ end
114
+
115
+ # Non-streaming fallback — simple try primary, rescue, try fallback.
116
+ #
117
+ # @param messages [Array<Hash>] conversation messages
118
+ # @param tools [Array<Hash>] tool definitions
119
+ # @param stream [Boolean] streaming mode flag
120
+ # @yield [event] optional block for streaming events
121
+ # @return [RubyPi::LLM::Response]
122
+ def perform_complete_without_streaming(messages:, tools:, stream:, &block)
71
123
  @primary.complete(messages: messages, tools: tools, stream: stream, &block)
72
124
  rescue RubyPi::AuthenticationError
73
125
  # Configuration errors should not trigger fallback
@@ -77,12 +129,65 @@ module RubyPi
77
129
  @fallback.complete(messages: messages, tools: tools, stream: stream, &block)
78
130
  end
79
131
 
132
+ # Streaming fallback with real-time event delivery.
133
+ #
134
+ # Issue #23 + Issue #12: Stream events directly to the consumer in
135
+ # real-time (no buffering on the happy path). If the primary provider
136
+ # fails mid-stream, emit a :fallback_start event so the consumer can
137
+ # reset any partial state, then stream from the fallback provider.
138
+ #
139
+ # This preserves the streaming UX: consumers see tokens as they arrive
140
+ # instead of waiting for the entire response to complete. The tradeoff
141
+ # is that on primary failure, the consumer receives a :fallback_start
142
+ # signal and is responsible for clearing partial output.
143
+ #
144
+ # @param messages [Array<Hash>] conversation messages
145
+ # @param tools [Array<Hash>] tool definitions
146
+ # @yield [event] the consumer's streaming block
147
+ # @return [RubyPi::LLM::Response]
148
+ def perform_complete_with_streaming_fallback(messages:, tools:, &block)
149
+ begin
150
+ # Stream primary events directly to the consumer for real-time UX.
151
+ # No buffering — tokens appear immediately as they arrive.
152
+ response = @primary.complete(
153
+ messages: messages,
154
+ tools: tools,
155
+ stream: true,
156
+ &block
157
+ )
158
+
159
+ response
160
+ rescue RubyPi::AuthenticationError
161
+ # Configuration errors should not trigger fallback
162
+ raise
163
+ rescue RubyPi::Error => e
164
+ log_fallback(e)
165
+
166
+ # Signal the consumer that the primary failed mid-stream and a
167
+ # fallback provider is taking over. Consumers should use this event
168
+ # to clear any partial output from the failed primary.
169
+ block.call(StreamEvent.new(type: :fallback_start, data: {
170
+ failed_provider: @primary.provider_name,
171
+ error: e.message,
172
+ fallback_provider: @fallback.provider_name
173
+ }))
174
+
175
+ # Stream directly from the fallback to the consumer's block.
176
+ @fallback.complete(
177
+ messages: messages,
178
+ tools: tools,
179
+ stream: true,
180
+ &block
181
+ )
182
+ end
183
+ end
184
+
80
185
  # Logs the fallback event if a logger is configured.
81
186
  #
82
187
  # @param error [Exception] the error that triggered the fallback
83
188
  # @return [void]
84
189
  def log_fallback(error)
85
- logger = RubyPi.configuration.logger
190
+ logger = @config.logger
86
191
  return unless logger
87
192
 
88
193
  logger.warn(
@@ -33,7 +33,7 @@ module RubyPi
33
33
  # @param options [Hash] additional options passed to BaseProvider
34
34
  def initialize(model: nil, api_key: nil, **options)
35
35
  super(**options)
36
- config = RubyPi.configuration
36
+ config = @config
37
37
  @model = model || config.default_gemini_model
38
38
  @api_key = api_key || config.gemini_api_key
39
39
  end
@@ -77,10 +77,33 @@ module RubyPi
77
77
  # @param tools [Array<Hash>] tool definitions
78
78
  # @return [Hash] the request body
79
79
  def build_request_body(messages, tools)
80
+ # Separate system messages from conversation messages. Gemini requires
81
+ # system instructions via a dedicated `systemInstruction` field — they
82
+ # cannot appear as entries in `contents`. The Loop prepends a
83
+ # { role: :system } message; we extract it here.
84
+ system_parts = []
85
+ conversation_messages = []
86
+
87
+ messages.each do |msg|
88
+ role = (msg[:role] || msg["role"]).to_s
89
+ if role == "system"
90
+ system_parts << (msg[:content] || msg["content"]).to_s
91
+ else
92
+ conversation_messages << msg
93
+ end
94
+ end
95
+
80
96
  body = {
81
- contents: messages.map { |msg| format_message(msg) }
97
+ contents: conversation_messages.map { |msg| format_message(msg) }
82
98
  }
83
99
 
100
+ # Inject system instruction when system messages are present
101
+ unless system_parts.empty?
102
+ body[:systemInstruction] = {
103
+ parts: system_parts.map { |text| { text: text } }
104
+ }
105
+ end
106
+
84
107
  unless tools.empty?
85
108
  body[:tools] = [{
86
109
  functionDeclarations: tools.map { |t| format_tool(t) }
@@ -98,8 +121,31 @@ module RubyPi
98
121
  role = message[:role]&.to_s || message["role"]&.to_s || "user"
99
122
  content = message[:content] || message["content"] || ""
100
123
 
101
- # Gemini uses "user" and "model" roles
102
- gemini_role = role == "assistant" ? "model" : role
124
+ # Gemini uses "user" and "model" roles. Map tool results to "user"
125
+ # role with a functionResponse part when we have the metadata, or
126
+ # plain text otherwise. System messages should have been extracted
127
+ # by build_request_body before reaching this method.
128
+ gemini_role = case role
129
+ when "assistant" then "model"
130
+ when "tool" then "user"
131
+ else role
132
+ end
133
+
134
+ # Tool-role messages carry function call results. When the message
135
+ # has a name, send it as a Gemini functionResponse so the model can
136
+ # correlate the result with its earlier functionCall by function name.
137
+ tool_name = message[:name] || message["name"]
138
+ if role == "tool" && tool_name
139
+ return {
140
+ role: "user",
141
+ parts: [{
142
+ functionResponse: {
143
+ name: tool_name.to_s,
144
+ response: { result: content.to_s }
145
+ }
146
+ }]
147
+ }
148
+ end
103
149
 
104
150
  {
105
151
  role: gemini_role,
@@ -126,13 +172,31 @@ module RubyPi
126
172
  declaration
127
173
  end
128
174
 
175
+ # Returns the default HTTP headers for Gemini API requests.
176
+ #
177
+ # Issue #13: The API key is now sent via the `x-goog-api-key` header
178
+ # instead of being interpolated into the URL query string. This prevents
179
+ # the key from leaking into debug logs, backtraces, and HTTP intermediary
180
+ # logs (proxies, load balancers, etc.).
181
+ #
182
+ # @return [Hash] headers hash
183
+ def default_headers
184
+ {
185
+ "x-goog-api-key" => @api_key.to_s
186
+ }
187
+ end
188
+
129
189
  # Executes a standard (non-streaming) request to the Gemini API.
130
190
  #
191
+ # Issue #13: Removed API key from the URL query string. The key is now
192
+ # sent via the `x-goog-api-key` header (set in default_headers) to
193
+ # avoid leaking credentials into logs and backtraces.
194
+ #
131
195
  # @param body [Hash] the request body
132
196
  # @return [RubyPi::LLM::Response]
133
197
  def perform_standard_request(body)
134
- conn = build_connection(base_url: BASE_URL)
135
- url = "/#{API_VERSION}/models/#{@model}:generateContent?key=#{@api_key}"
198
+ conn = build_connection(base_url: BASE_URL, headers: default_headers)
199
+ url = "/#{API_VERSION}/models/#{@model}:generateContent"
136
200
 
137
201
  response = conn.post(url) do |req|
138
202
  req.headers["Content-Type"] = "application/json"
@@ -145,57 +209,113 @@ module RubyPi
145
209
 
146
210
  # Executes a streaming request to the Gemini API, yielding events.
147
211
  #
212
+ # Issue #13: Removed API key from the URL query string. The key is now
213
+ # sent via the `x-goog-api-key` header (set in default_headers).
214
+ #
148
215
  # @param body [Hash] the request body
149
216
  # @yield [event] StreamEvent objects
150
217
  # @return [RubyPi::LLM::Response] final aggregated response
151
218
  def perform_streaming_request(body, &block)
152
- conn = build_connection(base_url: BASE_URL)
153
- url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?key=#{@api_key}&alt=sse"
219
+ conn = build_connection(base_url: BASE_URL, headers: default_headers)
220
+ url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?alt=sse"
154
221
 
155
222
  accumulated_text = +""
156
223
  accumulated_tool_calls = []
157
224
  usage_data = {}
225
+ finish_reason = nil
226
+
227
+ # Buffer for incomplete SSE lines across on_data chunks. Faraday's
228
+ # on_data callback delivers raw bytes as they arrive from the network,
229
+ # which may split SSE events mid-line. We accumulate a line buffer and
230
+ # process complete lines incrementally so that deltas reach the caller
231
+ # as soon as each SSE event is fully received.
232
+ sse_buffer = +""
233
+ response_status = nil
234
+ error_body = +""
158
235
 
159
236
  response = conn.post(url) do |req|
160
237
  req.headers["Content-Type"] = "application/json"
161
238
  req.body = JSON.generate(body)
162
- end
163
239
 
164
- handle_error_response(response) unless response.success?
240
+ # Use Faraday's on_data callback for real incremental streaming.
241
+ # Without this, Faraday buffers the entire response body before
242
+ # returning — no deltas reach the caller until the model finishes
243
+ # generating (fake streaming).
244
+ req.options.on_data = proc do |chunk, _overall_received_bytes, env|
245
+ response_status ||= env&.status
246
+
247
+ # If the HTTP status indicates an error, accumulate the body for
248
+ # the error handler instead of parsing it as SSE events.
249
+ if response_status && response_status >= 400
250
+ error_body << chunk
251
+ next
252
+ end
165
253
 
166
- # Parse SSE events from the response body
167
- parse_sse_events(response.body) do |data|
168
- candidates = data.dig("candidates") || []
169
- candidate = candidates.first
170
- next unless candidate
171
-
172
- parts = candidate.dig("content", "parts") || []
173
- parts.each do |part|
174
- if part.key?("text")
175
- text_chunk = part["text"]
176
- accumulated_text << text_chunk
177
- block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
178
- elsif part.key?("functionCall")
179
- fc = part["functionCall"]
180
- tool_call = ToolCall.new(
181
- id: "gemini_#{accumulated_tool_calls.length}",
182
- name: fc["name"],
183
- arguments: fc["args"] || {}
184
- )
185
- accumulated_tool_calls << tool_call
186
- block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
254
+ sse_buffer << chunk
255
+ # Process all complete lines in the buffer
256
+ while (line_end = sse_buffer.index("\n"))
257
+ line = sse_buffer.slice!(0, line_end + 1).strip
258
+ next if line.empty?
259
+ next unless line.start_with?("data: ")
260
+
261
+ data_str = line.sub(/\Adata: /, "")
262
+ next if data_str == "[DONE]"
263
+
264
+ begin
265
+ data = JSON.parse(data_str)
266
+ rescue JSON::ParserError
267
+ next
268
+ end
269
+
270
+ # Process this SSE event
271
+ candidates = data.dig("candidates") || []
272
+ candidate = candidates.first
273
+ next unless candidate
274
+
275
+ parts = candidate.dig("content", "parts") || []
276
+ parts.each do |part|
277
+ if part.key?("text")
278
+ text_chunk = part["text"]
279
+ accumulated_text << text_chunk
280
+ block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
281
+ elsif part.key?("functionCall")
282
+ fc = part["functionCall"]
283
+ tool_call = ToolCall.new(
284
+ id: "gemini_#{accumulated_tool_calls.length}",
285
+ name: fc["name"],
286
+ arguments: fc["args"] || {}
287
+ )
288
+ accumulated_tool_calls << tool_call
289
+ block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
290
+ end
291
+ end
292
+
293
+ # Parse the actual finish reason from the streaming response
294
+ # instead of hardcoding "stop". Gemini sends finishReason in
295
+ # the candidate object (e.g., "STOP", "MAX_TOKENS", "SAFETY").
296
+ if candidate["finishReason"]
297
+ finish_reason = candidate["finishReason"].downcase
298
+ end
299
+
300
+ # Capture usage metadata if present
301
+ if data.key?("usageMetadata")
302
+ meta = data["usageMetadata"]
303
+ usage_data = {
304
+ prompt_tokens: meta["promptTokenCount"],
305
+ completion_tokens: meta["candidatesTokenCount"],
306
+ total_tokens: meta["totalTokenCount"]
307
+ }
308
+ end
187
309
  end
188
310
  end
311
+ end
189
312
 
190
- # Capture usage metadata if present
191
- if data.key?("usageMetadata")
192
- meta = data["usageMetadata"]
193
- usage_data = {
194
- prompt_tokens: meta["promptTokenCount"],
195
- completion_tokens: meta["candidatesTokenCount"],
196
- total_tokens: meta["totalTokenCount"]
197
- }
198
- end
313
+ # When on_data is active, the response body was consumed by the
314
+ # callback. Pass the accumulated error_body so ApiError carries the
315
+ # full server message instead of an empty body.
316
+ unless response.success?
317
+ error_body_str = error_body.empty? ? response.body : error_body
318
+ handle_error_response(response, override_body: error_body_str)
199
319
  end
200
320
 
201
321
  # Signal completion
@@ -205,7 +325,7 @@ module RubyPi
205
325
  content: accumulated_text.empty? ? nil : accumulated_text,
206
326
  tool_calls: accumulated_tool_calls,
207
327
  usage: usage_data,
208
- finish_reason: "stop"
328
+ finish_reason: finish_reason || "stop"
209
329
  )
210
330
  end
211
331