ruby-pi 0.1.3 → 0.1.6

This diff shows the changes between the published 0.1.3 and 0.1.6 releases of ruby-pi as they appear in the public registry.
@@ -16,6 +16,12 @@ module RubyPi
  # Authentication errors are NOT retried with the fallback since they
  # indicate a configuration problem rather than a transient failure.
  #
+ # Issue #23: When streaming, deltas from the primary are delivered to the
+ # consumer in real-time. If the primary fails mid-stream, a :fallback_start
+ # event is emitted so the consumer can discard the partial output, and the
+ # fallback provider then streams fresh from the start, so the consumer never
+ # sees primary and fallback output silently concatenated.
+ #
  # @example Setting up a fallback chain
  #   primary = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
  #   backup = RubyPi::LLM.model(:openai, "gpt-4o")
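
For orientation, a minimal sketch of how such a chain might be driven. The `Fallback.new(primary:, fallback:)` keywords and the `StreamEvent` `#type`/`#data` readers are assumptions for illustration; only `RubyPi::LLM.model` and the `complete` signature appear in this diff.

```ruby
# Hypothetical usage sketch -- constructor keywords and event readers assumed.
primary = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
backup  = RubyPi::LLM.model(:openai, "gpt-4o")
llm     = RubyPi::LLM::Fallback.new(primary: primary, fallback: backup)

llm.complete(messages: [{ role: :user, content: "Hello" }], stream: true) do |event|
  print event.data if event.type == :text_delta
end
```
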
@@ -55,6 +61,28 @@ module RubyPi
    :fallback
  end

+ # Overrides BaseProvider#complete to skip the outer retry wrapper.
+ #
+ # Without this override, Fallback inherits BaseProvider#complete which
+ # wraps perform_complete in a retry loop. Since perform_complete calls
+ # @primary.complete (which has its own retry loop) and @fallback.complete
+ # (also with retries), the retry layers compose multiplicatively:
+ #   outer_retries x (primary_retries + fallback_retries)
+ # With default max_retries=3, that's 4 x (4 + 4) = 32 total attempts
+ # instead of the expected 4 + 4 = 8.
+ #
+ # This override calls perform_complete directly — no outer retry loop.
+ # Each inner provider handles its own retries independently.
+ #
+ # @param messages [Array<Hash>] conversation messages
+ # @param tools [Array<Hash>] tool/function definitions
+ # @param stream [Boolean] whether to enable streaming mode
+ # @yield [event] yields StreamEvent objects when streaming
+ # @return [RubyPi::LLM::Response]
+ def complete(messages:, tools: [], stream: false, &block)
+   perform_complete(messages: messages, tools: tools, stream: stream, &block)
+ end
+
  private

  # Attempts the completion with the primary provider. If it fails with
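
The multiplication described above is easy to reproduce with a toy retry wrapper. The sketch below is self-contained and does not use the gem's BaseProvider; it only demonstrates why nesting retry loops compounds the attempt count.

```ruby
# Toy stand-in for a retry wrapper; not the gem's implementation.
def with_retries(max_retries:)
  attempts = 0
  begin
    attempts += 1
    yield
  rescue RuntimeError
    retry if attempts <= max_retries
    raise
  end
end

calls = 0
begin
  with_retries(max_retries: 3) do      # outer wrapper: up to 4 attempts
    with_retries(max_retries: 3) do    # one inner provider: up to 4 attempts each
      calls += 1
      raise "transient failure"
    end
  end
rescue RuntimeError
  # retries exhausted
end

calls # => 16 for a single nested provider; with a primary plus a fallback
      #    inside the outer loop it becomes 4 x (4 + 4) = 32, as noted above.
```
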
@@ -62,12 +90,36 @@ module RubyPi
  # the request is retried with the fallback provider. Authentication errors
  # propagate immediately since they indicate misconfiguration.
  #
+ # Each inner provider handles its own retries via BaseProvider#complete,
+ # so this method does NOT add an additional retry layer.
+ #
+ # Issue #23 + Issue #12: When streaming with a block, events are
+ # delivered to the consumer in real-time (no buffering). If the
+ # primary fails mid-stream, a :fallback_start event is emitted
+ # so the consumer can clear partial state, then the fallback
+ # provider streams directly to the consumer.
+ #
  # @param messages [Array<Hash>] conversation messages
  # @param tools [Array<Hash>] tool definitions
  # @param stream [Boolean] streaming mode flag
  # @yield [event] optional block for streaming events
  # @return [RubyPi::LLM::Response]
  def perform_complete(messages:, tools:, stream:, &block)
+   if stream && block_given?
+     perform_complete_with_streaming_fallback(messages: messages, tools: tools, &block)
+   else
+     perform_complete_without_streaming(messages: messages, tools: tools, stream: stream, &block)
+   end
+ end
+
+ # Non-streaming fallback — simple try primary, rescue, try fallback.
+ #
+ # @param messages [Array<Hash>] conversation messages
+ # @param tools [Array<Hash>] tool definitions
+ # @param stream [Boolean] streaming mode flag
+ # @yield [event] optional block for streaming events
+ # @return [RubyPi::LLM::Response]
+ def perform_complete_without_streaming(messages:, tools:, stream:, &block)
    @primary.complete(messages: messages, tools: tools, stream: stream, &block)
  rescue RubyPi::AuthenticationError
    # Configuration errors should not trigger fallback
@@ -77,12 +129,65 @@ module RubyPi
    @fallback.complete(messages: messages, tools: tools, stream: stream, &block)
  end

+ # Streaming fallback with real-time event delivery.
+ #
+ # Issue #23 + Issue #12: Stream events directly to the consumer in
+ # real-time (no buffering on the happy path). If the primary provider
+ # fails mid-stream, emit a :fallback_start event so the consumer can
+ # reset any partial state, then stream from the fallback provider.
+ #
+ # This preserves the streaming UX: consumers see tokens as they arrive
+ # instead of waiting for the entire response to complete. The tradeoff
+ # is that on primary failure, the consumer receives a :fallback_start
+ # signal and is responsible for clearing partial output.
+ #
+ # @param messages [Array<Hash>] conversation messages
+ # @param tools [Array<Hash>] tool definitions
+ # @yield [event] the consumer's streaming block
+ # @return [RubyPi::LLM::Response]
+ def perform_complete_with_streaming_fallback(messages:, tools:, &block)
+   begin
+     # Stream primary events directly to the consumer for real-time UX.
+     # No buffering — tokens appear immediately as they arrive.
+     response = @primary.complete(
+       messages: messages,
+       tools: tools,
+       stream: true,
+       &block
+     )
+
+     response
+   rescue RubyPi::AuthenticationError
+     # Configuration errors should not trigger fallback
+     raise
+   rescue RubyPi::Error => e
+     log_fallback(e)
+
+     # Signal the consumer that the primary failed mid-stream and a
+     # fallback provider is taking over. Consumers should use this event
+     # to clear any partial output from the failed primary.
+     block.call(StreamEvent.new(type: :fallback_start, data: {
+       failed_provider: @primary.provider_name,
+       error: e.message,
+       fallback_provider: @fallback.provider_name
+     }))
+
+     # Stream directly from the fallback to the consumer's block.
+     @fallback.complete(
+       messages: messages,
+       tools: tools,
+       stream: true,
+       &block
+     )
+   end
+ end
+
  # Logs the fallback event if a logger is configured.
  #
  # @param error [Exception] the error that triggered the fallback
  # @return [void]
  def log_fallback(error)
-   logger = RubyPi.configuration.logger
+   logger = @config.logger
    return unless logger

    logger.warn(
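
A consumer honouring the :fallback_start contract might look like the sketch below. It assumes StreamEvent exposes `#type` and `#data` readers and that `llm` is an already-configured Fallback instance; neither is shown in this hunk.

```ruby
buffer = +""

llm.complete(messages: messages, stream: true) do |event|
  case event.type
  when :text_delta
    buffer << event.data
    print event.data
  when :fallback_start
    # The primary died mid-stream: throw away its partial output before
    # the fallback provider starts streaming from the beginning.
    buffer.clear
    warn "\n[retrying with #{event.data[:fallback_provider]}]"
  end
end
```
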
@@ -6,6 +6,8 @@
  # the Gemini REST API for both synchronous and streaming completions, including
  # tool/function calling support.

+ require "securerandom"
+
  module RubyPi
    module LLM
      # Google Gemini provider implementation. Communicates with the Gemini
@@ -33,7 +35,7 @@ module RubyPi
  # @param options [Hash] additional options passed to BaseProvider
  def initialize(model: nil, api_key: nil, **options)
    super(**options)
-   config = RubyPi.configuration
+   config = @config
    @model = model || config.default_gemini_model
    @api_key = api_key || config.gemini_api_key
  end
@@ -77,10 +79,33 @@ module RubyPi
  # @param tools [Array<Hash>] tool definitions
  # @return [Hash] the request body
  def build_request_body(messages, tools)
+   # Separate system messages from conversation messages. Gemini requires
+   # system instructions via a dedicated `systemInstruction` field — they
+   # cannot appear as entries in `contents`. The Loop prepends a
+   # { role: :system } message; we extract it here.
+   system_parts = []
+   conversation_messages = []
+
+   messages.each do |msg|
+     role = (msg[:role] || msg["role"]).to_s
+     if role == "system"
+       system_parts << (msg[:content] || msg["content"]).to_s
+     else
+       conversation_messages << msg
+     end
+   end
+
    body = {
-     contents: messages.map { |msg| format_message(msg) }
+     contents: conversation_messages.map { |msg| format_message(msg) }
    }

+   # Inject system instruction when system messages are present
+   unless system_parts.empty?
+     body[:systemInstruction] = {
+       parts: system_parts.map { |text| { text: text } }
+     }
+   end
+
    unless tools.empty?
      body[:tools] = [{
        functionDeclarations: tools.map { |t| format_tool(t) }
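
To make the extraction concrete, a message list with a system prompt maps to a body roughly like the sketch below. The message values are invented; only the shape follows the code above.

```ruby
messages = [
  { role: :system, content: "You are a terse assistant." },
  { role: :user,   content: "What is the capital of France?" }
]

# build_request_body(messages, []) yields approximately:
# {
#   contents: [
#     { role: "user", parts: [{ text: "What is the capital of France?" }] }
#   ],
#   systemInstruction: {
#     parts: [{ text: "You are a terse assistant." }]
#   }
# }
```
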
@@ -92,21 +117,116 @@ module RubyPi
  # Converts a normalized message hash to Gemini's content format.
  #
+ # Critically, an assistant message that carries `tool_calls` (set by
+ # the agent loop after a tool-using turn) must be rendered with one
+ # `functionCall` part per tool call. Without those parts, Gemini
+ # rejects any subsequent `functionResponse` on the next turn because
+ # the response has nothing to correlate against. Earlier versions
+ # dropped `tool_calls` here, breaking multi-turn tool use.
+ #
  # @param message [Hash] a message with :role and :content keys
  # @return [Hash] Gemini-formatted content object
  def format_message(message)
    role = message[:role]&.to_s || message["role"]&.to_s || "user"
-   content = message[:content] || message["content"] || ""
+   content = message[:content] || message["content"]
+
+   # Tool-role messages carry function-call results. When the tool name
+   # is present, send as a Gemini functionResponse so the model can
+   # correlate the result with its earlier functionCall. System messages
+   # should have been extracted by build_request_body before reaching
+   # this method.
+   tool_name = message[:name] || message["name"]
+   if role == "tool" && tool_name
+     # Gemini's functionResponse expects a structured `response` object.
+     # Tool results are pre-serialized by the loop as either a JSON
+     # string (success) or an "Error: ..." string (failure). Try to
+     # parse JSON so the model receives structured data; fall back to
+     # wrapping the raw string under :result for plain-text content.
+     response_payload = parse_tool_response(content)
+     return {
+       role: "user",
+       parts: [{
+         functionResponse: {
+           name: tool_name.to_s,
+           response: response_payload
+         }
+       }]
+     }
+   end

-   # Gemini uses "user" and "model" roles
-   gemini_role = role == "assistant" ? "model" : role
+   # Assistant messages may carry `tool_calls` from a prior turn. Each
+   # one must be emitted as a `functionCall` part on the model turn so
+   # that the next turn's `functionResponse` has something to bind to.
+   if role == "assistant"
+     parts = []
+     text = content.to_s
+     parts << { text: text } unless text.empty?
+
+     tool_calls = message[:tool_calls] || message["tool_calls"]
+     if tool_calls.is_a?(Array)
+       tool_calls.each do |tc|
+         tc_name = (tc[:name] || tc["name"]).to_s
+         tc_args = tc[:arguments] || tc["arguments"] || {}
+         tc_args = parse_tool_arguments(tc_args)
+         parts << { functionCall: { name: tc_name, args: tc_args } }
+       end
+     end
+
+     # Gemini rejects an empty parts array on a model turn. If the
+     # assistant truly had no content and no tool_calls, fall back to
+     # an empty text part.
+     parts << { text: "" } if parts.empty?
+
+     return { role: "model", parts: parts }
+   end

    {
-     role: gemini_role,
+     role: role,
      parts: [{ text: content.to_s }]
    }
  end

+ # Best-effort parse of a tool-result string into a structured object
+ # for Gemini's `functionResponse.response`. JSON content is returned
+ # as-is (wrapped in a hash if it parsed to a non-hash); non-JSON
+ # content (e.g., "Error: ...") is wrapped under :result.
+ #
+ # @param content [String, Hash, nil]
+ # @return [Hash]
+ def parse_tool_response(content)
+   return { result: "" } if content.nil?
+   return content if content.is_a?(Hash)
+
+   str = content.to_s
+   return { result: str } if str.strip.empty?
+
+   begin
+     parsed = JSON.parse(str)
+     parsed.is_a?(Hash) ? parsed : { result: parsed }
+   rescue JSON::ParserError
+     { result: str }
+   end
+ end
+
+ # Coerce a tool_call.arguments value (Hash, JSON string, or other)
+ # into a Hash suitable for Gemini's `functionCall.args`. Malformed
+ # or non-Hash values become an empty hash so the request is still
+ # well-formed.
+ #
+ # @param args [Hash, String, nil]
+ # @return [Hash]
+ def parse_tool_arguments(args)
+   return args if args.is_a?(Hash)
+   return {} unless args.is_a?(String) && !args.strip.empty?
+
+   begin
+     parsed = JSON.parse(args)
+     parsed.is_a?(Hash) ? parsed : {}
+   rescue JSON::ParserError
+     {}
+   end
+ end
+
  # Converts a tool definition to Gemini's function declaration format.
  # Accepts either a RubyPi::Tools::Definition or a plain Hash.
  #
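
The two new branches are easiest to follow with a concrete turn. The messages below are invented, but the output shapes trace directly from the code above.

```ruby
assistant_msg = {
  role: "assistant",
  content: "",
  tool_calls: [{ name: "get_weather", arguments: '{"city":"Paris"}' }]
}
tool_msg = { role: "tool", name: "get_weather", content: '{"temp_c":21}' }

# format_message(assistant_msg) =>
#   { role: "model",
#     parts: [{ functionCall: { name: "get_weather", args: { "city" => "Paris" } } }] }
#
# format_message(tool_msg) =>
#   { role: "user",
#     parts: [{ functionResponse: { name: "get_weather", response: { "temp_c" => 21 } } }] }
```
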
@@ -126,17 +246,37 @@ module RubyPi
    declaration
  end

+ # Returns the default HTTP headers for Gemini API requests.
+ #
+ # Issue #13: The API key is now sent via the `x-goog-api-key` header
+ # instead of being interpolated into the URL query string. This prevents
+ # the key from leaking into debug logs, backtraces, and HTTP intermediary
+ # logs (proxies, load balancers, etc.).
+ #
+ # @return [Hash] headers hash
+ def default_headers
+   {
+     "x-goog-api-key" => @api_key.to_s
+   }
+ end
+
  # Executes a standard (non-streaming) request to the Gemini API.
  #
+ # Issue #13: Removed API key from the URL query string. The key is now
+ # sent via the `x-goog-api-key` header (set in default_headers) to
+ # avoid leaking credentials into logs and backtraces.
+ #
  # @param body [Hash] the request body
  # @return [RubyPi::LLM::Response]
  def perform_standard_request(body)
-   conn = build_connection(base_url: BASE_URL)
-   url = "/#{API_VERSION}/models/#{@model}:generateContent?key=#{@api_key}"
+   conn = build_connection(base_url: BASE_URL, headers: default_headers)
+   url = "/#{API_VERSION}/models/#{@model}:generateContent"

-   response = conn.post(url) do |req|
-     req.headers["Content-Type"] = "application/json"
-     req.body = JSON.generate(body)
+   response = with_transport_errors do
+     conn.post(url) do |req|
+       req.headers["Content-Type"] = "application/json"
+       req.body = JSON.generate(body)
+     end
    end

    handle_error_response(response) unless response.success?
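
For comparison, the same idea expressed with plain Faraday. The gem's build_connection helper is not shown in this diff, and the base URL and API version below are the public Gemini defaults rather than the gem's constants.

```ruby
require "faraday"
require "json"

conn = Faraday.new(
  url: "https://generativelanguage.googleapis.com",
  headers: { "x-goog-api-key" => ENV.fetch("GEMINI_API_KEY") } # key never appears in the URL
)

response = conn.post("/v1beta/models/gemini-2.0-flash:generateContent") do |req|
  req.headers["Content-Type"] = "application/json"
  req.body = JSON.generate(contents: [{ role: "user", parts: [{ text: "Hi" }] }])
end
```
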
@@ -145,57 +285,120 @@ module RubyPi
  # Executes a streaming request to the Gemini API, yielding events.
  #
+ # Issue #13: Removed API key from the URL query string. The key is now
+ # sent via the `x-goog-api-key` header (set in default_headers).
+ #
  # @param body [Hash] the request body
  # @yield [event] StreamEvent objects
  # @return [RubyPi::LLM::Response] final aggregated response
  def perform_streaming_request(body, &block)
-   conn = build_connection(base_url: BASE_URL)
-   url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?key=#{@api_key}&alt=sse"
+   conn = build_connection(base_url: BASE_URL, headers: default_headers)
+   url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?alt=sse"

    accumulated_text = +""
    accumulated_tool_calls = []
    usage_data = {}
-
-   response = conn.post(url) do |req|
-     req.headers["Content-Type"] = "application/json"
-     req.body = JSON.generate(body)
-   end
-
-   handle_error_response(response) unless response.success?
-
-   # Parse SSE events from the response body
-   parse_sse_events(response.body) do |data|
-     candidates = data.dig("candidates") || []
-     candidate = candidates.first
-     next unless candidate
-
-     parts = candidate.dig("content", "parts") || []
-     parts.each do |part|
-       if part.key?("text")
-         text_chunk = part["text"]
-         accumulated_text << text_chunk
-         block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
-       elsif part.key?("functionCall")
-         fc = part["functionCall"]
-         tool_call = ToolCall.new(
-           id: "gemini_#{accumulated_tool_calls.length}",
-           name: fc["name"],
-           arguments: fc["args"] || {}
-         )
-         accumulated_tool_calls << tool_call
-         block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
+   finish_reason = nil
+
+   # Buffer for incomplete SSE lines across on_data chunks. Faraday's
+   # on_data callback delivers raw bytes as they arrive from the network,
+   # which may split SSE events mid-line. We accumulate a line buffer and
+   # process complete lines incrementally so that deltas reach the caller
+   # as soon as each SSE event is fully received.
+   sse_buffer = +""
+   response_status = nil
+   error_body = +""
+
+   response = with_transport_errors do
+     conn.post(url) do |req|
+       req.headers["Content-Type"] = "application/json"
+       req.body = JSON.generate(body)
+
+       # Use Faraday's on_data callback for real incremental streaming.
+       # Without this, Faraday buffers the entire response body before
+       # returning — no deltas reach the caller until the model finishes
+       # generating (fake streaming).
+       req.options.on_data = proc do |chunk, _overall_received_bytes, env|
+         response_status ||= env&.status
+
+         # If the HTTP status indicates an error, accumulate the body for
+         # the error handler instead of parsing it as SSE events.
+         if response_status && response_status >= 400
+           error_body << chunk
+           next
          end
-     end

-     # Capture usage metadata if present
-     if data.key?("usageMetadata")
-       meta = data["usageMetadata"]
-       usage_data = {
-         prompt_tokens: meta["promptTokenCount"],
-         completion_tokens: meta["candidatesTokenCount"],
-         total_tokens: meta["totalTokenCount"]
-       }
+         sse_buffer << chunk
+         # Process all complete lines in the buffer
+         while (line_end = sse_buffer.index("\n"))
+           line = sse_buffer.slice!(0, line_end + 1).strip
+           next if line.empty?
+           next unless line.start_with?("data: ")
+
+           data_str = line.sub(/\Adata: /, "")
+           next if data_str == "[DONE]"
+
+           begin
+             data = JSON.parse(data_str)
+           rescue JSON::ParserError
+             next
+           end
+
+           # Process this SSE event
+           candidates = data.dig("candidates") || []
+           candidate = candidates.first
+           next unless candidate
+
+           parts = candidate.dig("content", "parts") || []
+           parts.each do |part|
+             if part.key?("text")
+               text_chunk = part["text"]
+               accumulated_text << text_chunk
+               block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
+             elsif part.key?("functionCall")
+               fc = part["functionCall"]
+               tool_call = ToolCall.new(
+                 # Generate a globally-unique ID per tool call. A simple
+                 # length-based counter ("gemini_0", "gemini_1") collides
+                 # across turns since each response restarts numbering at
+                 # 0, breaking any caller that uses ID as a hash key for
+                 # observability or result correlation.
+                 id: "gemini_#{SecureRandom.hex(8)}",
+                 name: fc["name"],
+                 arguments: fc["args"] || {}
+               )
+               accumulated_tool_calls << tool_call
+               block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
+             end
+           end
+
+           # Parse the actual finish reason from the streaming response
+           # instead of hardcoding "stop". Gemini sends finishReason in
+           # the candidate object (e.g., "STOP", "MAX_TOKENS", "SAFETY").
+           if candidate["finishReason"]
+             finish_reason = candidate["finishReason"].downcase
+           end
+
+           # Capture usage metadata if present
+           if data.key?("usageMetadata")
+             meta = data["usageMetadata"]
+             usage_data = {
+               prompt_tokens: meta["promptTokenCount"],
+               completion_tokens: meta["candidatesTokenCount"],
+               total_tokens: meta["totalTokenCount"]
+             }
+           end
+         end
       end
+     end # conn.post
+   end # with_transport_errors
+
+   # When on_data is active, the response body was consumed by the
+   # callback. Pass the accumulated error_body so ApiError carries the
+   # full server message instead of an empty body.
+   unless response.success?
+     error_body_str = error_body.empty? ? response.body : error_body
+     handle_error_response(response, override_body: error_body_str)
    end

    # Signal completion
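
The line-buffering idea is independent of Faraday. A dependency-free sketch with invented chunks:

```ruby
require "json"

chunks = [
  "data: {\"candidates\":[{\"content\":{\"parts\":[{\"te",   # event split mid-line
  "xt\":\"Hel\"}]}}]}\n",
  "data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"lo\"}]}}]}\n"
]

buffer = +""
chunks.each do |chunk|
  buffer << chunk
  # Only parse once a full line (terminated by "\n") has arrived.
  while (newline = buffer.index("\n"))
    line = buffer.slice!(0, newline + 1).strip
    next unless line.start_with?("data: ")

    data = JSON.parse(line.delete_prefix("data: "))
    print data.dig("candidates", 0, "content", "parts", 0, "text")
  end
end
# Prints "Hello": the first event only becomes parseable after the second chunk.
```
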
@@ -205,7 +408,7 @@ module RubyPi
      content: accumulated_text.empty? ? nil : accumulated_text,
      tool_calls: accumulated_tool_calls,
      usage: usage_data,
-     finish_reason: "stop"
+     finish_reason: finish_reason || "stop"
    )
  end

@@ -227,7 +430,9 @@ module RubyPi
    elsif part.key?("functionCall")
      fc = part["functionCall"]
      tool_calls << ToolCall.new(
-       id: "gemini_#{tool_calls.length}",
+       # See note in perform_streaming_request: per-response counters
+       # collide across turns, so we generate a globally-unique ID.
+       id: "gemini_#{SecureRandom.hex(8)}",
        name: fc["name"],
        arguments: fc["args"] || {}
      )
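
A quick illustration of the collision the comment describes:

```ruby
# Two separate responses both number their first tool call 0, so the old
# scheme reuses the same key across turns:
first_turn_id  = "gemini_0"
second_turn_id = "gemini_0"
first_turn_id == second_turn_id # => true: distinct calls, identical key

require "securerandom"
SecureRandom.hex(8) # => e.g. "9f1c2ab34d5e6f70", effectively unique per call
```
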