RubyGems - ruby-pi - Versions diffs - 0.1.3 → 0.1.5 - Mend

ruby-pi 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +51 -0
data/README.md +77 -29
data/lib/ruby_pi/agent/core.rb +59 -4
data/lib/ruby_pi/agent/events.rb +17 -3
data/lib/ruby_pi/agent/loop.rb +103 -18
data/lib/ruby_pi/agent/result.rb +46 -7
data/lib/ruby_pi/agent/state.rb +12 -0
data/lib/ruby_pi/configuration.rb +28 -7
data/lib/ruby_pi/context/compaction.rb +17 -2
data/lib/ruby_pi/context/transform.rb +67 -3
data/lib/ruby_pi/errors.rb +19 -1
data/lib/ruby_pi/llm/anthropic.rb +231 -59
data/lib/ruby_pi/llm/base_provider.rb +44 -46
data/lib/ruby_pi/llm/fallback.rb +106 -1
data/lib/ruby_pi/llm/gemini.rb +161 -41
data/lib/ruby_pi/llm/openai.rb +173 -42
data/lib/ruby_pi/llm/stream_event.rb +13 -3
data/lib/ruby_pi/llm/tool_call.rb +26 -3
data/lib/ruby_pi/tools/executor.rb +130 -21
data/lib/ruby_pi/tools/registry.rb +26 -16
data/lib/ruby_pi/version.rb +1 -1
data/lib/ruby_pi.rb +2 -1
metadata +5 -39

data/lib/ruby_pi/llm/fallback.rb CHANGED Viewed

@@ -16,6 +16,12 @@ module RubyPi
     # Authentication errors are NOT retried with the fallback since they
     # indicate a configuration problem rather than a transient failure.
     #
+    # Issue #23 + Issue #12: When streaming, events from the primary provider
+    # are delivered to the consumer in real time (no buffering). If the primary
+    # fails mid-stream, a :fallback_start event is emitted so the consumer can
+    # discard any partial output, and the fallback provider then streams fresh
+    # from the start. Consumers that ignore :fallback_start will see partial
+    # output from the primary followed by the complete output from the fallback.
+    #
     # @example Setting up a fallback chain
     #   primary  = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
     #   backup   = RubyPi::LLM.model(:openai, "gpt-4o")
@@ -55,6 +61,28 @@ module RubyPi
         :fallback
       end
+      # Overrides BaseProvider#complete to skip the outer retry wrapper.
+      #
+      # Without this override, Fallback inherits BaseProvider#complete which
+      # wraps perform_complete in a retry loop. Since perform_complete calls
+      # @primary.complete (which has its own retry loop) and @fallback.complete
+      # (also with retries), the retry layers compose multiplicatively:
+      #   outer_attempts x (primary_attempts + fallback_attempts)
+      # With default max_retries=3 (1 initial try + 3 retries = 4 attempts per
+      # layer), that's 4 x (4 + 4) = 32 total attempts instead of the
+      # expected 4 + 4 = 8.
+      #
+      # This override calls perform_complete directly — no outer retry loop.
+      # Each inner provider handles its own retries independently.
+      #
+      # @param messages [Array<Hash>] conversation messages
+      # @param tools [Array<Hash>] tool/function definitions
+      # @param stream [Boolean] whether to enable streaming mode
+      # @yield [event] yields StreamEvent objects when streaming
+      # @return [RubyPi::LLM::Response]
+      def complete(messages:, tools: [], stream: false, &block)
+        perform_complete(messages: messages, tools: tools, stream: stream, &block)
+      end
       private
       # Attempts the completion with the primary provider. If it fails with
@@ -62,12 +90,36 @@ module RubyPi
       # the request is retried with the fallback provider. Authentication errors
       # propagate immediately since they indicate misconfiguration.
       #
+      # Each inner provider handles its own retries via BaseProvider#complete,
+      # so this method does NOT add an additional retry layer.
+      #
+      # Issue #23 + Issue #12: When streaming with a block, events are
+      # delivered to the consumer in real-time (no buffering). If the
+      # primary fails mid-stream, a :fallback_start event is emitted
+      # so the consumer can clear partial state, then the fallback
+      # provider streams directly to the consumer.
+      #
       # @param messages [Array<Hash>] conversation messages
       # @param tools [Array<Hash>] tool definitions
       # @param stream [Boolean] streaming mode flag
       # @yield [event] optional block for streaming events
       # @return [RubyPi::LLM::Response]
       def perform_complete(messages:, tools:, stream:, &block)
+        if stream && block_given?
+          perform_complete_with_streaming_fallback(messages: messages, tools: tools, &block)
+        else
+          perform_complete_without_streaming(messages: messages, tools: tools, stream: stream, &block)
+        end
+      end
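+      # Fallback path used when no streaming block is supplied: try the
+      # primary, rescue, then try the fallback. The stream flag is forwarded
+      # unchanged, so it may still be true here if the caller passed no block.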
+      #
+      # @param messages [Array<Hash>] conversation messages
+      # @param tools [Array<Hash>] tool definitions
+      # @param stream [Boolean] streaming mode flag
+      # @yield [event] optional block for streaming events
+      # @return [RubyPi::LLM::Response]
+      def perform_complete_without_streaming(messages:, tools:, stream:, &block)
         @primary.complete(messages: messages, tools: tools, stream: stream, &block)
       rescue RubyPi::AuthenticationError
         # Configuration errors should not trigger fallback
@@ -77,12 +129,65 @@ module RubyPi
         @fallback.complete(messages: messages, tools: tools, stream: stream, &block)
       end
+      # Streaming fallback with real-time event delivery.
+      #
+      # Issue #23 + Issue #12: Stream events directly to the consumer in
+      # real-time (no buffering on the happy path). If the primary provider
+      # fails mid-stream, emit a :fallback_start event so the consumer can
+      # reset any partial state, then stream from the fallback provider.
+      #
+      # This preserves the streaming UX: consumers see tokens as they arrive
+      # instead of waiting for the entire response to complete. The tradeoff
+      # is that on primary failure, the consumer receives a :fallback_start
+      # signal and is responsible for clearing partial output.
+      #
+      # @param messages [Array<Hash>] conversation messages
+      # @param tools [Array<Hash>] tool definitions
+      # @yield [event] the consumer's streaming block
+      # @return [RubyPi::LLM::Response]
+      def perform_complete_with_streaming_fallback(messages:, tools:, &block)
+        begin
+          # Stream primary events directly to the consumer for real-time UX.
+          # No buffering — tokens appear immediately as they arrive.
+          response = @primary.complete(
+            messages: messages,
+            tools: tools,
+            stream: true,
+            &block
+          )
+          response
+        rescue RubyPi::AuthenticationError
+          # Configuration errors should not trigger fallback
+          raise
+        rescue RubyPi::Error => e
+          log_fallback(e)
+          # Signal the consumer that the primary failed mid-stream and a
+          # fallback provider is taking over. Consumers should use this event
+          # to clear any partial output from the failed primary.
+          block.call(StreamEvent.new(type: :fallback_start, data: {
+            failed_provider: @primary.provider_name,
+            error: e.message,
+            fallback_provider: @fallback.provider_name
+          }))
+          # Stream directly from the fallback to the consumer's block.
+          @fallback.complete(
+            messages: messages,
+            tools: tools,
+            stream: true,
+            &block
+          )
+        end
+      end
       # Logs the fallback event if a logger is configured.
       #
       # @param error [Exception] the error that triggered the fallback
       # @return [void]
       def log_fallback(error)
-        logger = RubyPi.configuration.logger
+        logger = @config.logger
         return unless logger
         logger.warn(

data/lib/ruby_pi/llm/gemini.rb CHANGED Viewed

@@ -33,7 +33,7 @@ module RubyPi
       # @param options [Hash] additional options passed to BaseProvider
       def initialize(model: nil, api_key: nil, **options)
         super(**options)
-        config = RubyPi.configuration
+        config = @config
         @model = model || config.default_gemini_model
         @api_key = api_key || config.gemini_api_key
       end
@@ -77,10 +77,33 @@ module RubyPi
       # @param tools [Array<Hash>] tool definitions
       # @return [Hash] the request body
       def build_request_body(messages, tools)
+        # Separate system messages from conversation messages. Gemini requires
+        # system instructions via a dedicated `systemInstruction` field — they
+        # cannot appear as entries in `contents`. The Loop prepends a
+        # { role: :system } message; we extract it here.
+        system_parts = []
+        conversation_messages = []
+        messages.each do |msg|
+          role = (msg[:role] || msg["role"]).to_s
+          if role == "system"
+            system_parts << (msg[:content] || msg["content"]).to_s
+          else
+            conversation_messages << msg
+          end
+        end
         body = {
-          contents: messages.map { |msg| format_message(msg) }
+          contents: conversation_messages.map { |msg| format_message(msg) }
         }
+        # Inject system instruction when system messages are present
+        unless system_parts.empty?
+          body[:systemInstruction] = {
+            parts: system_parts.map { |text| { text: text } }
+          }
+        end
         unless tools.empty?
           body[:tools] = [{
             functionDeclarations: tools.map { |t| format_tool(t) }
@@ -98,8 +121,31 @@ module RubyPi
         role = message[:role]&.to_s || message["role"]&.to_s || "user"
         content = message[:content] || message["content"] || ""
-        # Gemini uses "user" and "model" roles
-        gemini_role = role == "assistant" ? "model" : role
+        # Gemini uses "user" and "model" roles. Map tool results to "user"
+        # role with a functionResponse part when we have the metadata, or
+        # plain text otherwise. System messages should have been extracted
+        # by build_request_body before reaching this method.
+        gemini_role = case role
+                      when "assistant" then "model"
+                      when "tool"      then "user"
+                      else                  role
+                      end
+        # Tool-role messages carry function call results. When the message
+        # includes a name, send it as a Gemini functionResponse so the model
+        # can correlate the result with its earlier functionCall. Only the
+        # name is checked here; tool_call_id is not consulted.
+        tool_name = message[:name] || message["name"]
+        if role == "tool" && tool_name
+          return {
+            role: "user",
+            parts: [{
+              functionResponse: {
+                name: tool_name.to_s,
+                response: { result: content.to_s }
+              }
+            }]
+          }
+        end
         {
           role: gemini_role,
@@ -126,13 +172,31 @@ module RubyPi
         declaration
       end
+      # Returns the default HTTP headers for Gemini API requests.
+      #
+      # Issue #13: The API key is now sent via the `x-goog-api-key` header
+      # instead of being interpolated into the URL query string. This prevents
+      # the key from leaking into debug logs, backtraces, and HTTP intermediary
+      # logs (proxies, load balancers, etc.).
+      #
+      # @return [Hash] headers hash
+      def default_headers
+        {
+          "x-goog-api-key" => @api_key.to_s
+        }
+      end
       # Executes a standard (non-streaming) request to the Gemini API.
       #
+      # Issue #13: Removed API key from the URL query string. The key is now
+      # sent via the `x-goog-api-key` header (set in default_headers) to
+      # avoid leaking credentials into logs and backtraces.
+      #
       # @param body [Hash] the request body
       # @return [RubyPi::LLM::Response]
       def perform_standard_request(body)
-        conn = build_connection(base_url: BASE_URL)
-        url = "/#{API_VERSION}/models/#{@model}:generateContent?key=#{@api_key}"
+        conn = build_connection(base_url: BASE_URL, headers: default_headers)
+        url = "/#{API_VERSION}/models/#{@model}:generateContent"
         response = conn.post(url) do |req|
           req.headers["Content-Type"] = "application/json"
@@ -145,57 +209,113 @@ module RubyPi
       # Executes a streaming request to the Gemini API, yielding events.
       #
+      # Issue #13: Removed API key from the URL query string. The key is now
+      # sent via the `x-goog-api-key` header (set in default_headers).
+      #
       # @param body [Hash] the request body
       # @yield [event] StreamEvent objects
       # @return [RubyPi::LLM::Response] final aggregated response
       def perform_streaming_request(body, &block)
-        conn = build_connection(base_url: BASE_URL)
-        url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?key=#{@api_key}&alt=sse"
+        conn = build_connection(base_url: BASE_URL, headers: default_headers)
+        url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?alt=sse"
         accumulated_text = +""
         accumulated_tool_calls = []
         usage_data = {}
+        finish_reason = nil
+        # Buffer for incomplete SSE lines across on_data chunks. Faraday's
+        # on_data callback delivers raw bytes as they arrive from the network,
+        # which may split SSE events mid-line. We accumulate a line buffer and
+        # process complete lines incrementally so that deltas reach the caller
+        # as soon as each SSE event is fully received.
+        sse_buffer = +""
+        response_status = nil
+        error_body = +""
         response = conn.post(url) do |req|
           req.headers["Content-Type"] = "application/json"
           req.body = JSON.generate(body)
-        end
-        handle_error_response(response) unless response.success?
+          # Use Faraday's on_data callback for real incremental streaming.
+          # Without this, Faraday buffers the entire response body before
+          # returning — no deltas reach the caller until the model finishes
+          # generating (fake streaming).
+          req.options.on_data = proc do |chunk, _overall_received_bytes, env|
+            response_status ||= env&.status
+            # If the HTTP status indicates an error, accumulate the body for
+            # the error handler instead of parsing it as SSE events.
+            if response_status && response_status >= 400
+              error_body << chunk
+              next
+            end
-        # Parse SSE events from the response body
-        parse_sse_events(response.body) do |data|
-          candidates = data.dig("candidates") || []
-          candidate = candidates.first
-          next unless candidate
-          parts = candidate.dig("content", "parts") || []
-          parts.each do |part|
-            if part.key?("text")
-              text_chunk = part["text"]
-              accumulated_text << text_chunk
-              block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
-            elsif part.key?("functionCall")
-              fc = part["functionCall"]
-              tool_call = ToolCall.new(
-                id: "gemini_#{accumulated_tool_calls.length}",
-                name: fc["name"],
-                arguments: fc["args"] || {}
-              )
-              accumulated_tool_calls << tool_call
-              block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
+            sse_buffer << chunk
+            # Process all complete lines in the buffer
+            while (line_end = sse_buffer.index("\n"))
+              line = sse_buffer.slice!(0, line_end + 1).strip
+              next if line.empty?
+              next unless line.start_with?("data: ")
+              data_str = line.sub(/\Adata: /, "")
+              next if data_str == "[DONE]"
+              begin
+                data = JSON.parse(data_str)
+              rescue JSON::ParserError
+                next
+              end
+              # Process this SSE event
+              candidates = data.dig("candidates") || []
+              candidate = candidates.first
+              next unless candidate
+              parts = candidate.dig("content", "parts") || []
+              parts.each do |part|
+                if part.key?("text")
+                  text_chunk = part["text"]
+                  accumulated_text << text_chunk
+                  block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
+                elsif part.key?("functionCall")
+                  fc = part["functionCall"]
+                  tool_call = ToolCall.new(
+                    id: "gemini_#{accumulated_tool_calls.length}",
+                    name: fc["name"],
+                    arguments: fc["args"] || {}
+                  )
+                  accumulated_tool_calls << tool_call
+                  block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
+                end
+              end
+              # Parse the actual finish reason from the streaming response
+              # instead of hardcoding "stop". Gemini sends finishReason in
+              # the candidate object (e.g., "STOP", "MAX_TOKENS", "SAFETY").
+              if candidate["finishReason"]
+                finish_reason = candidate["finishReason"].downcase
+              end
+              # Capture usage metadata if present
+              if data.key?("usageMetadata")
+                meta = data["usageMetadata"]
+                usage_data = {
+                  prompt_tokens: meta["promptTokenCount"],
+                  completion_tokens: meta["candidatesTokenCount"],
+                  total_tokens: meta["totalTokenCount"]
+                }
+              end
             end
           end
+        end
-          # Capture usage metadata if present
-          if data.key?("usageMetadata")
-            meta = data["usageMetadata"]
-            usage_data = {
-              prompt_tokens: meta["promptTokenCount"],
-              completion_tokens: meta["candidatesTokenCount"],
-              total_tokens: meta["totalTokenCount"]
-            }
-          end
+        # When on_data is active, the response body was consumed by the
+        # callback. Pass the accumulated error_body so ApiError carries the
+        # full server message instead of an empty body.
+        unless response.success?
+          error_body_str = error_body.empty? ? response.body : error_body
+          handle_error_response(response, override_body: error_body_str)
         end
         # Signal completion
@@ -205,7 +325,7 @@ module RubyPi
           content: accumulated_text.empty? ? nil : accumulated_text,
           tool_calls: accumulated_tool_calls,
           usage: usage_data,
-          finish_reason: "stop"
+          finish_reason: finish_reason || "stop"
         )
       end