ruby-pi 0.1.3 → 0.1.6

This diff shows the changes between the published 0.1.3 and 0.1.6 releases of ruby-pi as they appear in the public registry.
@@ -16,6 +16,12 @@ module RubyPi
  # Authentication errors are NOT retried with the fallback since they
  # indicate a configuration problem rather than a transient failure.
  #
+ # Issue #23: When streaming, deltas from the primary are delivered to the
+ # consumer in real-time. If the primary fails mid-stream, a :fallback_start
+ # event is emitted so the consumer can discard the partial output, and the
+ # fallback provider then streams fresh from the start, so the consumer never
+ # sees primary and fallback output silently concatenated.
+ #
  # @example Setting up a fallback chain
  #   primary = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
  #   backup = RubyPi::LLM.model(:openai, "gpt-4o")
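
For orientation, a minimal sketch of how such a chain might be driven. The `Fallback.new(primary:, fallback:)` keywords and the `StreamEvent` `#type`/`#data` readers are assumptions for illustration; only `RubyPi::LLM.model` and the `complete` signature appear in this diff.

```ruby
# Hypothetical usage sketch -- constructor keywords and event readers assumed.
primary = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
backup  = RubyPi::LLM.model(:openai, "gpt-4o")
llm     = RubyPi::LLM::Fallback.new(primary: primary, fallback: backup)

llm.complete(messages: [{ role: :user, content: "Hello" }], stream: true) do |event|
  print event.data if event.type == :text_delta
end
```
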
@@ -55,6 +61,28 @@ module RubyPi
    :fallback
  end

+ # Overrides BaseProvider#complete to skip the outer retry wrapper.
+ #
+ # Without this override, Fallback inherits BaseProvider#complete which
+ # wraps perform_complete in a retry loop. Since perform_complete calls
+ # @primary.complete (which has its own retry loop) and @fallback.complete
+ # (also with retries), the retry layers compose multiplicatively:
+ #   outer_retries x (primary_retries + fallback_retries)
+ # With default max_retries=3, that's 4 x (4 + 4) = 32 total attempts
+ # instead of the expected 4 + 4 = 8.
+ #
+ # This override calls perform_complete directly — no outer retry loop.
+ # Each inner provider handles its own retries independently.
+ #
+ # @param messages [Array<Hash>] conversation messages
+ # @param tools [Array<Hash>] tool/function definitions
+ # @param stream [Boolean] whether to enable streaming mode
+ # @yield [event] yields StreamEvent objects when streaming
+ # @return [RubyPi::LLM::Response]
+ def complete(messages:, tools: [], stream: false, &block)
+   perform_complete(messages: messages, tools: tools, stream: stream, &block)
+ end
+
  private

  # Attempts the completion with the primary provider. If it fails with
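
The multiplication described above is easy to reproduce with a toy retry wrapper. The sketch below is self-contained and does not use the gem's BaseProvider; it only demonstrates why nesting retry loops compounds the attempt count.

```ruby
# Toy stand-in for a retry wrapper; not the gem's implementation.
def with_retries(max_retries:)
  attempts = 0
  begin
    attempts += 1
    yield
  rescue RuntimeError
    retry if attempts <= max_retries
    raise
  end
end

calls = 0
begin
  with_retries(max_retries: 3) do      # outer wrapper: up to 4 attempts
    with_retries(max_retries: 3) do    # one inner provider: up to 4 attempts each
      calls += 1
      raise "transient failure"
    end
  end
rescue RuntimeError
  # retries exhausted
end

calls # => 16 for a single nested provider; with a primary plus a fallback
      #    inside the outer loop it becomes 4 x (4 + 4) = 32, as noted above.
```
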
@@ -62,12 +90,36 @@ module RubyPi
  # the request is retried with the fallback provider. Authentication errors
  # propagate immediately since they indicate misconfiguration.
  #
+ # Each inner provider handles its own retries via BaseProvider#complete,
+ # so this method does NOT add an additional retry layer.
+ #
+ # Issue #23 + Issue #12: When streaming with a block, events are
+ # delivered to the consumer in real-time (no buffering). If the
+ # primary fails mid-stream, a :fallback_start event is emitted
+ # so the consumer can clear partial state, then the fallback
+ # provider streams directly to the consumer.
+ #
  # @param messages [Array<Hash>] conversation messages
  # @param tools [Array<Hash>] tool definitions
  # @param stream [Boolean] streaming mode flag
  # @yield [event] optional block for streaming events
  # @return [RubyPi::LLM::Response]
  def perform_complete(messages:, tools:, stream:, &block)
+   if stream && block_given?
+     perform_complete_with_streaming_fallback(messages: messages, tools: tools, &block)
+   else
+     perform_complete_without_streaming(messages: messages, tools: tools, stream: stream, &block)
+   end
+ end
+
+ # Non-streaming fallback — simple try primary, rescue, try fallback.
+ #
+ # @param messages [Array<Hash>] conversation messages
+ # @param tools [Array<Hash>] tool definitions
+ # @param stream [Boolean] streaming mode flag
+ # @yield [event] optional block for streaming events
+ # @return [RubyPi::LLM::Response]
+ def perform_complete_without_streaming(messages:, tools:, stream:, &block)
    @primary.complete(messages: messages, tools: tools, stream: stream, &block)
  rescue RubyPi::AuthenticationError
    # Configuration errors should not trigger fallback
@@ -77,12 +129,65 @@ module RubyPi
    @fallback.complete(messages: messages, tools: tools, stream: stream, &block)
  end

+ # Streaming fallback with real-time event delivery.
+ #
+ # Issue #23 + Issue #12: Stream events directly to the consumer in
+ # real-time (no buffering on the happy path). If the primary provider
+ # fails mid-stream, emit a :fallback_start event so the consumer can
+ # reset any partial state, then stream from the fallback provider.
+ #
+ # This preserves the streaming UX: consumers see tokens as they arrive
+ # instead of waiting for the entire response to complete. The tradeoff
+ # is that on primary failure, the consumer receives a :fallback_start
+ # signal and is responsible for clearing partial output.
+ #
+ # @param messages [Array<Hash>] conversation messages
+ # @param tools [Array<Hash>] tool definitions
+ # @yield [event] the consumer's streaming block
+ # @return [RubyPi::LLM::Response]
+ def perform_complete_with_streaming_fallback(messages:, tools:, &block)
+   begin
+     # Stream primary events directly to the consumer for real-time UX.
+     # No buffering — tokens appear immediately as they arrive.
+     response = @primary.complete(
+       messages: messages,
+       tools: tools,
+       stream: true,
+       &block
+     )
+
+     response
+   rescue RubyPi::AuthenticationError
+     # Configuration errors should not trigger fallback
+     raise
+   rescue RubyPi::Error => e
+     log_fallback(e)
+
+     # Signal the consumer that the primary failed mid-stream and a
+     # fallback provider is taking over. Consumers should use this event
+     # to clear any partial output from the failed primary.
+     block.call(StreamEvent.new(type: :fallback_start, data: {
+       failed_provider: @primary.provider_name,
+       error: e.message,
+       fallback_provider: @fallback.provider_name
+     }))
+
+     # Stream directly from the fallback to the consumer's block.
+     @fallback.complete(
+       messages: messages,
+       tools: tools,
+       stream: true,
+       &block
+     )
+   end
+ end
+
  # Logs the fallback event if a logger is configured.
  #
  # @param error [Exception] the error that triggered the fallback
  # @return [void]
  def log_fallback(error)
-   logger = RubyPi.configuration.logger
+   logger = @config.logger
    return unless logger

    logger.warn(
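
A consumer honouring the :fallback_start contract might look like the sketch below. It assumes StreamEvent exposes `#type` and `#data` readers and that `llm` is an already-configured Fallback instance; neither is shown in this hunk.

```ruby
buffer = +""

llm.complete(messages: messages, stream: true) do |event|
  case event.type
  when :text_delta
    buffer << event.data
    print event.data
  when :fallback_start
    # The primary died mid-stream: throw away its partial output before
    # the fallback provider starts streaming from the beginning.
    buffer.clear
    warn "\n[retrying with #{event.data[:fallback_provider]}]"
  end
end
```
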
@@ -6,6 +6,8 @@
  # the Gemini REST API for both synchronous and streaming completions, including
  # tool/function calling support.

+ require "securerandom"
+
  module RubyPi
    module LLM
      # Google Gemini provider implementation. Communicates with the Gemini
@@ -33,7 +35,7 @@ module RubyPi
  # @param options [Hash] additional options passed to BaseProvider
  def initialize(model: nil, api_key: nil, **options)
    super(**options)
-   config = RubyPi.configuration
+   config = @config
    @model = model || config.default_gemini_model
    @api_key = api_key || config.gemini_api_key
  end
@@ -77,10 +79,33 @@ module RubyPi
  # @param tools [Array<Hash>] tool definitions
  # @return [Hash] the request body
  def build_request_body(messages, tools)
+   # Separate system messages from conversation messages. Gemini requires
+   # system instructions via a dedicated `systemInstruction` field — they
+   # cannot appear as entries in `contents`. The Loop prepends a
+   # { role: :system } message; we extract it here.
+   system_parts = []
+   conversation_messages = []
+
+   messages.each do |msg|
+     role = (msg[:role] || msg["role"]).to_s
+     if role == "system"
+       system_parts << (msg[:content] || msg["content"]).to_s
+     else
+       conversation_messages << msg
+     end
+   end
+
    body = {
-     contents: messages.map { |msg| format_message(msg) }
+     contents: conversation_messages.map { |msg| format_message(msg) }
    }

+   # Inject system instruction when system messages are present
+   unless system_parts.empty?
+     body[:systemInstruction] = {
+       parts: system_parts.map { |text| { text: text } }
+     }
+   end
+
    unless tools.empty?
      body[:tools] = [{
        functionDeclarations: tools.map { |t| format_tool(t) }
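
To make the extraction concrete, a message list with a system prompt maps to a body roughly like the sketch below. The message values are invented; only the shape follows the code above.

```ruby
messages = [
  { role: :system, content: "You are a terse assistant." },
  { role: :user,   content: "What is the capital of France?" }
]

# build_request_body(messages, []) yields approximately:
# {
#   contents: [
#     { role: "user", parts: [{ text: "What is the capital of France?" }] }
#   ],
#   systemInstruction: {
#     parts: [{ text: "You are a terse assistant." }]
#   }
# }
```
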
@@ -92,21 +117,116 @@ module RubyPi
  # Converts a normalized message hash to Gemini's content format.
  #
+ # Critically, an assistant message that carries `tool_calls` (set by
+ # the agent loop after a tool-using turn) must be rendered with one
+ # `functionCall` part per tool call. Without those parts, Gemini
+ # rejects any subsequent `functionResponse` on the next turn because
+ # the response has nothing to correlate against. Earlier versions
+ # dropped `tool_calls` here, breaking multi-turn tool use.
+ #
  # @param message [Hash] a message with :role and :content keys
  # @return [Hash] Gemini-formatted content object
  def format_message(message)
    role = message[:role]&.to_s || message["role"]&.to_s || "user"
-   content = message[:content] || message["content"] || ""
+   content = message[:content] || message["content"]
+
+   # Tool-role messages carry function-call results. When the tool name
+   # is present, send as a Gemini functionResponse so the model can
+   # correlate the result with its earlier functionCall. System messages
+   # should have been extracted by build_request_body before reaching
+   # this method.
+   tool_name = message[:name] || message["name"]
+   if role == "tool" && tool_name
+     # Gemini's functionResponse expects a structured `response` object.
+     # Tool results are pre-serialized by the loop as either a JSON
+     # string (success) or an "Error: ..." string (failure). Try to
+     # parse JSON so the model receives structured data; fall back to
+     # wrapping the raw string under :result for plain-text content.
+     response_payload = parse_tool_response(content)
+     return {
+       role: "user",
+       parts: [{
+         functionResponse: {
+           name: tool_name.to_s,
+           response: response_payload
+         }
+       }]
+     }
+   end

-   # Gemini uses "user" and "model" roles
-   gemini_role = role == "assistant" ? "model" : role
+   # Assistant messages may carry `tool_calls` from a prior turn. Each
+   # one must be emitted as a `functionCall` part on the model turn so
+   # that the next turn's `functionResponse` has something to bind to.
+   if role == "assistant"
+     parts = []
+     text = content.to_s
+     parts << { text: text } unless text.empty?
+
+     tool_calls = message[:tool_calls] || message["tool_calls"]
+     if tool_calls.is_a?(Array)
+       tool_calls.each do |tc|
+         tc_name = (tc[:name] || tc["name"]).to_s
+         tc_args = tc[:arguments] || tc["arguments"] || {}
+         tc_args = parse_tool_arguments(tc_args)
+         parts << { functionCall: { name: tc_name, args: tc_args } }
+       end
+     end
+
+     # Gemini rejects an empty parts array on a model turn. If the
+     # assistant truly had no content and no tool_calls, fall back to
+     # an empty text part.
+     parts << { text: "" } if parts.empty?
+
+     return { role: "model", parts: parts }
+   end

    {
-     role: gemini_role,
+     role: role,
      parts: [{ text: content.to_s }]
    }
  end

+ # Best-effort parse of a tool-result string into a structured object
+ # for Gemini's `functionResponse.response`. JSON content is returned
+ # as-is (wrapped in a hash if it parsed to a non-hash); non-JSON
+ # content (e.g., "Error: ...") is wrapped under :result.
+ #
+ # @param content [String, Hash, nil]
+ # @return [Hash]
+ def parse_tool_response(content)
+   return { result: "" } if content.nil?
+   return content if content.is_a?(Hash)
+
+   str = content.to_s
+   return { result: str } if str.strip.empty?
+
+   begin
+     parsed = JSON.parse(str)
+     parsed.is_a?(Hash) ? parsed : { result: parsed }
+   rescue JSON::ParserError
+     { result: str }
+   end
+ end
+
+ # Coerce a tool_call.arguments value (Hash, JSON string, or other)
+ # into a Hash suitable for Gemini's `functionCall.args`. Malformed
+ # or non-Hash values become an empty hash so the request is still
+ # well-formed.
+ #
+ # @param args [Hash, String, nil]
+ # @return [Hash]
+ def parse_tool_arguments(args)
+   return args if args.is_a?(Hash)
+   return {} unless args.is_a?(String) && !args.strip.empty?
+
+   begin
+     parsed = JSON.parse(args)
+     parsed.is_a?(Hash) ? parsed : {}
+   rescue JSON::ParserError
+     {}
+   end
+ end
+
  # Converts a tool definition to Gemini's function declaration format.
  # Accepts either a RubyPi::Tools::Definition or a plain Hash.
  #
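
The two new branches are easiest to follow with a concrete turn. The messages below are invented, but the output shapes trace directly from the code above.

```ruby
assistant_msg = {
  role: "assistant",
  content: "",
  tool_calls: [{ name: "get_weather", arguments: '{"city":"Paris"}' }]
}
tool_msg = { role: "tool", name: "get_weather", content: '{"temp_c":21}' }

# format_message(assistant_msg) =>
#   { role: "model",
#     parts: [{ functionCall: { name: "get_weather", args: { "city" => "Paris" } } }] }
#
# format_message(tool_msg) =>
#   { role: "user",
#     parts: [{ functionResponse: { name: "get_weather", response: { "temp_c" => 21 } } }] }
```
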
@@ -126,17 +246,37 @@ module RubyPi
    declaration
  end

+ # Returns the default HTTP headers for Gemini API requests.
+ #
+ # Issue #13: The API key is now sent via the `x-goog-api-key` header
+ # instead of being interpolated into the URL query string. This prevents
+ # the key from leaking into debug logs, backtraces, and HTTP intermediary
+ # logs (proxies, load balancers, etc.).
+ #
+ # @return [Hash] headers hash
+ def default_headers
+   {
+     "x-goog-api-key" => @api_key.to_s
+   }
+ end
+
  # Executes a standard (non-streaming) request to the Gemini API.
  #
+ # Issue #13: Removed API key from the URL query string. The key is now
+ # sent via the `x-goog-api-key` header (set in default_headers) to
+ # avoid leaking credentials into logs and backtraces.
+ #
  # @param body [Hash] the request body
  # @return [RubyPi::LLM::Response]
  def perform_standard_request(body)
-   conn = build_connection(base_url: BASE_URL)
-   url = "/#{API_VERSION}/models/#{@model}:generateContent?key=#{@api_key}"
+   conn = build_connection(base_url: BASE_URL, headers: default_headers)
+   url = "/#{API_VERSION}/models/#{@model}:generateContent"

-   response = conn.post(url) do |req|
-     req.headers["Content-Type"] = "application/json"
-     req.body = JSON.generate(body)
+   response = with_transport_errors do
+     conn.post(url) do |req|
+       req.headers["Content-Type"] = "application/json"
+       req.body = JSON.generate(body)
+     end
    end

    handle_error_response(response) unless response.success?
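
For comparison, the same idea expressed with plain Faraday. The gem's build_connection helper is not shown in this diff, and the base URL and API version below are the public Gemini defaults rather than the gem's constants.

```ruby
require "faraday"
require "json"

conn = Faraday.new(
  url: "https://generativelanguage.googleapis.com",
  headers: { "x-goog-api-key" => ENV.fetch("GEMINI_API_KEY") } # key never appears in the URL
)

response = conn.post("/v1beta/models/gemini-2.0-flash:generateContent") do |req|
  req.headers["Content-Type"] = "application/json"
  req.body = JSON.generate(contents: [{ role: "user", parts: [{ text: "Hi" }] }])
end
```
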
@@ -145,57 +285,120 @@ module RubyPi
  # Executes a streaming request to the Gemini API, yielding events.
  #
+ # Issue #13: Removed API key from the URL query string. The key is now
+ # sent via the `x-goog-api-key` header (set in default_headers).
+ #
  # @param body [Hash] the request body
  # @yield [event] StreamEvent objects
  # @return [RubyPi::LLM::Response] final aggregated response
  def perform_streaming_request(body, &block)
-   conn = build_connection(base_url: BASE_URL)
-   url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?key=#{@api_key}&alt=sse"
+   conn = build_connection(base_url: BASE_URL, headers: default_headers)
+   url = "/#{API_VERSION}/models/#{@model}:streamGenerateContent?alt=sse"

    accumulated_text = +""
    accumulated_tool_calls = []
    usage_data = {}
-
-   response = conn.post(url) do |req|
-     req.headers["Content-Type"] = "application/json"
-     req.body = JSON.generate(body)
-   end
-
-   handle_error_response(response) unless response.success?
-
-   # Parse SSE events from the response body
-   parse_sse_events(response.body) do |data|
-     candidates = data.dig("candidates") || []
-     candidate = candidates.first
-     next unless candidate
-
-     parts = candidate.dig("content", "parts") || []
-     parts.each do |part|
-       if part.key?("text")
-         text_chunk = part["text"]
-         accumulated_text << text_chunk
-         block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
-       elsif part.key?("functionCall")
-         fc = part["functionCall"]
-         tool_call = ToolCall.new(
-           id: "gemini_#{accumulated_tool_calls.length}",
-           name: fc["name"],
-           arguments: fc["args"] || {}
-         )
-         accumulated_tool_calls << tool_call
-         block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
+   finish_reason = nil
+
+   # Buffer for incomplete SSE lines across on_data chunks. Faraday's
+   # on_data callback delivers raw bytes as they arrive from the network,
+   # which may split SSE events mid-line. We accumulate a line buffer and
+   # process complete lines incrementally so that deltas reach the caller
+   # as soon as each SSE event is fully received.
+   sse_buffer = +""
+   response_status = nil
+   error_body = +""
+
+   response = with_transport_errors do
+     conn.post(url) do |req|
+       req.headers["Content-Type"] = "application/json"
+       req.body = JSON.generate(body)
+
+       # Use Faraday's on_data callback for real incremental streaming.
+       # Without this, Faraday buffers the entire response body before
+       # returning — no deltas reach the caller until the model finishes
+       # generating (fake streaming).
+       req.options.on_data = proc do |chunk, _overall_received_bytes, env|
+         response_status ||= env&.status
+
+         # If the HTTP status indicates an error, accumulate the body for
+         # the error handler instead of parsing it as SSE events.
+         if response_status && response_status >= 400
+           error_body << chunk
+           next
          end
-     end

-     # Capture usage metadata if present
-     if data.key?("usageMetadata")
-       meta = data["usageMetadata"]
-       usage_data = {
-         prompt_tokens: meta["promptTokenCount"],
-         completion_tokens: meta["candidatesTokenCount"],
-         total_tokens: meta["totalTokenCount"]
-       }
+         sse_buffer << chunk
+         # Process all complete lines in the buffer
+         while (line_end = sse_buffer.index("\n"))
+           line = sse_buffer.slice!(0, line_end + 1).strip
+           next if line.empty?
+           next unless line.start_with?("data: ")
+
+           data_str = line.sub(/\Adata: /, "")
+           next if data_str == "[DONE]"
+
+           begin
+             data = JSON.parse(data_str)
+           rescue JSON::ParserError
+             next
+           end
+
+           # Process this SSE event
+           candidates = data.dig("candidates") || []
+           candidate = candidates.first
+           next unless candidate
+
+           parts = candidate.dig("content", "parts") || []
+           parts.each do |part|
+             if part.key?("text")
+               text_chunk = part["text"]
+               accumulated_text << text_chunk
+               block.call(StreamEvent.new(type: :text_delta, data: text_chunk))
+             elsif part.key?("functionCall")
+               fc = part["functionCall"]
+               tool_call = ToolCall.new(
+                 # Generate a globally-unique ID per tool call. A simple
+                 # length-based counter ("gemini_0", "gemini_1") collides
+                 # across turns since each response restarts numbering at
+                 # 0, breaking any caller that uses ID as a hash key for
+                 # observability or result correlation.
+                 id: "gemini_#{SecureRandom.hex(8)}",
+                 name: fc["name"],
+                 arguments: fc["args"] || {}
+               )
+               accumulated_tool_calls << tool_call
+               block.call(StreamEvent.new(type: :tool_call_delta, data: tool_call.to_h))
+             end
+           end
+
+           # Parse the actual finish reason from the streaming response
+           # instead of hardcoding "stop". Gemini sends finishReason in
+           # the candidate object (e.g., "STOP", "MAX_TOKENS", "SAFETY").
+           if candidate["finishReason"]
+             finish_reason = candidate["finishReason"].downcase
+           end
+
+           # Capture usage metadata if present
+           if data.key?("usageMetadata")
+             meta = data["usageMetadata"]
+             usage_data = {
+               prompt_tokens: meta["promptTokenCount"],
+               completion_tokens: meta["candidatesTokenCount"],
+               total_tokens: meta["totalTokenCount"]
+             }
+           end
+         end
       end
+     end # conn.post
+   end # with_transport_errors
+
+   # When on_data is active, the response body was consumed by the
+   # callback. Pass the accumulated error_body so ApiError carries the
+   # full server message instead of an empty body.
+   unless response.success?
+     error_body_str = error_body.empty? ? response.body : error_body
+     handle_error_response(response, override_body: error_body_str)
    end

    # Signal completion
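
The line-buffering idea is independent of Faraday. A dependency-free sketch with invented chunks:

```ruby
require "json"

chunks = [
  "data: {\"candidates\":[{\"content\":{\"parts\":[{\"te",   # event split mid-line
  "xt\":\"Hel\"}]}}]}\n",
  "data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"lo\"}]}}]}\n"
]

buffer = +""
chunks.each do |chunk|
  buffer << chunk
  # Only parse once a full line (terminated by "\n") has arrived.
  while (newline = buffer.index("\n"))
    line = buffer.slice!(0, newline + 1).strip
    next unless line.start_with?("data: ")

    data = JSON.parse(line.delete_prefix("data: "))
    print data.dig("candidates", 0, "content", "parts", 0, "text")
  end
end
# Prints "Hello": the first event only becomes parseable after the second chunk.
```
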
@@ -205,7 +408,7 @@ module RubyPi
      content: accumulated_text.empty? ? nil : accumulated_text,
      tool_calls: accumulated_tool_calls,
      usage: usage_data,
-     finish_reason: "stop"
+     finish_reason: finish_reason || "stop"
    )
  end

@@ -227,7 +430,9 @@ module RubyPi
    elsif part.key?("functionCall")
      fc = part["functionCall"]
      tool_calls << ToolCall.new(
-       id: "gemini_#{tool_calls.length}",
+       # See note in perform_streaming_request: per-response counters
+       # collide across turns, so we generate a globally-unique ID.
+       id: "gemini_#{SecureRandom.hex(8)}",
        name: fc["name"],
        arguments: fc["args"] || {}
      )
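
A quick illustration of the collision the comment describes:

```ruby
# Two separate responses both number their first tool call 0, so the old
# scheme reuses the same key across turns:
first_turn_id  = "gemini_0"
second_turn_id = "gemini_0"
first_turn_id == second_turn_id # => true: distinct calls, identical key

require "securerandom"
SecureRandom.hex(8) # => e.g. "9f1c2ab34d5e6f70", effectively unique per call
```
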