RubyGems - ruby-pi - Versions diffs - 0.1.5 → 0.1.8 - Mend

ruby-pi 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/CHANGELOG.md +51 -0
data/lib/ruby_pi/agent/core.rb +6 -0
data/lib/ruby_pi/agent/loop.rb +40 -25
data/lib/ruby_pi/agent/state.rb +6 -0
data/lib/ruby_pi/configuration.rb +50 -5
data/lib/ruby_pi/context/compaction.rb +61 -24
data/lib/ruby_pi/llm/anthropic.rb +38 -17
data/lib/ruby_pi/llm/base_provider.rb +72 -1
data/lib/ruby_pi/llm/fallback.rb +30 -9
data/lib/ruby_pi/llm/gemini.rb +136 -37
data/lib/ruby_pi/llm/openai.rb +53 -19
data/lib/ruby_pi/llm/tool_call.rb +2 -0
data/lib/ruby_pi/tools/definition.rb +39 -4
data/lib/ruby_pi/tools/executor.rb +24 -7
data/lib/ruby_pi/tools/schema.rb +10 -0
data/lib/ruby_pi/version.rb +1 -1
data/lib/ruby_pi.rb +7 -0
metadata +15 -1

data/lib/ruby_pi/llm/fallback.rb CHANGED Viewed

@@ -16,11 +16,14 @@ module RubyPi
     # Authentication errors are NOT retried with the fallback since they
     # indicate a configuration problem rather than a transient failure.
     #
-    # Issue #23: When streaming, the Fallback now buffers deltas from the
-    # primary provider. If the primary fails mid-stream, the buffered deltas
-    # are discarded and the fallback provider streams fresh from the start.
-    # This prevents the consumer from seeing partial output from the primary
-    # concatenated with the complete output from the fallback.
+    # Issue #23 + Issue #12: When streaming, events flow from the primary
+    # provider directly to the consumer in real time (no buffering), preserving
+    # the streaming UX on the happy path. If the primary fails mid-stream, a
+    # :fallback_start StreamEvent is emitted before the fallback takes over, so
+    # the consumer can discard any partial output already rendered from the
+    # failed primary. (The agent loop translates :fallback_start into a
+    # :provider_fallback event; raw Fallback consumers should handle
+    # :fallback_start themselves.)
     #
     # @example Setting up a fallback chain
     #   primary  = RubyPi::LLM.model(:gemini, "gemini-2.0-flash")
@@ -146,6 +149,19 @@ module RubyPi
       # @yield [event] the consumer's streaming block
       # @return [RubyPi::LLM::Response]
       def perform_complete_with_streaming_fallback(messages:, tools:, &block)
+        # Count the characters of text already delivered to the consumer from
+        # the primary. If the primary fails mid-stream AFTER yielding text,
+        # the fallback streams a complete fresh response — a consumer that
+        # merely appends deltas would render the primary's partial text
+        # followed by the full fallback text. The :fallback_start payload
+        # carries partial_output/partial_chars so consumers can deterministically
+        # truncate what they already rendered.
+        partial_chars = 0
+        counting_block = proc do |event|
+          partial_chars += event.data.to_s.length if event.text_delta?
+          block.call(event)
+        end
         begin
           # Stream primary events directly to the consumer for real-time UX.
           # No buffering — tokens appear immediately as they arrive.
@@ -153,7 +169,7 @@ module RubyPi
             messages: messages,
             tools: tools,
             stream: true,
-            &block
+            &counting_block
           )
           response
@@ -164,12 +180,17 @@ module RubyPi
           log_fallback(e)
           # Signal the consumer that the primary failed mid-stream and a
-          # fallback provider is taking over. Consumers should use this event
-          # to clear any partial output from the failed primary.
+          # fallback provider is taking over. Consumers MUST use this event
+          # to clear any partial output from the failed primary:
+          #   partial_output — true when the primary yielded any text deltas
+          #   partial_chars  — how many characters were yielded (truncate by
+          #                    this amount if appending to a shared buffer)
           block.call(StreamEvent.new(type: :fallback_start, data: {
             failed_provider: @primary.provider_name,
             error: e.message,
-            fallback_provider: @fallback.provider_name
+            fallback_provider: @fallback.provider_name,
+            partial_output: partial_chars.positive?,
+            partial_chars: partial_chars
           }))
           # Stream directly from the fallback to the consumer's block.

data/lib/ruby_pi/llm/gemini.rb CHANGED Viewed

@@ -6,6 +6,9 @@
 # the Gemini REST API for both synchronous and streaming completions, including
 # tool/function calling support.
+require "json"
+require "securerandom"
 module RubyPi
   module LLM
     # Google Gemini provider implementation. Communicates with the Gemini
@@ -115,44 +118,116 @@ module RubyPi
       # Converts a normalized message hash to Gemini's content format.
       #
+      # Critically, an assistant message that carries `tool_calls` (set by
+      # the agent loop after a tool-using turn) must be rendered with one
+      # `functionCall` part per tool call. Without those parts, Gemini
+      # rejects any subsequent `functionResponse` on the next turn because
+      # the response has nothing to correlate against. Earlier versions
+      # dropped `tool_calls` here, breaking multi-turn tool use.
+      #
       # @param message [Hash] a message with :role and :content keys
       # @return [Hash] Gemini-formatted content object
       def format_message(message)
         role = message[:role]&.to_s || message["role"]&.to_s || "user"
-        content = message[:content] || message["content"] || ""
-        # Gemini uses "user" and "model" roles. Map tool results to "user"
-        # role with a functionResponse part when we have the metadata, or
-        # plain text otherwise. System messages should have been extracted
-        # by build_request_body before reaching this method.
-        gemini_role = case role
-                      when "assistant" then "model"
-                      when "tool"      then "user"
-                      else                  role
-                      end
-        # Tool-role messages carry function call results. When tool_call_id
-        # and name are present, send as a Gemini functionResponse so the
-        # model can correlate the result with its earlier functionCall.
+        content = message[:content] || message["content"]
+        # Tool-role messages carry function-call results. When the tool name
+        # is present, send as a Gemini functionResponse so the model can
+        # correlate the result with its earlier functionCall. System messages
+        # should have been extracted by build_request_body before reaching
+        # this method.
         tool_name = message[:name] || message["name"]
         if role == "tool" && tool_name
+          # Gemini's functionResponse expects a structured `response` object.
+          # Tool results are pre-serialized by the loop as either a JSON
+          # string (success) or an "Error: ..." string (failure). Try to
+          # parse JSON so the model receives structured data; fall back to
+          # wrapping the raw string under :result for plain-text content.
+          response_payload = parse_tool_response(content)
           return {
             role: "user",
             parts: [{
               functionResponse: {
                 name: tool_name.to_s,
-                response: { result: content.to_s }
+                response: response_payload
               }
             }]
           }
         end
+        # Assistant messages may carry `tool_calls` from a prior turn. Each
+        # one must be emitted as a `functionCall` part on the model turn so
+        # that the next turn's `functionResponse` has something to bind to.
+        if role == "assistant"
+          parts = []
+          text = content.to_s
+          parts << { text: text } unless text.empty?
+          tool_calls = message[:tool_calls] || message["tool_calls"]
+          if tool_calls.is_a?(Array)
+            tool_calls.each do |tc|
+              tc_name = (tc[:name] || tc["name"]).to_s
+              tc_args = tc[:arguments] || tc["arguments"] || {}
+              tc_args = parse_tool_arguments(tc_args)
+              parts << { functionCall: { name: tc_name, args: tc_args } }
+            end
+          end
+          # Gemini rejects an empty parts array on a model turn. If the
+          # assistant truly had no content and no tool_calls, fall back to
+          # an empty text part.
+          parts << { text: "" } if parts.empty?
+          return { role: "model", parts: parts }
+        end
         {
-          role: gemini_role,
+          role: role,
           parts: [{ text: content.to_s }]
         }
       end
+      # Best-effort parse of a tool-result string into a structured object
+      # for Gemini's `functionResponse.response`. JSON content is returned
+      # as-is (wrapped in a hash if it parsed to a non-hash); non-JSON
+      # content (e.g., "Error: ...") is wrapped under :result.
+      #
+      # @param content [String, Hash, nil]
+      # @return [Hash]
+      def parse_tool_response(content)
+        return { result: "" } if content.nil?
+        return content if content.is_a?(Hash)
+        str = content.to_s
+        return { result: str } if str.strip.empty?
+        begin
+          parsed = JSON.parse(str)
+          parsed.is_a?(Hash) ? parsed : { result: parsed }
+        rescue JSON::ParserError
+          { result: str }
+        end
+      end
+      # Coerce a tool_call.arguments value (Hash, JSON string, or other)
+      # into a Hash suitable for Gemini's `functionCall.args`. Malformed
+      # or non-Hash values become an empty hash so the request is still
+      # well-formed.
+      #
+      # @param args [Hash, String, nil]
+      # @return [Hash]
+      def parse_tool_arguments(args)
+        return args if args.is_a?(Hash)
+        return {} unless args.is_a?(String) && !args.strip.empty?
+        begin
+          parsed = JSON.parse(args)
+          parsed.is_a?(Hash) ? parsed : {}
+        rescue JSON::ParserError
+          {}
+        end
+      end
       # Converts a tool definition to Gemini's function declaration format.
       # Accepts either a RubyPi::Tools::Definition or a plain Hash.
       #
@@ -198,9 +273,11 @@ module RubyPi
         conn = build_connection(base_url: BASE_URL, headers: default_headers)
         url = "/#{API_VERSION}/models/#{@model}:generateContent"
-        response = conn.post(url) do |req|
-          req.headers["Content-Type"] = "application/json"
-          req.body = JSON.generate(body)
+        response = with_transport_errors do
+          conn.post(url) do |req|
+            req.headers["Content-Type"] = "application/json"
+            req.body = JSON.generate(body)
+          end
         end
         handle_error_response(response) unless response.success?
@@ -229,15 +306,21 @@ module RubyPi
         # which may split SSE events mid-line. We accumulate a line buffer and
         # process complete lines incrementally so that deltas reach the caller
         # as soon as each SSE event is fully received.
-        sse_buffer = +""
+        # BINARY buffer: chunks arrive as ASCII-8BIT and may end mid-way
+        # through a multi-byte UTF-8 character; appending such a chunk to a
+        # UTF-8 buffer holding non-ASCII text raises
+        # Encoding::CompatibilityError. Complete lines are re-encoded to
+        # UTF-8 (and scrubbed) before parsing.
+        sse_buffer = (+"").force_encoding(Encoding::BINARY)
         response_status = nil
-        error_body = +""
+        error_body = (+"").force_encoding(Encoding::BINARY)
-        response = conn.post(url) do |req|
-          req.headers["Content-Type"] = "application/json"
-          req.body = JSON.generate(body)
+        response = with_transport_errors do
+          conn.post(url) do |req|
+            req.headers["Content-Type"] = "application/json"
+            req.body = JSON.generate(body)
-          # Use Faraday's on_data callback for real incremental streaming.
+            # Use Faraday's on_data callback for real incremental streaming.
           # Without this, Faraday buffers the entire response body before
           # returning — no deltas reach the caller until the model finishes
           # generating (fake streaming).
@@ -247,14 +330,17 @@ module RubyPi
             # If the HTTP status indicates an error, accumulate the body for
             # the error handler instead of parsing it as SSE events.
             if response_status && response_status >= 400
-              error_body << chunk
+              error_body << chunk.b
               next
             end
-            sse_buffer << chunk
-            # Process all complete lines in the buffer
+            sse_buffer << chunk.b
+            # Process all complete lines in the buffer. A complete line holds
+            # complete UTF-8 sequences (multi-byte characters split across
+            # chunks are repaired by the buffering), so re-encode it to UTF-8
+            # here; scrub guards against a server sending invalid bytes.
             while (line_end = sse_buffer.index("\n"))
-              line = sse_buffer.slice!(0, line_end + 1).strip
+              line = sse_buffer.slice!(0, line_end + 1).force_encoding(Encoding::UTF_8).scrub.strip
               next if line.empty?
               next unless line.start_with?("data: ")
@@ -281,7 +367,12 @@ module RubyPi
                 elsif part.key?("functionCall")
                   fc = part["functionCall"]
                   tool_call = ToolCall.new(
-                    id: "gemini_#{accumulated_tool_calls.length}",
+                    # Generate a globally-unique ID per tool call. A simple
+                    # length-based counter ("gemini_0", "gemini_1") collides
+                    # across turns since each response restarts numbering at
+                    # 0, breaking any caller that uses ID as a hash key for
+                    # observability or result correlation.
+                    id: "gemini_#{SecureRandom.hex(8)}",
                     name: fc["name"],
                     arguments: fc["args"] || {}
                   )
@@ -293,8 +384,11 @@ module RubyPi
               # Parse the actual finish reason from the streaming response
               # instead of hardcoding "stop". Gemini sends finishReason in
               # the candidate object (e.g., "STOP", "MAX_TOKENS", "SAFETY").
+              # Coerce via to_s before downcase so a non-String payload can
+              # never raise NoMethodError mid-stream (mirrors the &.to_s in
+              # the non-streaming parse path).
               if candidate["finishReason"]
-                finish_reason = candidate["finishReason"].downcase
+                finish_reason = candidate["finishReason"].to_s.downcase
               end
               # Capture usage metadata if present
@@ -308,13 +402,14 @@ module RubyPi
               end
             end
           end
-        end
+          end # conn.post
+        end # with_transport_errors
         # When on_data is active, the response body was consumed by the
         # callback. Pass the accumulated error_body so ApiError carries the
         # full server message instead of an empty body.
         unless response.success?
-          error_body_str = error_body.empty? ? response.body : error_body
+          error_body_str = error_body.empty? ? response.body : error_body.force_encoding(Encoding::UTF_8).scrub
           handle_error_response(response, override_body: error_body_str)
         end
@@ -347,7 +442,9 @@ module RubyPi
           elsif part.key?("functionCall")
             fc = part["functionCall"]
             tool_calls << ToolCall.new(
-              id: "gemini_#{tool_calls.length}",
+              # See note in perform_streaming_request: per-response counters
+              # collide across turns, so we generate a globally-unique ID.
+              id: "gemini_#{SecureRandom.hex(8)}",
               name: fc["name"],
               arguments: fc["args"] || {}
             )
@@ -365,8 +462,10 @@ module RubyPi
           }
         end
-        # Map Gemini finish reason to normalized string
-        finish_reason = candidate["finishReason"]&.downcase
+        # Map Gemini finish reason to normalized string. to_s guards against
+        # a non-String payload (mirrors the streaming path); &. keeps a
+        # missing finishReason as nil.
+        finish_reason = candidate["finishReason"]&.to_s&.downcase
         Response.new(
           content: content,

data/lib/ruby_pi/llm/openai.rb CHANGED Viewed

@@ -6,6 +6,8 @@
 # OpenAI Chat Completions API for both synchronous and streaming completions,
 # including function/tool calling support.
+require "json"
 module RubyPi
   module LLM
     # OpenAI provider implementation. Communicates with the OpenAI Chat
@@ -183,11 +185,31 @@ module RubyPi
             tc_name = tc[:name] || tc["name"]
             tc_args = tc[:arguments] || tc["arguments"] || {}
-            # OpenAI requires arguments as a JSON string
-            args_string = if tc_args.is_a?(String)
-                            tc_args
-                          elsif tc_args.is_a?(Hash)
+            # OpenAI requires arguments to be a JSON-encoded string. We
+            # validate up-front so a malformed string fails fast with a
+            # typed error here rather than as an opaque HTTP 400 from
+            # OpenAI. This mirrors Anthropic's input validation in
+            # build_assistant_message.
+            args_string = case tc_args
+                          when Hash
                             JSON.generate(tc_args)
+                          when String
+                            stripped = tc_args.strip
+                            if stripped.empty?
+                              "{}"
+                            else
+                              begin
+                                JSON.parse(tc_args)
+                                tc_args
+                              rescue JSON::ParserError => e
+                                raise RubyPi::ProviderError.new(
+                                  "Invalid JSON in assistant tool_call.arguments " \
+                                  "for tool '#{tc_name || "unknown"}': #{e.message} " \
+                                  "(raw: #{tc_args.inspect})",
+                                  provider: :openai
+                                )
+                              end
+                            end
                           else
                             "{}"
                           end
@@ -261,9 +283,11 @@ module RubyPi
           headers: default_headers
         )
-        response = conn.post("/v1/chat/completions") do |req|
-          req.headers["Content-Type"] = "application/json"
-          req.body = JSON.generate(body)
+        response = with_transport_errors do
+          conn.post("/v1/chat/completions") do |req|
+            req.headers["Content-Type"] = "application/json"
+            req.body = JSON.generate(body)
+          end
         end
         handle_error_response(response) unless response.success?
@@ -296,15 +320,21 @@ module RubyPi
         # which may split SSE events mid-line. We accumulate a line buffer and
         # process complete lines incrementally so that deltas reach the caller
         # as soon as each SSE event is fully received.
-        sse_buffer = +""
+        # BINARY buffer: chunks arrive as ASCII-8BIT and may end mid-way
+        # through a multi-byte UTF-8 character; appending such a chunk to a
+        # UTF-8 buffer holding non-ASCII text raises
+        # Encoding::CompatibilityError. Complete lines are re-encoded to
+        # UTF-8 (and scrubbed) before parsing.
+        sse_buffer = (+"").force_encoding(Encoding::BINARY)
         response_status = nil
-        error_body = +""
+        error_body = (+"").force_encoding(Encoding::BINARY)
-        response = conn.post("/v1/chat/completions") do |req|
-          req.headers["Content-Type"] = "application/json"
-          req.body = JSON.generate(body)
+        response = with_transport_errors do
+          conn.post("/v1/chat/completions") do |req|
+            req.headers["Content-Type"] = "application/json"
+            req.body = JSON.generate(body)
-          # Use Faraday's on_data callback for real incremental streaming.
+            # Use Faraday's on_data callback for real incremental streaming.
           # Without this, Faraday buffers the entire response body before
           # returning — no deltas reach the caller until the model finishes
           # generating (fake streaming).
@@ -314,14 +344,17 @@ module RubyPi
             # If the HTTP status indicates an error, accumulate the body for
             # the error handler instead of parsing it as SSE events.
             if response_status && response_status >= 400
-              error_body << chunk
+              error_body << chunk.b
               next
             end
-            sse_buffer << chunk
-            # Process all complete lines in the buffer
+            sse_buffer << chunk.b
+            # Process all complete lines in the buffer. A complete line holds
+            # complete UTF-8 sequences (multi-byte characters split across
+            # chunks are repaired by the buffering), so re-encode it to UTF-8
+            # here; scrub guards against a server sending invalid bytes.
             while (line_end = sse_buffer.index("\n"))
-              line = sse_buffer.slice!(0, line_end + 1).strip
+              line = sse_buffer.slice!(0, line_end + 1).force_encoding(Encoding::UTF_8).scrub.strip
               next if line.empty?
               next unless line.start_with?("data: ")
@@ -389,13 +422,14 @@ module RubyPi
               end
             end
           end
-        end
+          end # conn.post
+        end # with_transport_errors
         # When on_data is active, the response body was consumed by the
         # callback. Pass the accumulated error_body so ApiError carries the
         # full server message instead of an empty body.
         unless response.success?
-          error_body_str = error_body.empty? ? response.body : error_body
+          error_body_str = error_body.empty? ? response.body : error_body.force_encoding(Encoding::UTF_8).scrub
           handle_error_response(response, override_body: error_body_str)
         end

data/lib/ruby_pi/llm/tool_call.rb CHANGED Viewed

@@ -6,6 +6,8 @@
 # decides to invoke a tool, it returns one or more ToolCall objects describing
 # which function to call and with what arguments.
+require "json"
 module RubyPi
   module LLM
     # A tool call extracted from an LLM response. Contains the unique call ID,

data/lib/ruby_pi/tools/definition.rb CHANGED Viewed

@@ -37,16 +37,32 @@ module RubyPi
       # @return [Hash] A JSON Schema hash describing the tool's parameters.
       attr_reader :parameters
+      # Tool names must satisfy the strictest provider constraint (Anthropic's
+      # ^[a-zA-Z0-9_-]{1,64}$). Without this guard, a name like "send.email"
+      # registers fine and then 400s on every API request with an opaque
+      # server-side validation error that doesn't point back to the tool.
+      NAME_FORMAT = /\A[a-zA-Z0-9_-]{1,64}\z/
       # Creates a new tool definition.
       #
-      # @param name [String, Symbol] Unique identifier for the tool.
+      # @param name [String, Symbol] Unique identifier for the tool. Must match
+      #   NAME_FORMAT (letters, digits, underscore, hyphen; max 64 chars).
       # @param description [String] What the tool does (shown to the LLM).
       # @param category [Symbol, nil] Optional grouping category.
       # @param parameters [Hash] JSON Schema hash for the tool's input parameters.
-      # @yield [Hash] Block that implements the tool logic. Receives a hash of arguments.
-      # @raise [ArgumentError] If name or description is missing, or no block given.
+      # @yield [Hash] Block that implements the tool logic. Receives a hash of
+      #   symbol-keyed arguments, or keyword arguments if the block declares
+      #   keyword parameters (see #call).
+      # @raise [ArgumentError] If name is missing or violates NAME_FORMAT,
+      #   description is missing, or no block given.
       def initialize(name:, description:, category: nil, parameters: {}, &block)
         raise ArgumentError, "Tool name is required" if name.nil? || name.to_s.strip.empty?
+        unless name.to_s.match?(NAME_FORMAT)
+          raise ArgumentError,
+                "Tool name #{name.to_s.inspect} is invalid — provider APIs require " \
+                "names matching #{NAME_FORMAT.inspect} (letters, digits, underscore, " \
+                "hyphen; 1-64 characters)"
+        end
         raise ArgumentError, "Tool description is required" if description.nil? || description.strip.empty?
         raise ArgumentError, "Tool implementation block is required" unless block_given?
@@ -55,14 +71,33 @@ module RubyPi
         @category = category&.to_sym
         @parameters = parameters
         @implementation = block
+        # On Ruby 3.x a positional Hash is never auto-splatted to keywords, so
+        # a block written `{ |content:, platform:| ... }` — the natural style
+        # given named schema parameters — would fail every call with
+        # "missing keyword". Detect keyword parameters once here and splat in
+        # #call accordingly.
+        @expects_keywords = block.parameters.any? { |type, _| %i[key keyreq keyrest].include?(type) }
       end
       # Invokes the tool with the given arguments.
       #
+      # Blocks may be written either style:
+      #   { |args| args[:content] }            # single positional Hash
+      #   { |content:, platform: "x"| ... }    # keyword parameters
+      #
+      # When the block declares keyword parameters, the arguments hash is
+      # splatted to keywords. Note that a keyword-style block without **rest
+      # raises ArgumentError on unexpected keys — strict by design, since the
+      # keys come from the LLM.
+      #
       # @param args [Hash] The arguments to pass to the tool implementation.
       # @return [Object] Whatever the implementation block returns.
       def call(args = {})
-        @implementation.call(args)
+        if @expects_keywords
+          @implementation.call(**args)
+        else
+          @implementation.call(args)
+        end
       end
       # Converts this tool definition to Google Gemini function declaration format.

data/lib/ruby_pi/tools/executor.rb CHANGED Viewed

@@ -115,7 +115,12 @@ module RubyPi
         end
         # Collect results, respecting the configured timeout for each future.
-        futures.map do |future|
+        # Zip each future with its originating call so failure Results carry
+        # the real tool name — with several tools timing out in parallel,
+        # "unknown" Results are indistinguishable in logs and extension events.
+        calls.zip(futures).map do |call, future|
+          tool_name = (call[:name] || call["name"]).to_s
           # Issue #10: Wait for the future to complete, then check its state
           # explicitly. Future#value returns nil both on timeout AND when the
           # block legitimately returned nil, so we cannot use || to distinguish.
@@ -128,13 +133,16 @@ module RubyPi
             else
               # Future was rejected (raised an exception within the block).
               # This shouldn't normally happen since execute_single rescues
-              # internally, but handle it defensively.
+              # internally, but handle it defensively. The actual run time is
+              # unknown here (the future failed at some point before the wait
+              # elapsed), so report 0.0 rather than a misleading full-timeout
+              # duration for what may have been an instant failure.
               error = future.reason
               Result.new(
-                name: "unknown",
+                name: tool_name,
                 success: false,
                 error: "#{error.class}: #{error.message}",
-                duration_ms: @timeout * 1000.0
+                duration_ms: 0.0
               )
             end
           else
@@ -147,9 +155,9 @@ module RubyPi
             future.cancel if future.respond_to?(:cancel)
             Result.new(
-              name: "unknown",
+              name: tool_name,
               success: false,
-              error: "Tool execution timed out after #{@timeout}s",
+              error: "Tool '#{tool_name}' timed out after #{@timeout}s",
               duration_ms: @timeout * 1000.0
             )
           end
@@ -195,9 +203,18 @@ module RubyPi
         error = nil
         worker = Thread.new do
+          # Don't spam stderr from the rescued worker thread.
+          Thread.current.report_on_exception = false
           begin
             value = tool.call(arguments)
-          rescue StandardError => e
+          rescue Exception => e # rubocop:disable Lint/RescueException
+            # Rescue the full Exception hierarchy (not just StandardError).
+            # If a tool block raises Interrupt, SystemExit, or any other
+            # non-StandardError, rescuing only StandardError leaves both
+            # `value` and `error` nil; the join then reports a successful
+            # nil result — a panic in a tool silently becomes "returned nil".
+            # Capture the failure here; the main thread surfaces it as a
+            # failed Result. The worker thread itself does not propagate.
             error = e
           end
         end

data/lib/ruby_pi/tools/schema.rb CHANGED Viewed

@@ -13,6 +13,16 @@
 # flag consumed by `.object` to populate the top-level "required" array.
 # It is stripped from the property's own schema hash before inclusion.
 #
+# IMPORTANT: Schemas are LLM-facing hints, NOT runtime input validation.
+# Nothing in the execution pipeline validates the model's arguments against
+# the schema before invoking the tool block: `required`, `enum`, `minimum`,
+# and type declarations constrain what the model is *asked* to produce, but a
+# misbehaving model can still omit required fields, send extra keys, or pass
+# a String where an Integer is declared — no coercion is performed. Tool
+# blocks should treat their arguments as untrusted input and validate or
+# coerce what they depend on. (This is deliberate, per the anti-framework
+# philosophy: validation policy belongs to the tool, not the harness.)
+#
 # Usage:
 #   schema = RubyPi::Schema.object(
 #     name: RubyPi::Schema.string("User's name", required: true),

data/lib/ruby_pi/version.rb CHANGED Viewed

@@ -7,5 +7,5 @@
 module RubyPi
   # The current version of the RubyPi gem, following Semantic Versioning.
-  VERSION = "0.1.5"
+  VERSION = "0.1.8"
 end

data/lib/ruby_pi.rb CHANGED Viewed

@@ -82,6 +82,13 @@ module RubyPi
     end
   end
+  # Eagerly initialize the global configuration at load time. The lazy
+  # `@configuration ||= ...` in .configuration is not synchronized; two
+  # threads hitting it concurrently on first access could each construct a
+  # Configuration, with one silently discarded. Initializing here (requires
+  # run single-threaded) removes the race without adding a mutex to every read.
+  @configuration = Configuration.new
   # Namespace for large language model providers and related abstractions.
   module LLM
     class << self