ruby-pi 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,7 +30,7 @@ module RubyPi
  # @param options [Hash] additional options passed to BaseProvider
  def initialize(model: nil, api_key: nil, **options)
  super(**options)
- config = RubyPi.configuration
+ config = @config
  @model = model || config.default_openai_model
  @api_key = api_key || config.openai_api_key
  end
@@ -84,6 +84,9 @@ module RubyPi
  # Structured content (Arrays, Hashes) is preserved for multimodal content
  # blocks (e.g., vision messages with image_url content parts).
  #
+ # Issue #21: When streaming, includes `stream_options: { include_usage: true }`
+ # so OpenAI returns usage data in the final SSE chunk.
+ #
  # @param messages [Array<Hash>] conversation messages
  # @param tools [Array<Hash>] tool definitions
  # @param stream [Boolean] whether streaming is enabled
@@ -94,7 +97,12 @@ module RubyPi
  messages: messages.map { |msg| format_message(msg) }
  }

- body[:stream] = true if stream
+ if stream
+ body[:stream] = true
+ # Issue #21: Request usage data in streaming mode. OpenAI supports
+ # returning token usage in the final SSE chunk when this option is set.
+ body[:stream_options] = { include_usage: true }
+ end

  unless tools.empty?
  body[:tools] = tools.map { |t| format_tool(t) }
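As a rough sketch, the body built above comes out like this when stream is true (the model value is a made-up example; stream and stream_options are documented OpenAI Chat Completions parameters):

{
  model: "gpt-4o",                         # example value; the actual model comes from configuration
  messages: [{ role: "user", content: "Hi" }],
  stream: true,
  stream_options: { include_usage: true }  # asks OpenAI to append usage to the final SSE chunk
}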
@@ -121,9 +129,22 @@ module RubyPi
  # OpenAI accepts role "tool" with a required tool_call_id field
  # to match this result back to the assistant's tool_call.
  tool_call_id = message[:tool_call_id] || message["tool_call_id"]
+
+ # Fail fast with a descriptive error instead of sending "unknown" as
+ # the tool_call_id. OpenAI requires tool_call_id to match a preceding
+ # tool_calls entry; sending "unknown" causes an opaque HTTP 400.
+ if tool_call_id.nil? || tool_call_id.to_s.strip.empty?
+ raise RubyPi::ProviderError.new(
+ "Missing tool_call_id in tool result message. OpenAI requires " \
+ "tool_call_id to match a preceding tool_calls entry. Ensure every " \
+ "tool result message includes a valid :tool_call_id.",
+ provider: :openai
+ )
+ end
+
  {
  role: "tool",
- tool_call_id: tool_call_id || "unknown",
+ tool_call_id: tool_call_id,
  content: format_content(content)
  }

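Concretely, a tool result message must now carry the id of the assistant tool_call it answers. A sketch of a valid message (the id and content values are invented for illustration; the role: "tool" shape is OpenAI's documented format):

{
  role: "tool",
  tool_call_id: "call_abc123",  # must match an id from the preceding assistant tool_calls
  content: "{\"temp\":21}"
}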
@@ -162,20 +183,50 @@ module RubyPi
  tc_name = tc[:name] || tc["name"]
  tc_args = tc[:arguments] || tc["arguments"] || {}

- # OpenAI requires arguments as a JSON string
- args_string = if tc_args.is_a?(String)
- tc_args
- elsif tc_args.is_a?(Hash)
+ # OpenAI requires arguments to be a JSON-encoded string. We
+ # validate up-front so a malformed string fails fast with a
+ # typed error here rather than as an opaque HTTP 400 from
+ # OpenAI. This mirrors Anthropic's input validation in
+ # build_assistant_message.
+ args_string = case tc_args
+ when Hash
  JSON.generate(tc_args)
+ when String
+ stripped = tc_args.strip
+ if stripped.empty?
+ "{}"
+ else
+ begin
+ JSON.parse(tc_args)
+ tc_args
+ rescue JSON::ParserError => e
+ raise RubyPi::ProviderError.new(
+ "Invalid JSON in assistant tool_call.arguments " \
+ "for tool '#{tc_name || "unknown"}': #{e.message} " \
+ "(raw: #{tc_args.inspect})",
+ provider: :openai
+ )
+ end
+ end
  else
  "{}"
  end

+ # Fail fast if tool call id is missing — OpenAI requires it for
+ # conversation continuity and tool result matching.
+ if tc_id.nil? || tc_id.to_s.strip.empty?
+ raise RubyPi::ProviderError.new(
+ "Missing id in assistant tool_call. OpenAI requires each " \
+ "tool_call to have a valid id for result matching.",
+ provider: :openai
+ )
+ end
+
  {
- id: tc_id || "unknown",
+ id: tc_id,
  type: "function",
  function: {
- name: tc_name || "unknown",
+ name: tc_name || "unknown_function",
  arguments: args_string
  }
  }
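The resulting entry follows OpenAI's function-call shape, with arguments always a JSON string; the id, name, and arguments values below are invented for illustration:

{
  id: "call_abc123",
  type: "function",
  function: { name: "get_weather", arguments: "{\"city\":\"Paris\"}" }
}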
@@ -230,9 +281,11 @@ module RubyPi
  headers: default_headers
  )

- response = conn.post("/v1/chat/completions") do |req|
- req.headers["Content-Type"] = "application/json"
- req.body = JSON.generate(body)
+ response = with_transport_errors do
+ conn.post("/v1/chat/completions") do |req|
+ req.headers["Content-Type"] = "application/json"
+ req.body = JSON.generate(body)
+ end
  end

  handle_error_response(response) unless response.success?
@@ -241,6 +294,10 @@ module RubyPi

  # Executes a streaming request to the OpenAI API, yielding events.
  #
+ # Issue #21: Parses usage data from the final SSE chunk (when
+ # stream_options: { include_usage: true } is set in the request).
+ # The final chunk contains the aggregated token usage.
+ #
  # @param body [Hash] the request body
  # @yield [event] StreamEvent objects
  # @return [RubyPi::LLM::Response] final aggregated response
@@ -253,62 +310,126 @@ module RubyPi
  accumulated_text = +""
  tool_call_accumulators = {}
  finish_reason = nil
+ # Issue #21: Accumulate usage data from the final SSE chunk
+ streaming_usage = {}
+
+ # Buffer for incomplete SSE lines across on_data chunks. Faraday's
+ # on_data callback delivers raw bytes as they arrive from the network,
+ # which may split SSE events mid-line. We accumulate a line buffer and
+ # process complete lines incrementally so that deltas reach the caller
+ # as soon as each SSE event is fully received.
+ sse_buffer = +""
+ response_status = nil
+ error_body = +""
+
+ response = with_transport_errors do
+ conn.post("/v1/chat/completions") do |req|
+ req.headers["Content-Type"] = "application/json"
+ req.body = JSON.generate(body)
+
+ # Use Faraday's on_data callback for real incremental streaming.
+ # Without this, Faraday buffers the entire response body before
+ # returning — no deltas reach the caller until the model finishes
+ # generating (fake streaming).
+ req.options.on_data = proc do |chunk, _overall_received_bytes, env|
+ response_status ||= env&.status
+
+ # If the HTTP status indicates an error, accumulate the body for
+ # the error handler instead of parsing it as SSE events.
+ if response_status && response_status >= 400
+ error_body << chunk
+ next
+ end

- response = conn.post("/v1/chat/completions") do |req|
- req.headers["Content-Type"] = "application/json"
- req.body = JSON.generate(body)
- end
+ sse_buffer << chunk
+ # Process all complete lines in the buffer
+ while (line_end = sse_buffer.index("\n"))
+ line = sse_buffer.slice!(0, line_end + 1).strip
+ next if line.empty?
+ next unless line.start_with?("data: ")

- handle_error_response(response) unless response.success?
+ data_str = line.sub(/\Adata: /, "")
+ next if data_str == "[DONE]"

- # Parse SSE events from the response body
- parse_sse_events(response.body) do |data|
- choices = data["choices"] || []
- choice = choices.first
- next unless choice
+ begin
+ data = JSON.parse(data_str)
+ rescue JSON::ParserError
+ next
+ end

- delta = choice["delta"] || {}
- finish_reason = choice["finish_reason"] if choice["finish_reason"]
+ # Issue #21: Capture usage data from the final SSE chunk.
+ # OpenAI sends usage in a dedicated chunk when include_usage is true.
+ if data.key?("usage") && data["usage"]
+ usage_info = data["usage"]
+ streaming_usage = {
+ prompt_tokens: usage_info["prompt_tokens"],
+ completion_tokens: usage_info["completion_tokens"],
+ total_tokens: usage_info["total_tokens"]
+ }
+ end

- # Handle text content deltas
- if delta.key?("content") && delta["content"]
- text = delta["content"]
- accumulated_text << text
- block.call(StreamEvent.new(type: :text_delta, data: text))
- end
+ # Process this SSE event
+ choices = data["choices"] || []
+ choice = choices.first
+ next unless choice

- # Handle tool call deltas
- if delta.key?("tool_calls")
- delta["tool_calls"].each do |tc_delta|
- index = tc_delta["index"] || 0
+ delta = choice["delta"] || {}
+ finish_reason = choice["finish_reason"] if choice["finish_reason"]

- # Initialize accumulator for this tool call
- tool_call_accumulators[index] ||= { id: nil, name: +"", arguments: +"" }
- acc = tool_call_accumulators[index]
+ # Handle text content deltas
+ if delta.key?("content") && delta["content"]
+ text = delta["content"]
+ accumulated_text << text
+ block.call(StreamEvent.new(type: :text_delta, data: text))
+ end

- acc[:id] = tc_delta["id"] if tc_delta["id"]
+ # Handle tool call deltas
+ if delta.key?("tool_calls")
+ delta["tool_calls"].each do |tc_delta|
+ index = tc_delta["index"] || 0

- if tc_delta.dig("function", "name")
- acc[:name] << tc_delta["function"]["name"]
- end
+ # Initialize accumulator for this tool call
+ tool_call_accumulators[index] ||= { id: nil, name: +"", arguments: +"" }
+ acc = tool_call_accumulators[index]

- if tc_delta.dig("function", "arguments")
- acc[:arguments] << tc_delta["function"]["arguments"]
- end
+ acc[:id] = tc_delta["id"] if tc_delta["id"]

- block.call(StreamEvent.new(type: :tool_call_delta, data: {
- index: index,
- id: acc[:id],
- name: acc[:name],
- arguments_fragment: tc_delta.dig("function", "arguments") || ""
- }))
+ if tc_delta.dig("function", "name")
+ acc[:name] << tc_delta["function"]["name"]
+ end
+
+ if tc_delta.dig("function", "arguments")
+ acc[:arguments] << tc_delta["function"]["arguments"]
+ end
+
+ block.call(StreamEvent.new(type: :tool_call_delta, data: {
+ index: index,
+ id: acc[:id],
+ name: acc[:name],
+ arguments_fragment: tc_delta.dig("function", "arguments") || ""
+ }))
+ end
+ end
  end
  end
+ end # conn.post
+ end # with_transport_errors
+
+ # When on_data is active, the response body was consumed by the
+ # callback. Pass the accumulated error_body so ApiError carries the
+ # full server message instead of an empty body.
+ unless response.success?
+ error_body_str = error_body.empty? ? response.body : error_body
+ handle_error_response(response, override_body: error_body_str)
  end

  # Build final tool calls from accumulators
+ # Issue #12: Guard JSON.parse against malformed input. The previous
+ # `empty? ? {} : JSON.parse(...)` check already handled empty strings,
+ # but malformed JSON from a truncated stream would still raise;
+ # safe_parse_arguments guards against both.
  tool_calls = tool_call_accumulators.sort_by { |k, _| k }.map do |_, acc|
- arguments = acc[:arguments].empty? ? {} : JSON.parse(acc[:arguments])
+ arguments = safe_parse_arguments(acc[:arguments])
  ToolCall.new(id: acc[:id], name: acc[:name], arguments: arguments)
  end

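The line-buffering logic can be exercised on its own. A minimal standalone sketch (plain Ruby, not part of the gem) showing an SSE event reassembled from two network chunks:

buffer = +""
events = []

["data: {\"id\":", "\"1\"}\n"].each do |chunk|   # one event split across two chunks
  buffer << chunk
  while (line_end = buffer.index("\n"))
    line = buffer.slice!(0, line_end + 1).strip
    events << line.sub(/\Adata: /, "") if line.start_with?("data: ")
  end
end

events # => ["{\"id\":\"1\"}"]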
@@ -318,7 +439,7 @@ module RubyPi
  Response.new(
  content: accumulated_text.empty? ? nil : accumulated_text,
  tool_calls: tool_calls,
- usage: {},
+ usage: streaming_usage,
  finish_reason: normalize_finish_reason(finish_reason)
  )
  end
@@ -334,6 +455,11 @@ module RubyPi

  # Parses an OpenAI Chat Completions response into a normalized Response.
  #
+ # Issue #12: Guards JSON.parse(func["arguments"]) against empty strings.
+ # An empty string is truthy in Ruby but causes JSON::ParserError. We now
+ # check that arguments is a non-empty string before parsing, and rescue
+ # JSON::ParserError, wrapping it in a ProviderError.
+ #
  # @param data [Hash] parsed JSON response from OpenAI
  # @return [RubyPi::LLM::Response]
  def parse_response(data)
@@ -345,7 +471,7 @@ module RubyPi

  (message["tool_calls"] || []).each do |tc|
  func = tc["function"] || {}
- arguments = func["arguments"] ? JSON.parse(func["arguments"]) : {}
+ arguments = safe_parse_arguments(func["arguments"])
  tool_calls << ToolCall.new(
  id: tc["id"],
  name: func["name"],
@@ -384,6 +510,35 @@ module RubyPi
  else reason
  end
  end
+
+ # Safely parses a JSON arguments string into a Hash.
+ #
+ # Issue #12: Handles nil, empty strings, and malformed JSON gracefully.
+ # Previously, `func["arguments"] ? JSON.parse(func["arguments"]) : {}`
+ # would crash on empty strings (truthy but unparseable). Now we validate
+ # the input and rescue parse errors with a typed ProviderError.
+ #
+ # @param raw_args [String, Hash, nil] raw arguments from the API
+ # @return [Hash] parsed arguments hash
+ # @raise [RubyPi::ProviderError] if JSON parsing fails on non-empty input
+ def safe_parse_arguments(raw_args)
+ # Already a Hash — pass through
+ return raw_args if raw_args.is_a?(Hash)
+
+ # nil or non-string — return empty hash
+ return {} unless raw_args.is_a?(String)
+
+ # Empty or whitespace-only string — return empty hash
+ return {} if raw_args.strip.empty?
+
+ # Attempt to parse the JSON string
+ JSON.parse(raw_args)
+ rescue JSON::ParserError => e
+ raise RubyPi::ProviderError.new(
+ "Failed to parse tool call arguments from OpenAI: #{e.message} (raw: #{raw_args.inspect})",
+ provider: :openai
+ )
+ end
  end
  end
  end
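Traced from the helper above, the expected behavior is:

safe_parse_arguments(nil)               # => {}
safe_parse_arguments("  ")              # => {}
safe_parse_arguments({ "q" => "ruby" }) # => { "q" => "ruby" } (pass-through)
safe_parse_arguments('{"q":"ruby"}')    # => { "q" => "ruby" }
safe_parse_arguments("{oops")           # raises RubyPi::ProviderError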
@@ -25,10 +25,10 @@ module RubyPi
  # end
  class StreamEvent
  # Valid event types for stream events.
- VALID_TYPES = %i[text_delta tool_call_delta done].freeze
+ VALID_TYPES = %i[text_delta tool_call_delta done fallback_start].freeze

  # @return [Symbol] the type of stream event — one of :text_delta,
- # :tool_call_delta, or :done
+ # :tool_call_delta, :done, or :fallback_start
  attr_reader :type

  # @return [Object] the event payload. For :text_delta this is a String
@@ -38,7 +38,7 @@ module RubyPi

  # Creates a new StreamEvent instance.
  #
- # @param type [Symbol] event type (:text_delta, :tool_call_delta, :done)
+ # @param type [Symbol] event type (:text_delta, :tool_call_delta, :done, :fallback_start)
  # @param data [Object] event payload
  # @raise [ArgumentError] if the type is not recognized
  def initialize(type:, data: nil)
@@ -71,6 +71,16 @@ module RubyPi
  @type == :done
  end

+ # Returns true if this is a fallback_start event, signaling that the
+ # primary provider failed mid-stream and the fallback provider is
+ # taking over. Consumers should clear any partial output rendered
+ # from the failed primary.
+ #
+ # @return [Boolean]
+ def fallback_start?
+ @type == :fallback_start
+ end
+
  # Returns a hash representation of the stream event.
  #
  # @return [Hash]
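A streaming consumer might honor the new event like this (a sketch; the agent.stream entry point is assumed for illustration and does not appear in this diff):

output = +""
agent.stream(prompt) do |event|
  if event.fallback_start?
    output.clear # drop partial text rendered from the failed primary
  elsif event.type == :text_delta
    output << event.data
  end
end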
@@ -64,13 +64,36 @@ module RubyPi
  # Attempts to parse a JSON string into a Hash. Falls back to wrapping
  # the raw value in a hash if parsing fails.
  #
+ # Issue #15: Guards against non-string, non-hash inputs (e.g., an Integer
+ # or any other object that doesn't respond to `empty?`). Previously,
+ # calling `raw.empty?` on an Integer would raise NoMethodError.
+ # Now we check `raw.is_a?(String)` before calling string methods,
+ # and handle non-string types gracefully.
+ #
  # @param raw [String, Object] raw arguments data
  # @return [Hash] parsed arguments
  def parse_arguments(raw)
- return {} if raw.nil? || raw.empty?
+ # Handle nil explicitly
+ return {} if raw.nil?
+
+ # If it's a String, attempt JSON parse (guard empty strings)
+ if raw.is_a?(String)
+ return {} if raw.strip.empty?
+
+ begin
+ parsed = JSON.parse(raw)
+ return parsed if parsed.is_a?(Hash)
+
+ # JSON.parse succeeded but didn't return a Hash (e.g., an array
+ # or scalar) — wrap it so callers always get a Hash.
+ return { "_raw" => parsed }
+ rescue JSON::ParserError
+ return { "_raw" => raw }
+ end
+ end

- JSON.parse(raw.to_s)
- rescue JSON::ParserError
+ # Any other type (Integer, Float, Array, etc.) falls through here and
+ # is wrapped in a hash to maintain the Hash return type contract.
  { "_raw" => raw.to_s }
  end
  end
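Traced from the method above, the expected behavior is:

parse_arguments(nil)        # => {}
parse_arguments("   ")      # => {}
parse_arguments('{"a":1}')  # => { "a" => 1 }
parse_arguments("[1,2]")    # => { "_raw" => [1, 2] }
parse_arguments("not json") # => { "_raw" => "not json" }
parse_arguments(42)         # => { "_raw" => "42" }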
@@ -57,9 +57,21 @@ module RubyPi
  # Tools are looked up in the registry; if a tool is not found, a failure
  # Result is returned for that call.
  #
+ # Issue #17: Raises NoToolsRegisteredError if the registry is nil and
+ # tool calls are attempted, preventing a confusing NoMethodError.
+ #
  # @param calls [Array<Hash>] Tool call requests, each with :name and :arguments.
  # @return [Array<RubyPi::Tools::Result>] Results in the same order as the calls.
+ # @raise [RubyPi::NoToolsRegisteredError] if registry is nil
  def execute(calls)
+ # Issue #17: Guard against nil registry — if the LLM hallucinated tool
+ # calls but no tools are registered, raise a typed error immediately
+ # rather than crashing with NoMethodError on nil.find.
+ if @registry.nil?
+ raise RubyPi::NoToolsRegisteredError,
+ "Model returned #{calls.size} tool call(s) but no tools are registered"
+ end
+
  case @mode
  when :parallel
  execute_parallel(calls)
@@ -83,6 +95,16 @@ module RubyPi
  # Each call is dispatched as a Future on the global I/O thread pool.
  # Results are collected in order, respecting the per-tool timeout.
  #
+ # Issue #10: Uses future.wait(@timeout) + future.complete? to distinguish
+ # a legitimate nil return value from a timeout. Previously, the || operator
+ # treated nil return values as timeouts.
+ #
+ # Issue #11: After detecting a timeout, attempts to cancel the future.
+ # Note: Ruby threads cannot be forcibly killed safely; we use the future's
+ # cancellation mechanism, which sets a flag. The underlying thread may
+ # continue running until it reaches a natural exit point. This is a known
+ # tradeoff — hard cancellation in Ruby risks corrupted state.
+ #
  # @param calls [Array<Hash>] The tool call requests.
  # @return [Array<RubyPi::Tools::Result>] Ordered results.
  def execute_parallel(calls)
@@ -94,22 +116,62 @@ module RubyPi

  # Collect results, respecting the configured timeout for each future.
  futures.map do |future|
- future.value(@timeout) || Result.new(
- name: "unknown",
- success: false,
- error: "Tool execution timed out after #{@timeout}s",
- duration_ms: @timeout * 1000.0
- )
+ # Issue #10: Wait for the future to complete, then check its state
+ # explicitly. Future#value returns nil both on timeout AND when the
+ # block legitimately returned nil, so we cannot use || to distinguish.
+ future.wait(@timeout)
+
+ if future.complete?
+ if future.fulfilled?
+ # Future completed successfully — return its value (which may be nil)
+ future.value
+ else
+ # Future was rejected (raised an exception within the block).
+ # This shouldn't normally happen since execute_single rescues
+ # internally, but handle it defensively.
+ error = future.reason
+ Result.new(
+ name: "unknown",
+ success: false,
+ error: "#{error.class}: #{error.message}",
+ duration_ms: @timeout * 1000.0
+ )
+ end
+ else
+ # Issue #11: Future did not complete within the timeout window.
+ # Attempt to cancel the future to signal the thread to stop.
+ # Concurrent::Future does not support hard cancellation — the
+ # underlying thread will continue until it naturally exits.
+ # This is the safest approach in Ruby since Thread#raise/Thread#kill
+ # can interrupt mid-mutation and corrupt shared state.
+ future.cancel if future.respond_to?(:cancel)
+
+ Result.new(
+ name: "unknown",
+ success: false,
+ error: "Tool execution timed out after #{@timeout}s",
+ duration_ms: @timeout * 1000.0
+ )
+ end
  end
  end

  # Executes a single tool call with error handling and timing.
  #
+ # Issue #9: Replaced the stdlib timeout mechanism with a thread+join approach for
+ # sequential mode. The stdlib timeout uses Thread#raise internally, which
+ # is unsafe — it can interrupt code mid-mutation, leak file handles,
+ # and corrupt state. The thread+join approach runs the tool in a
+ # separate thread and waits with a timeout; if the thread doesn't
+ # finish in time, we report a timeout error. The worker thread is
+ # left running (it cannot be safely killed in Ruby) but its result
+ # is discarded.
+ #
  # @param call [Hash] A tool call with :name and :arguments keys.
  # @return [RubyPi::Tools::Result] The execution result.
  def execute_single(call)
  tool_name = (call[:name] || call["name"]).to_s
- arguments = call[:arguments] || call["arguments"] || {}
+ arguments = deep_symbolize_keys(call[:arguments] || call["arguments"] || {})

  tool = @registry.find(tool_name)

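The wait/complete?/fulfilled? sequence is standard concurrent-ruby API; a standalone sketch of why Future#value alone cannot tell the two cases apart:

require "concurrent"

future = Concurrent::Future.execute { nil } # a tool that legitimately returns nil
future.wait(5)

if future.complete?
  future.fulfilled? ? future.value : future.reason # nil here is a real result
else
  # timed out; Future#value would also have returned nil here
end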
@@ -123,34 +185,59 @@ module RubyPi
  )
  end

- # Execute the tool with timeout and error handling
+ # Execute the tool with a safe timeout mechanism.
+ # Instead of the stdlib timeout (which uses Thread#raise and is unsafe),
+ # we spawn a worker thread and join with a timeout.
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
- begin
- value = Timeout.timeout(@timeout) do
- tool.call(arguments)
+
+ # Container for the worker thread's result/error
+ value = nil
+ error = nil
+
+ worker = Thread.new do
+ # Don't spam stderr from the rescued worker thread.
+ Thread.current.report_on_exception = false
+ begin
+ value = tool.call(arguments)
+ rescue Exception => e # rubocop:disable Lint/RescueException
+ # Rescue the full Exception hierarchy (not just StandardError).
+ # If a tool block raises Interrupt, SystemExit, or any other
+ # non-StandardError, rescuing only StandardError leaves both
+ # `value` and `error` nil; the join then reports a successful
+ # nil result — a panic in a tool silently becomes "returned nil".
+ # Capture the failure here; the main thread surfaces it as a
+ # failed Result. The worker thread itself does not propagate.
+ error = e
  end
- elapsed_ms = elapsed_since(start_time)
+ end
+
+ # Join with timeout — returns nil if the thread didn't finish in time
+ finished = worker.join(@timeout)
+
+ elapsed_ms = elapsed_since(start_time)

+ if finished.nil?
+ # Thread did not finish within the timeout. We cannot safely kill it
+ # (Thread#kill can corrupt state), so we leave it running and report
+ # the timeout. This matches the tradeoff documented for parallel mode.
  Result.new(
  name: tool_name,
- success: true,
- value: value,
+ success: false,
+ error: "Tool '#{tool_name}' timed out after #{@timeout}s",
  duration_ms: elapsed_ms
  )
- rescue Timeout::Error
- elapsed_ms = elapsed_since(start_time)
+ elsif error
  Result.new(
  name: tool_name,
  success: false,
- error: "Tool '#{tool_name}' timed out after #{@timeout}s",
+ error: "#{error.class}: #{error.message}",
  duration_ms: elapsed_ms
  )
- rescue StandardError => e
- elapsed_ms = elapsed_since(start_time)
+ else
  Result.new(
  name: tool_name,
- success: false,
- error: "#{e.class}: #{e.message}",
+ success: true,
+ value: value,
  duration_ms: elapsed_ms
  )
  end
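The same pattern, reduced to a standalone sketch (do_slow_work is a hypothetical stand-in for a tool body):

result = nil
error = nil

worker = Thread.new do
  Thread.current.report_on_exception = false
  begin
    result = do_slow_work # hypothetical long-running call
  rescue Exception => e
    error = e
  end
end

if worker.join(5) # returns the thread, or nil once 5s elapse
  error ? "failed: #{error.message}" : result
else
  "timed out" # the worker is left running; Ruby cannot kill it safely
end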
@@ -163,6 +250,37 @@ module RubyPi
  def elapsed_since(start_time)
  (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000.0
  end
+
+ # Recursively converts all string keys in a hash to symbols so that
+ # tool implementations can use idiomatic Ruby symbol-key access
+ # (e.g. `args[:field]`) regardless of whether the LLM provider
+ # returned string-keyed JSON. Exposed as a class method so the agent
+ # loop can apply the same transformation to tool_call arguments
+ # before recording them in `tool_calls_made`, keeping the agent's
+ # observable arguments shape consistent with what tool blocks see.
+ #
+ # @param obj [Object] the object to transform (Hash, Array, or scalar)
+ # @return [Object] the transformed object with symbolized keys
+ def self.deep_symbolize_keys(obj)
+ case obj
+ when Hash
+ obj.each_with_object({}) do |(key, value), result|
+ result[key.to_sym] = deep_symbolize_keys(value)
+ end
+ when Array
+ obj.map { |item| deep_symbolize_keys(item) }
+ else
+ obj
+ end
+ end
+
+ # Instance-method delegate so existing internal callers keep working.
+ #
+ # @param obj [Object] the object to transform (Hash, Array, or scalar)
+ # @return [Object] the transformed object with symbolized keys
+ def deep_symbolize_keys(obj)
+ self.class.deep_symbolize_keys(obj)
+ end
  end
  end
  end
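Example behavior of the new helper, traced from the code above: keys are symbolized recursively, values are left untouched:

deep_symbolize_keys({ "city" => "Paris", "opts" => [{ "units" => "metric" }] })
# => { city: "Paris", opts: [{ units: "metric" }] }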