RubyGems - phronomy - Versions diffs - 0.2.2 → 0.4.0 - Mend

phronomy 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

checksums.yaml +4 -4
data/CHANGELOG.md +127 -30
data/README.md +106 -122
data/lib/phronomy/agent/base.rb +135 -57
data/lib/phronomy/agent/checkpoint.rb +53 -0
data/lib/phronomy/agent/orchestrator.rb +119 -0
data/lib/phronomy/agent/react_agent.rb +18 -28
data/lib/phronomy/agent/shared_state.rb +303 -0
data/lib/phronomy/agent/suspend_signal.rb +35 -0
data/lib/phronomy/agent/team_coordinator.rb +285 -0
data/lib/phronomy/agent.rb +2 -1
data/lib/phronomy/configuration.rb +0 -24
data/lib/phronomy/generator_verifier.rb +250 -0
data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +10 -27
data/lib/phronomy/railtie.rb +0 -6
data/lib/phronomy/ruby_llm_patches.rb +20 -0
data/lib/phronomy/tool/mcp_tool.rb +23 -26
data/lib/phronomy/tracing/langfuse_tracer.rb +3 -6
data/lib/phronomy/vector_store/redis_search.rb +4 -4
data/lib/phronomy/version.rb +1 -1
data/lib/phronomy/workflow.rb +4 -7
data/lib/phronomy/workflow_runner.rb +42 -30
data/lib/phronomy.rb +18 -0
data/scripts/check_readme_ruby.rb +38 -0
metadata +12 -38
data/docs/trustworthy_ai_enhancements.md +0 -332
data/lib/phronomy/active_record/acts_as.rb +0 -48
data/lib/phronomy/active_record/checkpoint.rb +0 -20
data/lib/phronomy/active_record/extensions.rb +0 -14
data/lib/phronomy/active_record/message.rb +0 -20
data/lib/phronomy/actor.rb +0 -68
data/lib/phronomy/memory/compression/base.rb +0 -37
data/lib/phronomy/memory/compression/summary.rb +0 -107
data/lib/phronomy/memory/compression/tool_output_pruner.rb +0 -67
data/lib/phronomy/memory/compression.rb +0 -11
data/lib/phronomy/memory/conversation_manager.rb +0 -213
data/lib/phronomy/memory/retrieval/base.rb +0 -22
data/lib/phronomy/memory/retrieval/composite.rb +0 -76
data/lib/phronomy/memory/retrieval/recent.rb +0 -35
data/lib/phronomy/memory/retrieval/semantic.rb +0 -114
data/lib/phronomy/memory/retrieval.rb +0 -12
data/lib/phronomy/memory/storage/active_record.rb +0 -248
data/lib/phronomy/memory/storage/base.rb +0 -155
data/lib/phronomy/memory/storage/in_memory.rb +0 -152
data/lib/phronomy/memory/storage.rb +0 -11
data/lib/phronomy/memory.rb +0 -21
data/lib/phronomy/rails/agent_job.rb +0 -75
data/lib/phronomy/state_store/active_record.rb +0 -76
data/lib/phronomy/state_store/base.rb +0 -112
data/lib/phronomy/state_store/encryptor/active_support.rb +0 -49
data/lib/phronomy/state_store/encryptor/base.rb +0 -34
data/lib/phronomy/state_store/encryptor.rb +0 -16
data/lib/phronomy/state_store/file.rb +0 -85
data/lib/phronomy/state_store/in_memory.rb +0 -53
data/lib/phronomy/state_store/redis.rb +0 -70
data/lib/phronomy/state_store.rb +0 -9
data/lib/phronomy/thread_actor_registry.rb +0 -85
data/lib/phronomy/trust_pipeline.rb +0 -264

data/lib/phronomy/agent/base.rb CHANGED

@@ -66,7 +66,8 @@ module Phronomy
           if text || block_given?
             @instructions = text || block
           else
-            @instructions
+            return @instructions if instance_variable_defined?(:@instructions)
+            superclass.respond_to?(:instructions) ? superclass.instructions : nil
           end
         end
@@ -88,7 +89,10 @@ module Phronomy
         #   )
         def tools(*args)
           if args.empty?
-            return @tools || []
+            if instance_variable_defined?(:@tools)
+              return @tools
+            end
+            return superclass.respond_to?(:tools) ? superclass.tools : []
           end
           if args.length == 1 && args.first.is_a?(Hash)
@@ -122,7 +126,8 @@ module Phronomy
           if name
             @provider = name
           else
-            @provider
+            return @provider if instance_variable_defined?(:@provider)
+            superclass.respond_to?(:provider) ? superclass.provider : nil
           end
         end
@@ -402,18 +407,25 @@ module Phronomy
       #   +:message+, +:query+, or +:user+ as the text key, plus any template
       #   variables consumed by the configured instructions template.
       # @param config [Hash] runtime options:
-      #   +:memory+     ({Phronomy::Memory::ConversationManager}) — memory backend
+      #   +:messages+   (Array<RubyLLM::Message>)  — conversation history from a previous invocation
       #   +:thread_id+  (+String+)                 — conversation thread identifier
       #   +:user_id+    (+String+, optional)        — caller identity forwarded to the tracer
       #   +:session_id+ (+String+, optional)        — session identity forwarded to the tracer
-      # @return [Hash] +{ output: String, messages: Array, usage: Phronomy::TokenUsage }+
+      # @return [Hash] +{ output: String, messages: Array, usage: Phronomy::TokenUsage }+,
+      #   or +{ output: nil, suspended: true, checkpoint: Phronomy::Agent::Checkpoint,
+      #   messages: Array }+ when the invocation was suspended awaiting tool approval.
       # @raise [Phronomy::GuardrailError] when an input or output guardrail rejects the value
-      # @example
+      # @example Normal invocation
       #   result = MyAgent.new.invoke("What is Ruby?")
       #   puts result[:output]
+      # @example Suspend / resume flow
+      #   result = agent.invoke("Perform task X")
+      #   if result[:suspended]
+      #     result = agent.resume(result[:checkpoint], approved: true)
+      #   end
+      #   puts result[:output]
       def invoke(input, config: {})
-        thread_id = config[:thread_id]
-        _run_in_thread_actor(thread_id) { _invoke_impl(input, config: config) }
+        _invoke_impl(input, config: config)
       end
       # Streaming version of #invoke. Yields {Phronomy::Agent::StreamEvent} objects
@@ -433,23 +445,73 @@ module Phronomy
       def stream(input, config: {}, &block)
         return invoke(input, config: config) unless block
-        thread_id = config[:thread_id]
-        _run_in_thread_actor(thread_id) { _stream_impl(input, config: config, &block) }
+        _stream_impl(input, config: config, &block)
       rescue => e
         block&.call(StreamEvent.new(type: :error, payload: {error: e}))
         raise
       end
+      # Resumes a previously suspended invocation from a {Phronomy::Agent::Checkpoint}.
+      #
+      # This method reconstructs the conversation state captured at suspension
+      # time, injects the tool result (executed or denied), and continues the
+      # LLM loop until it produces a final answer.
+      #
+      # @param checkpoint [Phronomy::Agent::Checkpoint] the checkpoint returned by
+      #   the suspended #invoke call
+      # @param approved   [Boolean] +true+ to execute the pending tool; +false+
+      #   to inject a denial message and let the LLM handle it gracefully
+      # @param config     [Hash] same runtime options as #invoke
+      # @return [Hash] +{ output: String, suspended: false, messages: Array, usage: Phronomy::TokenUsage }+
+      # @raise [Phronomy::GuardrailError] when an output guardrail rejects the value
+      def resume(checkpoint, approved:, config: {})
+        checkpoint.thread_id
+        # Build a fresh chat with all tools registered.
+        chat = build_chat
+        # Restore the full conversation (system + history + user + assistant).
+        checkpoint.messages.each { |msg| chat.messages << msg }
+        # Determine the tool result: execute it or inject a denial string.
+        tool_result =
+          if approved
+            tool_instance = chat.tools[checkpoint.pending_tool_name.to_sym]
+            tool_instance ? tool_instance.call(checkpoint.pending_tool_args) : "Tool not found."
+          else
+            "Tool execution denied."
+          end
+        # Inject the tool result so the LLM can continue.
+        chat.add_message(
+          role: :tool,
+          content: tool_result.to_s,
+          tool_call_id: checkpoint.pending_tool_call_id
+        )
+        # Continue the React loop.
+        response = chat.complete
+        output = response.content
+        usage = Phronomy::TokenUsage.from_tokens(response.tokens)
+        run_output_guardrails!(output)
+        {output: output, suspended: false, messages: chat.messages, usage: usage}
+      end
       # Registers a callback that is invoked before executing any tool that has
       # +requires_approval true+ set. The block receives the tool name (String)
       # and the arguments Hash, and must return a truthy value to allow execution.
       # Returning a falsy value causes the tool to return a denial message instead
       # of executing.
       #
-      # When no handler is registered, tools with +requires_approval+ execute
-      # without interruption (backward-compatible behaviour).
+      # When no handler is registered and a tool with +requires_approval+ is
+      # called, #invoke returns a suspended result hash containing a
+      # {Phronomy::Agent::Checkpoint}.  Call #resume to continue execution after
+      # obtaining an approval decision from the user or an external system.
       #
-      # @example
+      # @example Synchronous handler
       #   agent = MyAgent.new
       #   agent.on_approval_required { |tool_name, args| prompt_user(tool_name, args) }
       # @return [self]
@@ -510,7 +572,6 @@ module Phronomy
         trace("agent.invoke", input: input, **caller_meta) do |_span|
           run_input_guardrails!(input)
-          memory = config[:memory]
           thread_id = config[:thread_id]
           chat = build_chat
@@ -528,8 +589,8 @@ module Phronomy
             end
           end
-          if memory && thread_id
-            msgs = load_from_memory(memory, thread_id: thread_id, query: user_message)
+          msgs = Array(config[:messages])
+          unless msgs.empty?
             message_elements = build_message_elements(msgs)
             # Run on_trim: app may call ctx.remove(seqs) to drop messages this turn.
@@ -547,8 +608,7 @@ module Phronomy
                   compact_ctx = Context::CompactionContext.new(
                     message_elements: message_elements,
                     budget: budget,
-                    thread_id: thread_id,
-                    memory: memory
+                    thread_id: thread_id
                   )
                   compact_cb.call(compact_ctx)
                   message_elements = build_message_elements(compact_ctx.result_messages)
@@ -564,8 +624,18 @@ module Phronomy
           context[:messages].each { |msg| chat.messages << msg }
           # Wire per-event callbacks to yield StreamEvents.
-          chat.before_tool_call { |tool_call| block.call(StreamEvent.new(type: :tool_call, payload: {tool_call: tool_call})) }
-          chat.after_tool_result { |tool_result| block.call(StreamEvent.new(type: :tool_result, payload: {tool_result: tool_result})) }
+          current_tool_call = nil
+          chat.on_tool_call do |tool_call|
+            current_tool_call = tool_call
+            block.call(StreamEvent.new(type: :tool_call, payload: {tool_call: tool_call}))
+          end
+          chat.on_tool_result do |tool_result|
+            block.call(StreamEvent.new(type: :tool_result, payload: {
+              tool_call_id: current_tool_call&.id,
+              tool_name: current_tool_call&.name,
+              tool_result: tool_result
+            }))
+          end
           # Run before_completion hooks (global → class → instance) before the LLM call.
           run_before_completion_hooks!(chat, config)
@@ -574,8 +644,6 @@ module Phronomy
             block.call(StreamEvent.new(type: :token, payload: {content: chunk.content}))
           end
-          save_to_memory(memory, thread_id: thread_id, messages: chat.messages) if memory && thread_id
           output = response.content
           usage = Phronomy::TokenUsage.from_tokens(response.tokens)
@@ -587,14 +655,6 @@ module Phronomy
         end
       end
-      # Runs +block+ inside the {Phronomy::ThreadActorRegistry} Actor for
-      # +thread_id+. When +thread_id+ is nil the block executes on the calling thread.
-      def _run_in_thread_actor(thread_id, &block)
-        return block.call unless thread_id
-        Phronomy::ThreadActorRegistry.for(thread_id).call(&block)
-      end
       # Performs a single (non-retried) invocation. Extracted so that #invoke can
       # wrap it in a retry loop without duplicating the LLM interaction logic.
       def invoke_once(input, config: {})
@@ -606,15 +666,13 @@ module Phronomy
           # Run input guardrails before touching the LLM.
           run_input_guardrails!(input)
-          memory = config[:memory]
           thread_id = config[:thread_id]
           user_message = extract_message(input)
           chat = build_chat
           budget = build_token_budget
-          # Load conversation history from memory.
-          raw_messages = (memory && thread_id) ?
-            load_from_memory(memory, thread_id: thread_id, query: user_message) : []
+          # Load conversation history from config[:messages] (app-managed).
+          raw_messages = Array(config[:messages])
           # Assign synthetic 0-based seq numbers for use by trim/compaction callbacks.
           message_elements = build_message_elements(raw_messages)
@@ -636,8 +694,7 @@ module Phronomy
                 compact_ctx = Context::CompactionContext.new(
                   message_elements: message_elements,
                   budget: budget,
-                  thread_id: thread_id,
-                  memory: memory
+                  thread_id: thread_id
                 )
                 compact_cb.call(compact_ctx)
                 message_elements = build_message_elements(compact_ctx.result_messages)
@@ -671,10 +728,23 @@ module Phronomy
           # Run before_completion hooks (global → class → instance) before the LLM call.
           run_before_completion_hooks!(chat, config)
-          response = chat.ask(user_message)
-          # Persist the updated conversation to memory.
-          save_to_memory(memory, thread_id: thread_id, messages: chat.messages) if memory && thread_id
+          # Register suspension hook for approval-required tools (no-op when a
+          # synchronous on_approval_required handler is already registered).
+          _register_suspension_hook!(chat)
+          begin
+            response = chat.ask(user_message)
+          rescue SuspendSignal => signal
+            checkpoint = Checkpoint.new(
+              thread_id: thread_id,
+              messages: chat.messages.dup,
+              pending_tool_name: signal.tool_name,
+              pending_tool_args: signal.args,
+              pending_tool_call_id: signal.tool_call_id
+            )
+            suspended_result = {output: nil, suspended: true, checkpoint: checkpoint, messages: chat.messages}
+            next [suspended_result, nil]
+          end
           output = response.content
           usage = Phronomy::TokenUsage.from_tokens(response.tokens)
@@ -832,23 +902,6 @@ module Phronomy
       # Load messages from a ConversationManager.
       #
-      # @param memory    [Memory::ConversationManager]
-      # @param thread_id [String]
-      # @param query     [String, nil]
-      # @return [Array]
-      def load_from_memory(memory, thread_id:, query: nil)
-        memory.load(thread_id: thread_id, query: query)
-      end
-      # Persist messages to a ConversationManager.
-      #
-      # @param memory    [Memory::ConversationManager]
-      # @param thread_id [String]
-      # @param messages  [Array]
-      def save_to_memory(memory, thread_id:, messages:)
-        memory.save(thread_id: thread_id, messages: messages)
-      end
       def build_chat
         opts = {}
         m = self.class.model
@@ -917,6 +970,31 @@ module Phronomy
         (@output_guardrails || []).each { |g| g.run!(output) }
       end
+      # Registers an on_tool_call hook on the chat object that raises SuspendSignal
+      # when an approval-required tool is about to be executed and no synchronous
+      # on_approval_required handler has been registered.
+      #
+      # Does nothing when:
+      #   - a synchronous handler is already registered (@approval_handler is set), or
+      #   - none of the agent's tools have requires_approval set.
+      #
+      # @param chat [RubyLLM::Chat]
+      def _register_suspension_hook!(chat)
+        return if @approval_handler
+        return if self.class.tools.none? { |tc| tc.requires_approval }
+        chat.on_tool_call do |tool_call|
+          tool_instance = chat.tools[tool_call.name.to_sym]
+          if tool_instance&.requires_approval
+            raise SuspendSignal.new(
+              tool_name: tool_call.name,
+              args: tool_call.arguments,
+              tool_call_id: tool_call.id
+            )
+          end
+        end
+      end
       # Builds the final tool class to register with the chat.
       #
       # Two transformations are applied in order:

data/lib/phronomy/agent/checkpoint.rb ADDED

@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+module Phronomy
+  module Agent
+    # Encapsulates the suspended state of an agent invocation.
+    #
+    # A Checkpoint is returned as the +:checkpoint+ key of the result hash when
+    # an approval-required tool is encountered and no synchronous
+    # on_approval_required handler has been registered.
+    #
+    # Pass the checkpoint to Agent::Base#resume to continue execution after
+    # obtaining an approval decision from the user or an external system.
+    #
+    # @example Suspend and resume
+    #   result = agent.invoke("Do task X")
+    #   if result[:suspended]
+    #     approved = prompt_user(result[:checkpoint].pending_tool_name)
+    #     result   = agent.resume(result[:checkpoint], approved: approved)
+    #   end
+    #   puts result[:output]
+    class Checkpoint
+      # @return [String, nil] the thread_id from the invocation config
+      attr_reader :thread_id
+      # @return [Array<RubyLLM::Message>] conversation messages up to and including
+      #   the assistant message that requested the pending tool call
+      attr_reader :messages
+      # @return [String] the name of the tool awaiting approval
+      attr_reader :pending_tool_name
+      # @return [Hash] the arguments the LLM passed to the pending tool
+      attr_reader :pending_tool_args
+      # @return [String] the tool_call_id from the LLM response (required to
+      #   inject the tool result message on resume)
+      attr_reader :pending_tool_call_id
+      # @param thread_id           [String, nil]
+      # @param messages            [Array<RubyLLM::Message>]
+      # @param pending_tool_name   [String]
+      # @param pending_tool_args   [Hash]
+      # @param pending_tool_call_id [String]
+      def initialize(thread_id:, messages:, pending_tool_name:, pending_tool_args:, pending_tool_call_id:)
+        @thread_id = thread_id
+        @messages = messages.dup.freeze
+        @pending_tool_name = pending_tool_name
+        @pending_tool_args = pending_tool_args
+        @pending_tool_call_id = pending_tool_call_id
+      end
+    end
+  end
+end

data/lib/phronomy/agent/orchestrator.rb ADDED

@@ -0,0 +1,119 @@
+# frozen_string_literal: true
+module Phronomy
+  module Agent
+    # Base class for orchestrator agents that coordinate multiple subagents.
+    # Implements the Orchestrator-Subagent multi-agent coordination pattern
+    # (Anthropic blog, Pattern 2).
+    #
+    # @see https://claude.com/blog/multi-agent-coordination-patterns
+    #
+    # Extends {Phronomy::Agent::Base} with:
+    # - A +subagent+ class-level DSL for declarative subagent registration. Each
+    #   declared subagent is automatically exposed as an LLM-callable tool.
+    # - +dispatch_parallel+ for programmatic parallel invocation of heterogeneous
+    #   agents.
+    # - +fan_out+ for parallel invocation of the same agent across multiple inputs.
+    #
+    # @example Declarative DSL
+    #   class ResearchOrchestrator < Phronomy::Agent::Orchestrator
+    #     model "gpt-4o"
+    #     instructions "You coordinate research tasks."
+    #     subagent :searcher,   SearchAgent
+    #     subagent :summarizer, SummaryAgent
+    #   end
+    #
+    #   result = ResearchOrchestrator.new.invoke("Research the latest AI news.")
+    #
+    # @example Programmatic parallel dispatch
+    #   class MyOrchestrator < Phronomy::Agent::Orchestrator
+    #     model "gpt-4o"
+    #     instructions "Dispatch tasks in parallel."
+    #
+    #     def run(input)
+    #       results = dispatch_parallel(
+    #         { agent: SearchAgent,   input: "topic A" },
+    #         { agent: AnalysisAgent, input: input }
+    #       )
+    #       results.map { |r| r[:output] }.join("\n")
+    #     end
+    #   end
+    #
+    # @example Fan-out (same agent, multiple inputs)
+    #   results = fan_out(agent: TranslationAgent, inputs: ["Hello", "World"])
+    class Orchestrator < Base
+      # Declares a named subagent and registers it as a tool accessible to the
+      # LLM during an +invoke+ call.
+      #
+      # Each call appends a new tool to this class's tool list.  The generated
+      # tool's function name is +dispatch_to_<name>+.  When the LLM calls the
+      # tool, a fresh instance of +agent_class+ is created and +invoke+ is called
+      # with the provided input string.
+      #
+      # @param name        [Symbol] logical name that identifies the subagent
+      # @param agent_class [Class]  subclass of {Phronomy::Agent::Base}
+      # @param on_error    [Symbol] +:raise+ (default) re-raises any exception
+      #   from the subagent; +:skip+ returns +nil+ so the LLM can decide how to
+      #   proceed
+      def self.subagent(name, agent_class, on_error: :raise)
+        tool_class = Class.new(Phronomy::Tool::Base) do
+          tool_name "dispatch_to_#{name}"
+          description "Dispatch work to the #{name} subagent (#{agent_class.name})"
+          param :input, type: :string, desc: "The task or question for the subagent"
+          define_method(:execute) do |input:|
+            result = agent_class.new.invoke(input)
+            result[:output]
+          rescue
+            raise if on_error == :raise
+            nil
+          end
+        end
+        # Append without clobbering previously registered tools or aliases.
+        @tools = (@tools || []) + [tool_class]
+        @tool_aliases ||= {}
+        registered_subagents[name] = {agent_class: agent_class, on_error: on_error}
+      end
+      # Returns the subagent registry for this specific class (not inherited).
+      #
+      # @return [Hash{Symbol => Hash}]
+      def self.registered_subagents
+        @registered_subagents ||= {}
+      end
+      # Dispatches multiple heterogeneous agent tasks in parallel using Ruby
+      # threads. Each task is a Hash describing one agent invocation.
+      #
+      # Results are returned in the same order as the input +tasks+ array.
+      # If any thread raises an exception, the exception is re-raised in the
+      # calling thread after all threads have completed (via +Thread#value+).
+      #
+      # @param tasks [Array<Hash>]
+      # @option task [Class]  :agent  agent class to invoke (required)
+      # @option task [String] :input  input string for the agent (required)
+      # @option task [Hash]   :config forwarded to +agent#invoke+ (default: +{}+)
+      # @return [Array<Hash>] agent results in the same order as +tasks+
+      def dispatch_parallel(*tasks)
+        threads = tasks.map do |task|
+          Thread.new do
+            task[:agent].new.invoke(task[:input], config: task.fetch(:config, {}))
+          end
+        end
+        threads.map(&:value)
+      end
+      # Runs the same agent against multiple inputs in parallel (fan-out pattern).
+      #
+      # @param agent  [Class]         agent class to invoke for every input
+      # @param inputs [Array<String>] list of input strings
+      # @param config [Hash]          forwarded to every +agent#invoke+ call
+      # @return [Array<Hash>] results in the same order as +inputs+
+      def fan_out(agent:, inputs:, config: {})
+        dispatch_parallel(*inputs.map { |input| {agent: agent, input: input, config: config} })
+      end
+    end
+  end
+end

data/lib/phronomy/agent/react_agent.rb CHANGED

@@ -18,18 +18,11 @@ module Phronomy
           # Run input guardrails before any LLM interaction.
           run_input_guardrails!(input)
-          memory = config[:memory]
-          thread_id = config[:thread_id]
+          config[:thread_id]
           max_iter = self.class.max_iterations
-          # Seed with persisted messages when memory is provided.
-          initial_messages = if memory && thread_id
-            load_from_memory(memory, thread_id: thread_id, query: extract_message(input))
-          else
-            []
-          end
-          messages = initial_messages.dup
+          # Seed with app-managed conversation history when provided.
+          messages = Array(config[:messages]).dup
           user_asked = false
           total_usage = Phronomy::TokenUsage.zero
           iterations_exhausted = true
@@ -45,12 +38,8 @@ module Phronomy
             end
           end
-          save_to_memory(memory, thread_id: thread_id, messages: messages) if memory && thread_id
-          # Fall back to the last message that carries non-nil content. This
+          # Fall back to the last message
           # guards against the case where the final message is a tool-call or
-          # tool-result message (content == nil) when max_iterations is
-          # exhausted before the model produces a text reply.
           output = messages.reverse.find { |m| m.content && !m.content.empty? }&.content
           # Run output guardrails before returning to the caller.
@@ -80,17 +69,10 @@ module Phronomy
         trace("agent.invoke", input: input, **caller_meta) do |_span|
           run_input_guardrails!(input)
-          memory = config[:memory]
-          thread_id = config[:thread_id]
+          config[:thread_id]
           max_iter = self.class.max_iterations
-          initial_messages = if memory && thread_id
-            load_from_memory(memory, thread_id: thread_id, query: extract_message(input))
-          else
-            []
-          end
-          messages = initial_messages.dup
+          messages = Array(config[:messages]).dup
           user_asked = false
           total_usage = Phronomy::TokenUsage.zero
           iterations_exhausted = true
@@ -106,8 +88,6 @@ module Phronomy
             end
           end
-          save_to_memory(memory, thread_id: thread_id, messages: messages) if memory && thread_id
           # Fall back to the last message that carries non-nil content (same as
           # the non-streaming path above).
           output = messages.reverse.find { |m| m.content && !m.content.empty? }&.content
@@ -154,8 +134,18 @@ module Phronomy
         chat = build_chat
         messages.each { |m| chat.add_message(m) }
-        chat.before_tool_call { |tc| block.call(StreamEvent.new(type: :tool_call, payload: {tool_call: tc})) }
-        chat.after_tool_result { |tr| block.call(StreamEvent.new(type: :tool_result, payload: {tool_result: tr})) }
+        current_tool_call = nil
+        chat.on_tool_call do |tc|
+          current_tool_call = tc
+          block.call(StreamEvent.new(type: :tool_call, payload: {tool_call: tc}))
+        end
+        chat.on_tool_result do |tr|
+          block.call(StreamEvent.new(type: :tool_result, payload: {
+            tool_call_id: current_tool_call&.id,
+            tool_name: current_tool_call&.name,
+            tool_result: tr
+          }))
+        end
         # Run before_completion hooks before each LLM call in the streaming loop.
         run_before_completion_hooks!(chat, config)