pikuri 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +62 -0
- data/GETTING_STARTED.md +223 -0
- data/LICENSE +21 -0
- data/README.md +193 -0
- data/lib/pikuri/agent/chat_transport.rb +41 -0
- data/lib/pikuri/agent/context_window_detector.rb +101 -0
- data/lib/pikuri/agent/listener/in_memory_message_list.rb +33 -0
- data/lib/pikuri/agent/listener/message_listener.rb +93 -0
- data/lib/pikuri/agent/listener/step_limit.rb +97 -0
- data/lib/pikuri/agent/listener/terminal.rb +137 -0
- data/lib/pikuri/agent/listener/token_log.rb +166 -0
- data/lib/pikuri/agent/listener_list.rb +113 -0
- data/lib/pikuri/agent/message.rb +61 -0
- data/lib/pikuri/agent/synthesizer.rb +120 -0
- data/lib/pikuri/agent/tokens.rb +56 -0
- data/lib/pikuri/agent.rb +286 -0
- data/lib/pikuri/subprocess.rb +166 -0
- data/lib/pikuri/tool/bash.rb +272 -0
- data/lib/pikuri/tool/calculator.rb +82 -0
- data/lib/pikuri/tool/confirmer.rb +96 -0
- data/lib/pikuri/tool/edit.rb +196 -0
- data/lib/pikuri/tool/fetch.rb +167 -0
- data/lib/pikuri/tool/glob.rb +310 -0
- data/lib/pikuri/tool/grep.rb +338 -0
- data/lib/pikuri/tool/parameters.rb +314 -0
- data/lib/pikuri/tool/read.rb +254 -0
- data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
- data/lib/pikuri/tool/scraper/html.rb +285 -0
- data/lib/pikuri/tool/scraper/pdf.rb +54 -0
- data/lib/pikuri/tool/scraper/simple.rb +177 -0
- data/lib/pikuri/tool/search/brave.rb +184 -0
- data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
- data/lib/pikuri/tool/search/engines.rb +154 -0
- data/lib/pikuri/tool/search/exa.rb +217 -0
- data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
- data/lib/pikuri/tool/search/result.rb +29 -0
- data/lib/pikuri/tool/skill.rb +80 -0
- data/lib/pikuri/tool/skill_catalog.rb +376 -0
- data/lib/pikuri/tool/sub_agent.rb +102 -0
- data/lib/pikuri/tool/web_scrape.rb +117 -0
- data/lib/pikuri/tool/web_search.rb +38 -0
- data/lib/pikuri/tool/workspace.rb +150 -0
- data/lib/pikuri/tool/write.rb +170 -0
- data/lib/pikuri/tool.rb +118 -0
- data/lib/pikuri/url_cache.rb +106 -0
- data/lib/pikuri/version.rb +10 -0
- data/lib/pikuri.rb +165 -0
- data/prompts/coding-system-prompt.txt +28 -0
- data/prompts/pikuri-chat.txt +15 -0
- metadata +259 -0
# frozen_string_literal: true

module Pikuri
  class Agent
    # Rescue path for step exhaustion. When an +Agent+'s
    # {Listener::StepLimit} fires, +Agent#run_loop+ traps the +Exceeded+
    # exception and delegates to {Synthesizer.run}, which produces a
    # tools-free assistant turn that answers the user's question from
    # whatever evidence the exhausted agent gathered before running out
    # of budget.
    #
    # == Rationale
    #
    # Without this rescue a step-exhausted run raises straight past
    # +bin/pikuri-chat+ and the user gets nothing, even though the agent
    # typically collects sound evidence in its first few rounds and then
    # burns the rest of the budget second-guessing itself (the
    # "wait, but what about X?" death-loop). By the time the cap trips,
    # the answer is largely present in the messages — it just needs one
    # tools-free pass to synthesize.
    #
    # == Seam discipline
    #
    # {Synthesizer.run} never references +RubyLLM::*+. +Agent+ builds the
    # synth chat itself (the one +RubyLLM.chat+ call lives in
    # +lib/agent.rb+, next to the parent chat's) and hands it in;
    # +Synthesizer+ only calls instance methods on whatever +chat+ it
    # receives — +#with_instructions+ and +#ask+ — keeping the seam at
    # three files.
    module Synthesizer
      # System prompt for the synthesis turn. Deliberately terse: rely on
      # the evidence, no apologies, be explicit about gaps.
      SYSTEM_PROMPT = <<~PROMPT
        You are given evidence another agent collected before running out of steps. Answer the user's question using only this evidence. You have no tools. If the evidence is insufficient, state plainly what's missing and what partial answer you can give. Do not apologize or comment on the previous agent.
      PROMPT

      # Configure +chat+ for synthesis, run exactly one turn against it,
      # and return the final assistant content. Listeners are attached
      # first so the synth's reasoning and answer flow through the same
      # surfaces the parent agent uses — terminal rendering, in-memory
      # capture, and any future sink sees them as normal {Message}
      # variants.
      #
      # @param chat [RubyLLM::Chat] a *fresh*, tools-free chat. The
      #   caller is responsible for constructing it with the same
      #   model/provider configuration the parent used.
      # @param parent_messages [Array<RubyLLM::Message>] the parent
      #   chat's full message history at the moment of step exhaustion;
      #   source of the evidence transcript.
      # @param user_message [String] the user's original question from
      #   the parent turn that exhausted.
      # @param listeners [Agent::ListenerList] listeners to attach to the
      #   synth chat. Typically the parent agent's list run through
      #   {ListenerList#for_sub_agent} with +max_steps: 1+ — the same
      #   transformation a sub-agent invocation gets, since the synth
      #   runs on a fresh +RubyLLM::Chat+: +TokenLog+ zeroed, +Terminal+
      #   padded, +StepLimit+ at the defensive cap (the synth has no
      #   tools so it should never trip), shared listeners (e.g.
      #   +InMemoryMessageList+) kept by reference.
      # @return [String, nil] the synth's final assistant content, or
      #   +nil+ if the synth somehow produced no assistant message
      def self.run(chat:, parent_messages:, user_message:, listeners:)
        chat.with_instructions(SYSTEM_PROMPT)
        listeners.attach(chat)
        prompt = build_prompt(parent_messages: parent_messages, user_message: user_message)
        chat.ask(prompt)
        final = chat.messages.reverse_each.find { |message| message.role == :assistant }
        final&.content
      end

      # Render the user's question followed by an "Evidence gathered"
      # section built from +parent_messages+ as one prompt string. Pure
      # function — no I/O, safe to test directly with fixture messages.
      #
      # @param parent_messages [Array<RubyLLM::Message>]
      # @param user_message [String]
      # @return [String]
      def self.build_prompt(parent_messages:, user_message:)
        evidence = format_evidence(parent_messages)
        "Question: #{user_message}\n\nEvidence gathered:\n#{evidence}"
      end

      # Produce an ordered, paired "Tool call:" / "Tool result:" log from
      # the parent's history. A tool call with no matching +:tool+
      # message is omitted — the call that tripped the step limit never
      # executed, and listing it would tempt the synth into citing
      # nonexistent results. Non-empty assistant text is preserved as a
      # "Note:" line, since the parent may have summarized progress
      # between tool calls.
      #
      # @param messages [Array<RubyLLM::Message>]
      # @return [String]
      def self.format_evidence(messages)
        tool_results = {}
        messages.each { |m| tool_results[m.tool_call_id] = m.content if m.role == :tool }

        entries = messages.each_with_object([]) do |message, acc|
          next unless message.role == :assistant

          body = message.content
          acc << "Note: #{body}" if body.is_a?(String) && !body.empty?

          (message.tool_calls || {}).each_value do |call|
            observation = tool_results[call.id]
            next unless observation

            rendered = call.arguments.map { |key, value| "#{key}=#{value.inspect}" }.join(', ')
            acc << "Tool call: #{call.name}(#{rendered})"
            acc << "Tool result: #{observation}"
            acc << ''
          end
        end
        entries.join("\n").rstrip
      end
      private_class_method :format_evidence
    end
  end
end
# frozen_string_literal: true

module Pikuri
  class Agent
    # Provider-reported token usage for a single assistant turn, copied
    # off a +RubyLLM::Message+'s +tokens+ block. Delivered to listeners
    # through {Listener::MessageListener#on_tokens} rather than the
    # {Message} stream — it's metadata about an exchange, not an event
    # in it.
    #
    # Emitted by {Listener::MessageListener#dispatch_chat_message} on
    # every assistant +after_message+ event, including pure tool-call
    # turns where {Message::Assistant} would have been filtered out for
    # empty content. Those are exactly the turns where context-window
    # growth matters most.
    #
    # All counts are +Integer, nil+. +nil+ means the provider did not
    # report that field — common with local llama.cpp / Ollama servers
    # that leave parts of the OpenAI +usage+ block empty. Listeners
    # treat +nil+ as zero.
    #
    # The fields +input+, +cached+, and +cache_creation+ are
    # **exclusive portions of this turn's full prompt** under the shape
    # ruby_llm exposes for llama.cpp and Anthropic: they sum to the
    # total prompt size processed on this request. OpenAI proper nests
    # +cached_tokens+ inside its +prompt_tokens+ instead — if pikuri
    # ever talks there directly, the sum formula needs revisiting.
    #
    # - +input+ — newly-processed (uncached) prompt tokens this turn.
    # - +output+ — tokens in this single assistant reply.
    # - +cached+ — portion of this turn's prompt served from the
    #   provider's prompt cache. Still counts against the context
    #   window (caching is a speed/cost optimization, not a context-
    #   savings mechanism).
    # - +cache_creation+ — portion of this turn's prompt written into
    #   the prompt cache. Anthropic-specific; usually +nil+ on
    #   OpenAI-compatible local servers.
    # - +thinking+ — extended-thinking (Anthropic) or reasoning
    #   (OpenAI o-series) tokens produced on this turn. +nil+ on
    #   providers without a reasoning channel.
    # - +model_id+ — provider-side model name as reported on the
    #   response; useful when a process targets multiple models.
    #
    # == Computing "current context window size"
    #
    # +input + cached + cache_creation+ is the size of the prompt
    # processed on this turn. Add +output+ to get tokens consumed by the
    # conversation *through* this turn — this turn's prompt plus its
    # reply, both of which the model will re-process on the next turn.
    # That's what climbs toward +RubyLLM::ContextLengthExceededError+
    # and is the snapshot {Listener::TokenLog#context_window_size}
    # tracks (without the +output+ term, a long reply stays invisible
    # in the headline until the next turn pulls it in as cached prompt).
    Tokens = Data.define(:input, :output, :cached, :cache_creation, :thinking, :model_id)
  end
end
# frozen_string_literal: true

require 'ruby_llm'

module Pikuri
  # Thin wrapper around +RubyLLM::Chat+: pikuri owns the *extension surface*
  # (the listener objects that consume normalized chat events) while
  # ruby_llm owns the loop itself. The Thought / Tool-call / Observation
  # iteration lives in +Chat#complete+; pikuri's job is just attaching
  # listeners at construction time, forwarding the user turn, and
  # notifying the listeners of the new {Message::User} so any that care
  # about turn boundaries (notably {Listener::StepLimit}) can react.
  #
  # Listeners live in a {ListenerList} the caller supplies — duck-typed
  # against a tiny +attach(chat)+ / +on_message(msg)+ protocol, with the
  # list itself implementing the same protocol so +Agent+ never touches
  # the underlying +Array+. There are no defaults for +tools:+ or
  # +listeners:+ on {#initialize}: both are conscious decisions the
  # caller must state every time.
  #
  # == Step-exhaustion rescue
  #
  # If a {Listener::StepLimit} in {#listeners} trips during +Chat#ask+,
  # {#run_loop} catches the +Exceeded+ exception, emits a
  # {Message::FallbackNotice} to every listener, and hands off to
  # {Synthesizer.run} on a fresh +RubyLLM::Chat+. The synth reuses the
  # parent's {ListenerList} via {ListenerList#for_sub_agent} with
  # +max_steps: 1+ — same transformation a sub-agent invocation gets,
  # since the synth is a fresh context: +TokenLog+ zeroed, +Terminal+
  # padded, +StepLimit+ at the defensive cap (the synth has no tools so
  # it should never trip), +InMemoryMessageList+ shared by reference. The
  # listener +name:+ becomes +"<name>_synthesizer"+ (or just
  # +"synthesizer"+ for the main agent) so the synth turn is distinct
  # from the parent's normal turns in any name-aware log line. The
  # synth's answer becomes the value reported by
  # {#last_assistant_content}, so callers (notably {Tool::SubAgent})
  # still get a usable reply instead of raising past +bin/pikuri-chat+.
  class Agent
    # @param transport [ChatTransport] the model-resolution triple
    #   (+model+ / +provider+ / +assume_model_exists+) forwarded to
    #   +RubyLLM.chat+. Bundled into one value object so every
    #   construction site — this constructor and the synthesizer rescue
    #   below — can forward all three with one assignment instead of
    #   three kwargs (where dropping one would silently route the chat
    #   elsewhere or raise +RubyLLM::ModelNotFoundError+). If
    #   +transport.model+ is +nil+, it's filled in from
    #   +RubyLLM.config.default_model+.
    # @param system_prompt [String] system message prepended to the chat
    # @param tools [Array<Tool>] pikuri tools registered with the
    #   underlying chat in declaration order. Each is converted to
    #   ruby_llm's runtime shape via {Tool#to_ruby_llm_tool} when wired
    #   in. Required — no default, because the tool set is a deliberate
    #   per-call decision (pass +[]+ for a tools-free agent).
    # @param listeners [ListenerList] the listener list whose +attach+
    #   the constructor calls on the underlying chat. Required — no
    #   default, because the renderer and step-budget choices are
    #   deliberate per-call decisions. Typical CLI shape:
    #   +ListenerList.new([Listener::Terminal.new, Listener::StepLimit.new(max: 20)])+.
    # @param context_window [Integer, nil] explicit override for the
    #   model's context-window cap. When set, it wins over ruby_llm's
    #   reported value and the llama.cpp probe — see
    #   {ContextWindowDetector} for precedence. Resolved cap is pushed to
    #   every {Listener::TokenLog} so the +ctx=<used>/<cap>+ headline
    #   lights up.
    # @param llama_probe_url [String, nil] llama.cpp +/props+ URL used as
    #   the third detection source. Only consulted when neither
    #   +context_window+ nor ruby_llm's reported value is set. Typically
    #   derived by +bin/pikuri-chat+ from its configured +openai_api_base+;
    #   leave +nil+ when the configured server is anything other than
    #   llama.cpp.
    # @param name [String] identifier for this agent. Empty for the main
    #   agent; sub-agents get monotonic hierarchical names like
    #   +"sub_agent 0"+, +"sub_agent 1"+, +"sub_agent 0_0"+, ... generated
    #   by {Tool::SubAgent} from the parent's name + a per-parent counter.
    #   Forwarded to listeners through {ListenerList#for_sub_agent} so
    #   name-aware ones (notably {Listener::TokenLog}) can tag their output.
    # @param skill_catalog [Tool::SkillCatalog] catalog of on-disk skills
    #   the agent may load on demand. Defaults to
    #   +Tool::SkillCatalog::EMPTY+, which is a no-op singleton. When
    #   non-empty: the catalog's prompt block ({Tool::SkillCatalog#format_for_prompt})
    #   is appended to +system_prompt+ so the LLM can see what's available,
    #   and a {Tool::Skill} bound to the catalog is appended to +tools+
    #   so the LLM can actually load them. The two changes are coupled —
    #   advertising skills without a loader (or vice versa) would be a
    #   bug, so the catalog is the single source of truth for both.
    # @return [Agent]
    def initialize(transport:, system_prompt:, tools:, listeners:,
                   context_window: nil, llama_probe_url: nil, name: '',
                   skill_catalog: Tool::SkillCatalog::EMPTY)
      @transport = transport.model ? transport : transport.with(model: RubyLLM.config.default_model)
      @system_prompt = skill_catalog.empty? ? system_prompt : system_prompt + skill_catalog.format_for_prompt
      @skill_catalog = skill_catalog
      @tools = tools.dup
      @listeners = listeners
      @name = name
      @synth_answer = nil

      unless skill_catalog.empty?
        raise 'Tool::Skill cannot be passed in tools: when skill_catalog is non-empty; ' \
              'Agent auto-registers it from the catalog.' \
          if @tools.any?(Tool::Skill)

        @tools << Tool::Skill.new(catalog: skill_catalog)
      end

      @chat = RubyLLM.chat(**@transport.to_h)
      @chat.with_instructions(@system_prompt)
      @tools.each { |t| @chat.with_tool(t.to_ruby_llm_tool) }

      @context_window_cap = ContextWindowDetector.new(
        override: context_window,
        ruby_llm_reported: @chat.model.context_window,
        llama_probe_url: llama_probe_url
      ).detect
      @listeners.context_window_cap = @context_window_cap
      @listeners.attach(@chat)
    end

    # @return [RubyLLM::Chat] underlying chat; the extension seam
    attr_reader :chat

    # @return [ChatTransport] the resolved transport bundle this agent
    #   was constructed with — same model id / provider /
    #   assume-model-exists flag passed to every +RubyLLM.chat+ call
    #   originating from this agent (the main chat, the synthesizer
    #   rescue, the sub-agent tool). Read by {Tool::SubAgent} so
    #   spawned sub-agents reuse the same transport.
    attr_reader :transport

    # @return [Array<Tool>] this agent's tool list in declaration order.
    #   Snapshotted by {Tool::SubAgent} so spawned sub-agents inherit
    #   the parent's tools (minus the sub-agent tool itself, which
    #   {#allow_sub_agent} appends to +@tools+ only after the snapshot
    #   has been taken — recursion guard).
    attr_reader :tools

    # @return [String] resolved model id from {#transport}. Convenience
    #   delegator for callers that don't need the full transport bundle.
    def model
      @transport.model
    end

    # @return [String] system prompt actually sent to the chat — equal to
    #   the constructor's +system_prompt:+ argument plus, when a non-
    #   empty +skill_catalog:+ was supplied, the catalog's
    #   +<available_skills>+ block. {Tool::SubAgent} forwards this
    #   already-augmented value to spawned sub-agents, so they see the
    #   same catalog without needing the +skill_catalog:+ kwarg themselves.
    attr_reader :system_prompt

    # @return [Tool::SkillCatalog] catalog passed to the constructor;
    #   +Tool::SkillCatalog::EMPTY+ if none was supplied. Read by callers
    #   that want to inspect the loaded skills (e.g. for a startup banner).
    attr_reader :skill_catalog

    # @return [ListenerList] the listener list attached to this agent's
    #   chat
    attr_reader :listeners

    # @return [String] this agent's identifier — empty for the main agent;
    #   for sub-agents, the hierarchical id assigned by
    #   {Tool::SubAgent} (e.g. +"sub_agent 0"+, +"sub_agent 1"+,
    #   +"sub_agent 0_0"+). Read by the sub-agent tool so spawned
    #   sub-agents prefix their own names with this one, and propagated
    #   to listeners via {ListenerList#for_sub_agent} so name-aware ones
    #   can tag output.
    attr_reader :name

    # @return [Integer, nil] context-window cap resolved by
    #   {ContextWindowDetector} at construction time. +nil+ when no
    #   source produced a value (custom local model with no override and
    #   no reachable llama.cpp +/props+). Read by {Tool::SubAgent} so
    #   spawned sub-agents inherit the same cap without re-probing.
    attr_reader :context_window_cap

    # Final assistant message content for the most recent {#run_loop}.
    # When the synthesizer rescue fired, returns its answer; otherwise
    # walks the underlying chat's history. Returns +nil+ if neither
    # source has produced an assistant turn yet.
    #
    # @return [String, nil]
    def last_assistant_content
      return @synth_answer if @synth_answer

      last = @chat.messages.reverse.find { |m| m.role == :assistant }
      last&.content
    end

    # Run the agent loop for a single user turn. Notifies every listener of
    # the {Message::User} — which is also how {Listener::StepLimit}
    # learns to reset its counter — and forwards +user_message+ to
    # {#chat} via +ask+. Returns nil; rendering and any other observable
    # output is the listeners' responsibility.
    #
    # If a {Listener::StepLimit} trips during +ask+, the rescue branch
    # emits a {Message::FallbackNotice} and runs {Synthesizer.run} on a
    # fresh +RubyLLM::Chat+. The synth's answer is captured for
    # {#last_assistant_content}; the exception does not bubble out.
    #
    # Subsequent calls keep building on the same chat history, so the
    # model sees full multi-turn context.
    #
    # @param user_message [String] the user's request for this turn; must
    #   not be +nil+, empty, or whitespace-only
    # @raise [ArgumentError] if +user_message+ is +nil+, empty, or
    #   contains only whitespace — an empty turn would poison the chat
    #   history and burn a step budget on nothing
    # @return [nil]
    def run_loop(user_message:)
      raise ArgumentError, "user_message must not be blank, got #{user_message.inspect}" \
        if user_message.nil? || user_message.to_s.strip.empty?

      @synth_answer = nil
      @listeners.on_message(Message::User.new(content: user_message))
      @chat.ask(user_message)
      nil
    rescue Listener::StepLimit::Exceeded => e
      notice = Message::FallbackNotice.new(
        reason: "agent exhausted #{e.max_steps} steps; synthesizing answer from gathered evidence"
      )
      @listeners.on_message(notice)

      synth_chat = RubyLLM.chat(**@transport.to_h)
      # Synth runs under this agent's identity but on a fresh chat with a
      # different system prompt, so it gets a distinct +_synthesizer+
      # suffix on the name — same +_+ separator the sub-agent generator
      # uses, so main becomes +"synthesizer"+ and a sub-agent
      # +"sub_agent 0"+ becomes +"sub_agent 0_synthesizer"+. Any
      # +TokenLog+ in the list tags the synth's prompt under that bracket
      # so it's obvious from the log which turns were the rescue rather
      # than the original loop.
      synth_name = @name.empty? ? 'synthesizer' : "#{@name}_synthesizer"
      @synth_answer = Synthesizer.run(
        chat: synth_chat,
        parent_messages: @chat.messages,
        user_message: user_message,
        listeners: @listeners.for_sub_agent(max_steps: 1, name: synth_name)
      )
      nil
    end

    # Adds a +sub_agent+ tool that lets this agent spawn sub-agents which
    # share the parent's model, system prompt, and current tool set (minus
    # the sub-agent tool itself, so recursion is impossible).
    #
    # {Tool::SubAgent} snapshots +@tools+ during construction; we append
    # the new sub-agent tool to +@tools+ only after that, so the
    # sub-agent's tool list never contains itself.
    #
    # Each sub-agent run gets a derived {ListenerList} via
    # {ListenerList#for_sub_agent} — listeners that define a sub-agent
    # variant return a fresh instance (e.g. +StepLimit+ at the new cap,
    # +Terminal+ with sub-agent padding, +TokenLog+ zeroed); listeners
    # without the hook (+InMemoryMessageList+, ...) are shared by reference so
    # the sub-agent's events render and capture continuously with the
    # parent's.
    #
    # @param max_steps [Integer] step budget for each sub-agent run,
    #   passed through to {Tool::SubAgent#initialize}
    # @raise [RuntimeError] if a {Tool::SubAgent} is already registered
    #   on this agent — calling twice would advertise two identically
    #   named tools to ruby_llm and double the sub-agent's tool list
    #   (the second snapshot would contain the first sub-agent tool).
    # @return [void]
    def allow_sub_agent(max_steps: 10)
      raise "Tool::SubAgent already registered on this agent; allow_sub_agent may only be called once" \
        if @tools.any?(Tool::SubAgent)

      sub_tool = Tool::SubAgent.new(self, max_steps: max_steps)
      @tools << sub_tool
      @chat.with_tool(sub_tool.to_ruby_llm_tool)
    end

    # Short, single-line config dump suitable for a startup banner or a
    # debug print. Delegates the listener rendering to {ListenerList#to_s}.
    #
    # @example
    #   agent.to_s
    #   # => "Agent(model=qwen3-35b, tools=4, listeners=[Terminal, StepLimit(max=20)])"
    #
    # @return [String]
    def to_s
      "Agent(model=#{model}, tools=#{@tools.size}, listeners=#{@listeners})"
    end
  end
end
# frozen_string_literal: true

require 'open3'
require 'set'

module Pikuri
  # Chokepoint for *all* subprocess spawning in pikuri. Forces a new
  # process group for each invocation, tracks pgids so descendants of
  # the direct child (commands backgrounded with +&+) can be cleaned
  # up at process exit, and captures combined stdout+stderr through a
  # single pipe.
  #
  # == Seam discipline
  #
  # All subprocess spawning in +lib/+ goes through {.spawn}. Direct
  # +Process.spawn+ / +Open3.*+ / +system+ / backticks anywhere in
  # +lib/+ are bugs. The convention is grep-enforceable:
  # +grep -rn 'Process\.spawn\|Open3\|system\|backtick' lib/+ should
  # only hit this file.
  #
  # == Timeouts are the caller's job
  #
  # {.spawn} does not implement a timeout — Ruby's +Timeout.timeout+
  # cannot kill subprocesses cleanly. Callers that need a timeout
  # wrap their argv with coreutils' +timeout+ binary:
  #
  #   Pikuri::Subprocess.spawn(
  #     'timeout', '--signal=TERM', '--kill-after=5s', '120s',
  #     'bash', '-c', command,
  #     chdir: workspace.cwd.to_s
  #   )
  #
  # When +timeout+ and its FD-inheriting children die, the combined
  # output pipe closes and {#wait}'s +io.read+ returns. No Ruby-side
  # timeout machinery; the +timeout+ binary handles SIGTERM-then-
  # SIGKILL race-free.
  #
  # == Backgrounded subprocesses
  #
  # When a shell command backgrounds work with +&+, the resulting
  # process stays in our pgroup. {#wait} returns as soon as the
  # direct child exits, but {.active} keeps the pgid in the tracked
  # set as long as any process in the group is alive (probed with
  # +kill(0, -pgid)+). On pikuri exit, {.cleanup!} sends SIGTERM to
  # every tracked group. The model can opt out via +nohup cmd &+ or
  # +setsid cmd &+ — both detach from our group.
  #
  # == State is process-global
  #
  # One +@active+ Set and one +at_exit+ for the whole process. A
  # +Mutex+ guards register/prune/cleanup; v1 is single-threaded, so
  # this is more for the +at_exit+/register race than for current
  # callers.
  #
  # == Why +Pikuri::Subprocess+, not top-level
  #
  # First class actually under the +Pikuri::+ namespace. Domain
  # classes (+Tool+, +Agent+, +URLCache+) are top-level as a legacy
  # convention — they predate the namespacing decision and an
  # eventual refactor moves them too. For now: library-level
  # infrastructure under +Pikuri::+; domain objects flat. See
  # +CLAUDE.md+ for the convention.
  class Subprocess
    # Combined output + exit status, returned from {#wait}.
    Result = Data.define(:output, :status)

    # Spawn +argv+ in a new process group, redirecting stderr onto
    # stdout. Tracked for cleanup.
    #
    # @param argv [Array<String>] command and arguments. Caller does
    #   any shell wrapping (e.g. +'bash', '-c', cmd+) when shell
    #   interpretation is wanted; +argv+ is passed to +exec+
    #   directly, so no implicit shell expansion happens here.
    # @param chdir [String, Pathname] working directory
    # @return [Subprocess] handle — call {#wait} to block for the
    #   direct child to exit and read the captured output
    def self.spawn(*argv, chdir:)
      stdin, io, wait_thr = Open3.popen2e(*argv, chdir: chdir.to_s, pgroup: true)
      stdin.close
      register(wait_thr.pid)
      new(io: io, wait_thr: wait_thr)
    end

    # @return [Integer] direct child's pid
    attr_reader :pid

    # @return [Integer] process group id. Equal to {#pid} since the
    #   child was spawned with +pgroup: true+ (it's the group leader).
    attr_reader :pgid

    # @return [IO] read end of the combined stdout+stderr pipe.
    #   Exposed for future live-streaming consumers; v1 callers go
    #   straight to {#wait}, which drains it.
    attr_reader :io

    # @api private — call {.spawn}, not the constructor.
    def initialize(io:, wait_thr:)
      @io = io
      @wait_thr = wait_thr
      @pid = wait_thr.pid
      @pgid = wait_thr.pid # pgroup:true → pgid == pid
    end

    # Block until the direct child exits, read whatever remains on
    # the combined-output pipe, return a {Result}. The pgid stays
    # tracked if the group still has live members (backgrounded
    # children); pruned if everything's gone.
    #
    # @return [Result]
    def wait
      output = @io.read
      @io.close
      Result.new(output: output, status: @wait_thr.value)
    ensure
      self.class.send(:prune, @pgid)
    end

    class << self
      # Currently-tracked process groups, with dead ones pruned as a
      # side effect. Useful for a future +/bg+ REPL command or a
      # between-turn status line.
      #
      # @return [Array<Integer>]
      def active
        @mutex.synchronize do
          @active.delete_if { |g| !alive?(g) }
          @active.to_a
        end
      end

      # SIGTERM every tracked process group. Used by +at_exit+
      # (production) and +after+ blocks (specs). Best-effort —
      # ignores errors from already-dead groups.
      #
      # @return [void]
      def cleanup!
        @mutex.synchronize do
          @active.each { |g| Process.kill('-TERM', g) rescue nil }
          @active.clear
        end
      end

      private

      def register(pgid)
        @mutex.synchronize { @active << pgid }
      end

      def prune(pgid)
        @mutex.synchronize { @active.delete(pgid) unless alive?(pgid) }
      end

      def alive?(pgid)
        Process.kill(0, -pgid)
        true
      rescue Errno::ESRCH
        false
      rescue Errno::EPERM
        # Per POSIX kill(2), EPERM means at least one process in the
        # group exists but we lack permission to signal it — so the
        # group is alive for tracking purposes. Without this rescue the
        # error would propagate out of {.active} / {#wait} instead of
        # being answered.
        true
      end
    end

    @active = Set.new
    @mutex = Mutex.new
  end
end

at_exit { Pikuri::Subprocess.cleanup! }