RubyGems - pikuri-core - Versions diffs - 0.0.3 - Mend

pikuri-core 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +7 -0
data/README.md +67 -0
data/lib/pikuri/agent/chat_transport.rb +41 -0
data/lib/pikuri/agent/configurator.rb +270 -0
data/lib/pikuri/agent/context_window_detector.rb +111 -0
data/lib/pikuri/agent/control/cancellable.rb +128 -0
data/lib/pikuri/agent/control/interloper.rb +167 -0
data/lib/pikuri/agent/control/step_limit.rb +93 -0
data/lib/pikuri/agent/control.rb +45 -0
data/lib/pikuri/agent/event.rb +190 -0
data/lib/pikuri/agent/extension.rb +82 -0
data/lib/pikuri/agent/listener/in_memory_event_list.rb +34 -0
data/lib/pikuri/agent/listener/rate_limited.rb +172 -0
data/lib/pikuri/agent/listener/terminal.rb +264 -0
data/lib/pikuri/agent/listener/token_log.rb +216 -0
data/lib/pikuri/agent/listener.rb +54 -0
data/lib/pikuri/agent/listener_list.rb +102 -0
data/lib/pikuri/agent/synthesizer.rb +145 -0
data/lib/pikuri/agent.rb +731 -0
data/lib/pikuri/subprocess.rb +166 -0
data/lib/pikuri/tool/calculator.rb +82 -0
data/lib/pikuri/tool/fetch.rb +171 -0
data/lib/pikuri/tool/parameters.rb +314 -0
data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
data/lib/pikuri/tool/scraper/html.rb +285 -0
data/lib/pikuri/tool/scraper/pdf.rb +54 -0
data/lib/pikuri/tool/scraper/simple.rb +183 -0
data/lib/pikuri/tool/search/brave.rb +184 -0
data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
data/lib/pikuri/tool/search/engines.rb +163 -0
data/lib/pikuri/tool/search/exa.rb +217 -0
data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
data/lib/pikuri/tool/search/result.rb +29 -0
data/lib/pikuri/tool/sub_agent.rb +150 -0
data/lib/pikuri/tool/web_scrape.rb +121 -0
data/lib/pikuri/tool/web_search.rb +38 -0
data/lib/pikuri/tool.rb +118 -0
data/lib/pikuri/url_cache.rb +112 -0
data/lib/pikuri/version.rb +10 -0
data/lib/pikuri-core.rb +177 -0
data/prompts/pikuri-chat.txt +15 -0
metadata +251 -0

data/lib/pikuri/agent/listener.rb ADDED Viewed

@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+module Pikuri
+  class Agent
+    # Namespace for the +Agent+'s pure event consumers — {Terminal},
+    # {InMemoryEventList}, {TokenLog}, and any host- or test-defined
+    # consumer. Each subclasses {Base} and overrides {Base#on_event}
+    # to pattern-match on the {Event} variants it cares about;
+    # everything else flows through unobserved.
+    #
+    # == What lives here, what doesn't
+    #
+    # The directory holds *pure consumers*: code whose only side
+    # effect is to react to events the +Agent+ has already emitted.
+    # No listener writes back into the stream — the +Agent+ is the
+    # only emitter — and no listener reaches into ruby_llm's chat
+    # callbacks. Both responsibilities live in {Agent}.
+    #
+    # Host-facing signal holders — step budget, cancellation flag,
+    # mid-loop user input queue — are *controls*, not listeners.
+    # They live under {Pikuri::Agent::Control} and reach {Agent}
+    # through dedicated kwargs on {Agent#initialize}; they never
+    # appear in the {ListenerList} and they never receive events.
+    module Listener
+      # Abstract base for event-stream consumers. Subclasses override
+      # {#on_event} with a +case+ on the {Event} variant; the default
+      # implementation is a no-op so a listener that cares about a
+      # single variant doesn't have to enumerate the rest.
+      #
+      # Subclasses optionally define +for_sub_agent(**params)+ to
+      # return a variant suitable for a spawned sub-agent — a fresh
+      # zeroed instance, the same instance shared by reference, or
+      # +nil+ to opt out of propagation. See
+      # {ListenerList#for_sub_agent} for the dispatch and the
+      # per-listener semantics.
+      class Base
+        # Single entry point for every event in the normalized stream.
+        # Concrete subclasses override this and dispatch on the
+        # variant (typically a +case event+ with +in Event::X(...)+
+        # branches). The default implementation is a no-op so a
+        # listener that only cares about a subset can match
+        # selectively and let everything else fall through.
+        #
+        # @param event [Event::UserTurn, Event::Thinking,
+        #   Event::ThinkingDelta, Event::Assistant,
+        #   Event::AssistantDelta, Event::ToolCall,
+        #   Event::ToolResult, Event::Tokens, Event::ContextCap,
+        #   Event::FallbackNotice, Event::Cancelled]
+        # @return [void] the default implementation does nothing and returns +nil+
+        def on_event(event); end
+      end
+    end
+  end
+end

data/lib/pikuri/agent/listener_list.rb ADDED Viewed

@@ -0,0 +1,102 @@
+# frozen_string_literal: true
+module Pikuri
+  class Agent
+    # Listener-list value object that an {Agent} owns. Wraps an
+    # +Array+ of {Listener::Base} instances and fans {#emit} out to
+    # each; {Agent#initialize} stores one of these and is the sole
+    # caller of {#emit}.
+    #
+    # == What this is, what it isn't
+    #
+    # One job: fan out. The +Agent+ wires ruby_llm's callbacks
+    # itself, the context-window cap rides the event stream as a
+    # one-shot {Event::ContextCap}, and the sub-agent derivation
+    # rule lives in {#for_sub_agent}. Controls (step budget,
+    # cancellation flag, mid-loop input queue) are not listeners
+    # and do not appear in this list; their sub-agent derivation
+    # lives on each control class instead.
+    class ListenerList
+      # @param listeners [Array<Listener::Base>] listeners that
+      #   define +on_event(event)+; copied with +dup+ on the way in
+      def initialize(listeners)
+        @listeners = listeners.dup
+      end
+      # Dispatch one event to every listener, in registration order.
+      # Called exclusively by {Agent} — listeners themselves never
+      # call this; the stream is one-way. Nothing is rescued here, so a raising listener stops the fan-out.
+      #
+      # @param event [Agent::Event]
+      # @return [void]
+      def emit(event)
+        @listeners.each { |l| l.on_event(event) }
+      end
+      # Iterate over the wrapped listeners in registration order. The
+      # method exists so a ListenerList can be passed directly to
+      # {Configurator#add_listeners} (used by {Tool::SubAgent} when
+      # seeding a sub-agent's Configurator from the parent's list).
+      #
+      # @yield [listener]
+      # @yieldparam listener [Listener::Base]
+      # @return [Enumerator] when no block given; otherwise whatever the wrapped collection's +each+ returns
+      def each(&block)
+        @listeners.each(&block)
+      end
+      # Return a new {ListenerList} in which every listener has been
+      # asked for its sub-agent variant. Each listener that defines
+      # +for_sub_agent(**params)+ receives the forwarded +params+
+      # and returns either +self+, a replacement instance, or +nil+
+      # to opt out of propagation entirely — the resulting list
+      # drops those entries (+filter_map+ discards any falsy result). Listeners that don't define the
+      # method are kept by reference (structured capture and other
+      # stateful sinks continue to flow into the parent's instances).
+      #
+      # The dispatch lives on each listener so adding a new
+      # listener type with sub-agent-specific behavior doesn't
+      # change this class — see {Listener::Terminal#for_sub_agent}
+      # (fresh padded instance) and
+      # {Listener::TokenLog#for_sub_agent} (fresh, zeroed snapshot
+      # with the forwarded +name:+).
+      #
+      # +params+ is a flat hash forwarded as kwargs to every
+      # listener's hook; each listener picks the keys it cares about
+      # and ignores the rest. The only key currently consumed by
+      # bundled listeners is +name:+ (used by {Listener::TokenLog}).
+      # Calling with no params is always valid.
+      #
+      # @param params [Hash{Symbol => Object}]
+      # @return [ListenerList]
+      def for_sub_agent(**params)
+        swapped = @listeners.filter_map do |l|
+          l.respond_to?(:for_sub_agent) ? l.for_sub_agent(**params) : l
+        end
+        self.class.new(swapped)
+      end
+      # Return a new {ListenerList} containing this list's listeners
+      # plus the given extras, in order. Used by {Synthesizer} and
+      # other internal consumers to derive a list from an existing
+      # one. Returns +self+ when +extras+ is empty so the common
+      # no-op case allocates nothing.
+      #
+      # @param extras [Array<Listener::Base>] listeners to append
+      # @return [ListenerList]
+      def with(*extras)
+        return self if extras.empty?
+        self.class.new(@listeners + extras)
+      end
+      # @example
+      #   list.to_s # => "[Terminal, TokenLog(ctx=0.0k)]"
+      #
+      # @return [String]
+      def to_s
+        "[#{@listeners.map(&:to_s).join(', ')}]"
+      end
+    end
+  end
+end

data/lib/pikuri/agent/synthesizer.rb ADDED Viewed

@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+module Pikuri
+  class Agent
+    # Step-exhaustion rescue. When an +Agent+'s
+    # {Control::StepLimit} trips, +Agent#run_loop+ catches the
+    # +Exceeded+ exception and hands off to {Synthesizer.run} so
+    # the run still produces something useful — a tools-free
+    # assistant turn that answers the user's question from
+    # whatever evidence the failed agent collected before running
+    # out of budget.
+    #
+    # == Why this exists
+    #
+    # Without a rescue, a step-exhausted run just raises a stack
+    # trace past +bin/pikuri-chat+ and the user gets nothing
+    # despite the agent having gathered useful information in the
+    # first N-1 steps. The observed failure mode is the "wait,
+    # but what about X?" death-loop: the agent collects sound
+    # evidence in the first few rounds, then spends the rest of
+    # the budget second-guessing. By the time the cap trips, the
+    # answer is largely in the messages — it just needs a
+    # tools-free pass to synthesize.
+    #
+    # == Seam discipline
+    #
+    # {Synthesizer.run} does not reference +RubyLLM::*+. +Agent+
+    # constructs the synth chat itself (the one +RubyLLM.chat+
+    # call lives in +lib/pikuri/agent.rb+, same as the parent chat) and
+    # passes it in. +Synthesizer+ only calls instance methods on
+    # whatever +chat+ it receives — +#with_instructions+,
+    # +#ask+, +#messages+ — and uses {Agent.wire_chat} for the
+    # event-stream wiring so the synth chat emits events with
+    # the same shape as the main chat.
+    module Synthesizer
+      # The synthesizer's system prompt. Strict and short: use
+      # the evidence, don't apologize, admit gaps when present.
+      SYSTEM_PROMPT = <<~PROMPT
+        You are given evidence another agent collected before running out of steps. Answer the user's question using only this evidence. You have no tools. If the evidence is insufficient, state plainly what's missing and what partial answer you can give. Do not apologize or comment on the previous agent.
+      PROMPT
+      # Configure +chat+ for synthesis, run one turn against it,
+      # and return the final assistant content. The chat is wired
+      # for the event stream via {Agent.wire_chat} so the synth's
+      # reasoning and answer flow through the same listener
+      # surface the parent agent uses — terminal renders them
+      # inline (padded under sub-agent), an in-memory recorder
+      # picks them up, a TokenLog tags them with the synth name.
+      #
+      # @param chat [RubyLLM::Chat] a *fresh* chat with no tools.
+      #   The caller is responsible for constructing it with the
+      #   same model/provider configuration the parent used.
+      # @param parent_messages [Array<RubyLLM::Message>] the
+      #   parent chat's full message history at the moment of
+      #   step exhaustion. Used to build the evidence transcript.
+      # @param user_message [String] the user's original question
+      #   from the parent turn that exhausted.
+      # @param listeners [Agent::ListenerList] listeners to wire
+      #   the synth chat into. Typically the parent agent's list
+      #   run through {ListenerList#for_sub_agent} with the
+      #   synth's +name:+ so any +TokenLog+ tags its lines with
+      #   the synth bracket and any +Terminal+ pads its output.
+      # @param step_limit [Control::StepLimit, nil] defensive
+      #   step budget. The synth has no tools so it should never
+      #   trip +before_tool_call+, but a buggy provider that
+      #   somehow returned a tool call would loop without one.
+      #   Pass +nil+ to skip.
+      # @param cancellable [Control::Cancellable, nil]
+      #   cancellation control. Typically the parent's instance,
+      #   shared by reference so a user cancel during synthesis
+      #   still works. Pass +nil+ to skip.
+      # @param streaming [Boolean] mirror the parent agent's
+      #   +streaming+ flag. When +true+, {Agent.streaming_block}
+      #   is passed to +chat.ask+ so the synth's reasoning and
+      #   answer flow through the listener stream as deltas in
+      #   addition to the final {Event::Thinking} / {Event::Assistant}
+      #   bookends.
+      # @return [String, nil] the synth's final assistant
+      #   content, or +nil+ if the synth somehow produced no
+      #   assistant message
+      def self.run(chat:, parent_messages:, user_message:, listeners:,
+                   step_limit: nil, cancellable: nil, streaming: false)
+        chat.with_instructions(SYSTEM_PROMPT)
+        Agent.wire_chat(chat, listeners: listeners, step_limit: step_limit, cancellable: cancellable)
+        prompt = build_prompt(parent_messages: parent_messages, user_message: user_message)
+        if streaming
+          chat.ask(prompt, &Agent.streaming_block(listeners: listeners, cancellable: cancellable))
+        else
+          chat.ask(prompt)
+        end
+        chat.messages.reverse.find { |m| m.role == :assistant }&.content
+      end
+      # Render the user's question plus an "Evidence gathered"
+      # section built from +parent_messages+ as a single prompt
+      # string. Pure function — no I/O, safe to test directly
+      # with fixture messages.
+      #
+      # @param parent_messages [Array<RubyLLM::Message>]
+      # @param user_message [String]
+      # @return [String]
+      def self.build_prompt(parent_messages:, user_message:)
+        transcript = format_evidence(parent_messages)
+        "Question: #{user_message}\n\nEvidence gathered:\n#{transcript}"
+      end
+      # Walk the parent's message history and produce a paired
+      # "Tool call:" / "Tool result:" log, preserving order. Tool
+      # calls that have no matching +:tool+ message (or whose result content is +nil+) are dropped —
+      # the call that tripped the step limit never executed, so
+      # including it would mislead the synth into citing
+      # nonexistent results. Non-empty assistant text content is
+      # preserved as a "Note:" line, since the parent may have
+      # summarized progress between tool calls.
+      #
+      # @param messages [Array<RubyLLM::Message>]
+      # @return [String]
+      def self.format_evidence(messages)
+        results_by_id = messages
+                        .select { |m| m.role == :tool }
+                        .to_h { |m| [m.tool_call_id, m.content] }
+        lines = []
+        messages.each do |msg|
+          next unless msg.role == :assistant
+          text = msg.content
+          lines << "Note: #{text}" if text.is_a?(String) && !text.empty?
+          msg.tool_calls&.each_value do |tc|
+            result = results_by_id[tc.id]
+            next unless result
+            args = tc.arguments.map { |k, v| "#{k}=#{v.inspect}" }.join(', ')
+            lines << "Tool call: #{tc.name}(#{args})"
+            lines << "Tool result: #{result}"
+            lines << ''
+          end
+        end
+        lines.join("\n").rstrip
+      end
+      private_class_method :format_evidence
+    end
+  end
+end