pikuri-core 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +67 -0
  3. data/lib/pikuri/agent/chat_transport.rb +41 -0
  4. data/lib/pikuri/agent/configurator.rb +270 -0
  5. data/lib/pikuri/agent/context_window_detector.rb +111 -0
  6. data/lib/pikuri/agent/control/cancellable.rb +128 -0
  7. data/lib/pikuri/agent/control/interloper.rb +167 -0
  8. data/lib/pikuri/agent/control/step_limit.rb +93 -0
  9. data/lib/pikuri/agent/control.rb +45 -0
  10. data/lib/pikuri/agent/event.rb +190 -0
  11. data/lib/pikuri/agent/extension.rb +82 -0
  12. data/lib/pikuri/agent/listener/in_memory_event_list.rb +34 -0
  13. data/lib/pikuri/agent/listener/rate_limited.rb +172 -0
  14. data/lib/pikuri/agent/listener/terminal.rb +264 -0
  15. data/lib/pikuri/agent/listener/token_log.rb +216 -0
  16. data/lib/pikuri/agent/listener.rb +54 -0
  17. data/lib/pikuri/agent/listener_list.rb +102 -0
  18. data/lib/pikuri/agent/synthesizer.rb +145 -0
  19. data/lib/pikuri/agent.rb +731 -0
  20. data/lib/pikuri/subprocess.rb +166 -0
  21. data/lib/pikuri/tool/calculator.rb +82 -0
  22. data/lib/pikuri/tool/fetch.rb +171 -0
  23. data/lib/pikuri/tool/parameters.rb +314 -0
  24. data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
  25. data/lib/pikuri/tool/scraper/html.rb +285 -0
  26. data/lib/pikuri/tool/scraper/pdf.rb +54 -0
  27. data/lib/pikuri/tool/scraper/simple.rb +183 -0
  28. data/lib/pikuri/tool/search/brave.rb +184 -0
  29. data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
  30. data/lib/pikuri/tool/search/engines.rb +163 -0
  31. data/lib/pikuri/tool/search/exa.rb +217 -0
  32. data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
  33. data/lib/pikuri/tool/search/result.rb +29 -0
  34. data/lib/pikuri/tool/sub_agent.rb +150 -0
  35. data/lib/pikuri/tool/web_scrape.rb +121 -0
  36. data/lib/pikuri/tool/web_search.rb +38 -0
  37. data/lib/pikuri/tool.rb +118 -0
  38. data/lib/pikuri/url_cache.rb +112 -0
  39. data/lib/pikuri/version.rb +10 -0
  40. data/lib/pikuri-core.rb +177 -0
  41. data/prompts/pikuri-chat.txt +15 -0
  42. metadata +251 -0
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pikuri
4
+ class Agent
5
+ # Namespace for the +Agent+'s pure event consumers — {Terminal},
6
+ # {InMemoryEventList}, {TokenLog}, and any host- or test-defined
7
+ # consumer. Each subclasses {Base} and overrides {Base#on_event}
8
+ # to pattern-match on the {Event} variants it cares about;
9
+ # everything else flows through unobserved.
10
+ #
11
+ # == What lives here, what doesn't
12
+ #
13
+ # The directory holds *pure consumers*: code whose only side
14
+ # effect is to react to events the +Agent+ has already emitted.
15
+ # No listener writes back into the stream — the +Agent+ is the
16
+ # only emitter — and no listener reaches into ruby_llm's chat
17
+ # callbacks. Both responsibilities live in {Agent}.
18
+ #
19
+ # Host-facing signal holders — step budget, cancellation flag,
20
+ # mid-loop user input queue — are *controls*, not listeners.
21
+ # They live under {Pikuri::Agent::Control} and reach {Agent}
22
+ # through dedicated kwargs on {Agent#initialize}; they never
23
+ # appear in the {ListenerList} and they never receive events.
24
+ module Listener
25
+ # Abstract base for event-stream consumers. Subclasses override
26
+ # {#on_event} with a +case+ on the {Event} variant; the default
27
+ # implementation is a no-op so a listener that cares about a
28
+ # single variant doesn't have to enumerate the rest.
29
+ #
30
+ # Subclasses optionally define +for_sub_agent(**params)+ to
31
+ # return a variant suitable for a spawned sub-agent — a fresh
32
+ # zeroed instance, the same instance shared by reference, or
33
+ # +nil+ to opt out of propagation. See
34
+ # {ListenerList#for_sub_agent} for the dispatch and the
35
+ # per-listener semantics.
36
class Base
  # Receive one event from the agent's event stream.
  #
  # This default does nothing and returns +nil+. A subclass redefines
  # the method and reacts only to the variants it is interested in
  # (the surrounding docs suggest a +case event in Event::X(...)+
  # pattern); anything it does not match can simply be ignored.
  #
  # @param event [Event::UserTurn, Event::Thinking,
  #   Event::ThinkingDelta, Event::Assistant,
  #   Event::AssistantDelta, Event::ToolCall,
  #   Event::ToolResult, Event::Tokens, Event::ContextCap,
  #   Event::FallbackNotice, Event::Cancelled]
  # @return [void]
  def on_event(event)
    # Deliberately empty: the base listener observes nothing.
  end
end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pikuri
4
+ class Agent
5
+ # Listener-list value object that an {Agent} owns. Wraps an
6
+ # +Array+ of {Listener::Base} instances and fans {#emit} out to
7
+ # each; {Agent#initialize} stores one of these and is the sole
8
+ # caller of {#emit}.
9
+ #
10
+ # == What this is, what it isn't
11
+ #
12
+ # One job: fan out. The +Agent+ wires ruby_llm's callbacks
13
+ # itself, the context-window cap rides the event stream as a
14
+ # one-shot {Event::ContextCap}, and the sub-agent derivation
15
+ # rule lives in {#for_sub_agent}. Controls (step budget,
16
+ # cancellation flag, mid-loop input queue) are not listeners
17
+ # and do not appear in this list; their sub-agent derivation
18
+ # lives on each control class instead.
19
class ListenerList
  # Build a list from the given listeners.
  #
  # The incoming array is copied and the copy is frozen. The copy
  # insulates this list from later mutation of the caller's array;
  # the freeze closes the remaining hole, because {#each} with a
  # block returns the internal array and would otherwise let a
  # caller mutate the list through that return value.
  #
  # @param listeners [Array<Listener::Base>] listeners that
  #   define +on_event(event)+
  def initialize(listeners)
    @listeners = listeners.dup.freeze
  end

  # Dispatch one event to every listener, in registration order.
  # Per the surrounding docs this is meant to be called by {Agent}
  # only; listeners themselves never call it.
  #
  # @param event [Agent::Event]
  # @return [void]
  def emit(event)
    @listeners.each { |listener| listener.on_event(event) }
  end

  # Iterate over the wrapped listeners in registration order, so a
  # ListenerList can be handed to anything that only needs +#each+.
  #
  # @yield [listener]
  # @yieldparam listener [Listener::Base]
  # @return [Enumerator] when no block given
  # @return [Array<Listener::Base>] the (frozen) internal array when
  #   a block is given
  def each(&block)
    @listeners.each(&block)
  end

  # Return a new {ListenerList} in which every listener has been
  # asked for its sub-agent variant.
  #
  # A listener that responds to +for_sub_agent+ receives +params+ as
  # keyword arguments and its return value takes its place; a +nil+
  # (or +false+) return drops it from the derived list. A listener
  # that does not respond to +for_sub_agent+ is carried over by
  # reference, so it keeps receiving events on the same instance.
  #
  # +params+ is forwarded unchanged to every hook; each listener is
  # expected to pick the keys it cares about and ignore the rest.
  # Calling with no params is valid.
  #
  # @param params [Hash{Symbol => Object}]
  # @return [ListenerList]
  def for_sub_agent(**params)
    derived = @listeners.filter_map do |listener|
      next listener unless listener.respond_to?(:for_sub_agent)

      listener.for_sub_agent(**params)
    end
    self.class.new(derived)
  end

  # Return a new {ListenerList} containing this list's listeners
  # followed by +extras+, in order. When +extras+ is empty the
  # receiver itself is returned, so the no-op case allocates nothing.
  #
  # @param extras [Array<Listener::Base>] listeners to append
  # @return [ListenerList]
  def with(*extras)
    return self if extras.empty?

    # Array#+ builds a new array, so the frozen internal one is untouched.
    self.class.new(@listeners + extras)
  end

  # Render the list as the listeners' own +to_s+ values, comma
  # separated and wrapped in square brackets.
  #
  # @example
  #   list.to_s # => "[Terminal, TokenLog(ctx=0.0k)]"
  #
  # @return [String]
  def to_s
    "[#{@listeners.map(&:to_s).join(', ')}]"
  end
end
101
+ end
102
+ end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pikuri
4
+ class Agent
5
+ # Step-exhaustion rescue. When an +Agent+'s
6
+ # {Control::StepLimit} trips, +Agent#run_loop+ catches the
7
+ # +Exceeded+ exception and hands off to {Synthesizer.run} so
8
+ # the run still produces something useful — a tools-free
9
+ # assistant turn that answers the user's question from
10
+ # whatever evidence the failed agent collected before running
11
+ # out of budget.
12
+ #
13
+ # == Why this exists
14
+ #
15
+ # Without a rescue, a step-exhausted run just raises a stack
16
+ # trace past +bin/pikuri-chat+ and the user gets nothing
17
+ # despite the agent having gathered useful information in the
18
+ # first N-1 steps. The observed failure mode is the "wait,
19
+ # but what about X?" death-loop: the agent collects sound
20
+ # evidence in the first few rounds, then spends the rest of
21
+ # the budget second-guessing. By the time the cap trips, the
22
+ # answer is largely in the messages — it just needs a
23
+ # tools-free pass to synthesize.
24
+ #
25
+ # == Seam discipline
26
+ #
27
+ # {Synthesizer.run} does not reference +RubyLLM::*+. +Agent+
28
+ # constructs the synth chat itself (the one +RubyLLM.chat+
29
+ # call lives in +lib/agent.rb+, same as the parent chat) and
30
+ # passes it in. +Synthesizer+ only calls instance methods on
31
+ # whatever +chat+ it receives — +#with_instructions+,
32
+ # +#ask+, +#messages+ — and uses {Agent.wire_chat} for the
33
+ # event-stream wiring so the synth chat emits events with
34
+ # the same shape as the main chat.
35
module Synthesizer
  # System prompt installed on the synth chat before it is asked
  # anything: answer from the supplied evidence only, no tools,
  # name gaps plainly, no apologies.
  SYSTEM_PROMPT = <<~PROMPT
    You are given evidence another agent collected before running out of steps. Answer the user's question using only this evidence. You have no tools. If the evidence is insufficient, state plainly what's missing and what partial answer you can give. Do not apologize or comment on the previous agent.
  PROMPT

  class << self
    # Run one synthesis turn on +chat+ and return the text of the
    # most recent assistant message.
    #
    # Steps, in order: install {SYSTEM_PROMPT} via
    # +chat.with_instructions+, wire the chat through
    # {Agent.wire_chat}, build the prompt with {.build_prompt}, call
    # +chat.ask+ once, then scan +chat.messages+ from the end for
    # the last +:assistant+ message.
    #
    # @param chat [RubyLLM::Chat] chat to run the synthesis on;
    #   the caller is expected to pass a fresh, tool-free chat
    # @param parent_messages [Array<RubyLLM::Message>] message
    #   history the evidence transcript is built from
    # @param user_message [String] the question to answer
    # @param listeners [Agent::ListenerList] forwarded to
    #   {Agent.wire_chat} and, when streaming, to
    #   {Agent.streaming_block}
    # @param step_limit [Control::StepLimit, nil] forwarded to
    #   {Agent.wire_chat}; +nil+ to skip
    # @param cancellable [Control::Cancellable, nil] forwarded to
    #   {Agent.wire_chat} and, when streaming, to
    #   {Agent.streaming_block}; +nil+ to skip
    # @param streaming [Boolean] when truthy, the block returned
    #   by {Agent.streaming_block} is passed to +chat.ask+;
    #   otherwise +chat.ask+ is called without a block
    # @return [String, nil] content of the last assistant
    #   message, or +nil+ when the chat holds none
    def run(chat:, parent_messages:, user_message:, listeners:,
            step_limit: nil, cancellable: nil, streaming: false)
      chat.with_instructions(SYSTEM_PROMPT)
      Agent.wire_chat(chat, listeners: listeners, step_limit: step_limit, cancellable: cancellable)

      question = build_prompt(parent_messages: parent_messages, user_message: user_message)
      # A nil block argument (&nil) is the same as passing no block at all.
      on_chunk = streaming ? Agent.streaming_block(listeners: listeners, cancellable: cancellable) : nil
      chat.ask(question, &on_chunk)

      final_reply = chat.messages.reverse_each.find { |message| message.role == :assistant }
      final_reply&.content
    end

    # Compose the single prompt string sent to the synth chat: the
    # question, a blank line, then an "Evidence gathered:" section
    # rendered from +parent_messages+. Performs no I/O.
    #
    # @param parent_messages [Array<RubyLLM::Message>]
    # @param user_message [String]
    # @return [String]
    def build_prompt(parent_messages:, user_message:)
      evidence = format_evidence(parent_messages)
      "Question: #{user_message}\n\nEvidence gathered:\n#{evidence}"
    end

    private

    # Render the message history as an ordered evidence log.
    #
    # Only +:assistant+ messages produce output. A non-empty String
    # +content+ becomes a "Note:" line. Each of the message's tool
    # calls becomes a "Tool call:" / "Tool result:" pair followed by
    # a blank line, but only when some +:tool+ message carries the
    # same id with a truthy +content+; calls without such a result
    # are skipped. Trailing whitespace is stripped from the result.
    #
    # @param messages [Array<RubyLLM::Message>]
    # @return [String]
    def format_evidence(messages)
      # tool_call_id => content; a later duplicate id overwrites an earlier one.
      outputs = {}
      messages.each do |message|
        outputs[message.tool_call_id] = message.content if message.role == :tool
      end

      log = messages.each_with_object([]) do |message, out|
        next unless message.role == :assistant

        note = message.content
        out << "Note: #{note}" if note.is_a?(String) && !note.empty?

        message.tool_calls&.each_value do |call|
          output = outputs[call.id]
          next unless output

          rendered_args = call.arguments.map { |key, value| "#{key}=#{value.inspect}" }.join(', ')
          out.push("Tool call: #{call.name}(#{rendered_args})", "Tool result: #{output}", '')
        end
      end

      log.join("\n").rstrip
    end
  end
end
144
+ end
145
+ end