pikuri 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +62 -0
- data/GETTING_STARTED.md +223 -0
- data/LICENSE +21 -0
- data/README.md +193 -0
- data/lib/pikuri/agent/chat_transport.rb +41 -0
- data/lib/pikuri/agent/context_window_detector.rb +101 -0
- data/lib/pikuri/agent/listener/in_memory_message_list.rb +33 -0
- data/lib/pikuri/agent/listener/message_listener.rb +93 -0
- data/lib/pikuri/agent/listener/step_limit.rb +97 -0
- data/lib/pikuri/agent/listener/terminal.rb +137 -0
- data/lib/pikuri/agent/listener/token_log.rb +166 -0
- data/lib/pikuri/agent/listener_list.rb +113 -0
- data/lib/pikuri/agent/message.rb +61 -0
- data/lib/pikuri/agent/synthesizer.rb +120 -0
- data/lib/pikuri/agent/tokens.rb +56 -0
- data/lib/pikuri/agent.rb +286 -0
- data/lib/pikuri/subprocess.rb +166 -0
- data/lib/pikuri/tool/bash.rb +272 -0
- data/lib/pikuri/tool/calculator.rb +82 -0
- data/lib/pikuri/tool/confirmer.rb +96 -0
- data/lib/pikuri/tool/edit.rb +196 -0
- data/lib/pikuri/tool/fetch.rb +167 -0
- data/lib/pikuri/tool/glob.rb +310 -0
- data/lib/pikuri/tool/grep.rb +338 -0
- data/lib/pikuri/tool/parameters.rb +314 -0
- data/lib/pikuri/tool/read.rb +254 -0
- data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
- data/lib/pikuri/tool/scraper/html.rb +285 -0
- data/lib/pikuri/tool/scraper/pdf.rb +54 -0
- data/lib/pikuri/tool/scraper/simple.rb +177 -0
- data/lib/pikuri/tool/search/brave.rb +184 -0
- data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
- data/lib/pikuri/tool/search/engines.rb +154 -0
- data/lib/pikuri/tool/search/exa.rb +217 -0
- data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
- data/lib/pikuri/tool/search/result.rb +29 -0
- data/lib/pikuri/tool/skill.rb +80 -0
- data/lib/pikuri/tool/skill_catalog.rb +376 -0
- data/lib/pikuri/tool/sub_agent.rb +102 -0
- data/lib/pikuri/tool/web_scrape.rb +117 -0
- data/lib/pikuri/tool/web_search.rb +38 -0
- data/lib/pikuri/tool/workspace.rb +150 -0
- data/lib/pikuri/tool/write.rb +170 -0
- data/lib/pikuri/tool.rb +118 -0
- data/lib/pikuri/url_cache.rb +106 -0
- data/lib/pikuri/version.rb +10 -0
- data/lib/pikuri.rb +165 -0
- data/prompts/coding-system-prompt.txt +28 -0
- data/prompts/pikuri-chat.txt +15 -0
- metadata +259 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Pikuri
  class Agent
    module Listener
      # Abstract base for listeners that consume the +Agent+'s normalized
      # event stream as {Message} value objects. Subclasses override the
      # single {#on_message} hook and pattern-match on the variant.
      #
      # +#attach(chat)+ registers the three +RubyLLM::Chat+ callbacks once,
      # converts each payload into the matching {Message} variant, and
      # forwards it to {#on_message}. Empty +thinking+ and +assistant+
      # content is filtered at this layer so subclasses never observe
      # vacuous events.
      #
      # Token usage is a separate channel: every assistant +after_message+
      # event (even pure tool-call turns with empty content) yields an
      # {Agent::Tokens} through {#on_tokens}, so context-window trackers
      # see every round trip. {Message::User} never flows through +Chat+ —
      # {Agent#run_loop} builds it directly and invokes {#on_message}.
      class MessageListener
        # Register this listener on +chat+'s callback API. Afterwards the
        # chat loop converts every event into a {Message} variant and
        # routes it into {#on_message} on this instance.
        #
        # @param chat [RubyLLM::Chat]
        # @return [void]
        def attach(chat)
          chat.after_message { |msg| dispatch_chat_message(msg) }
          chat.before_tool_call do |tool_call|
            on_message(Message::ToolCall.new(name: tool_call.name, arguments: tool_call.arguments))
          end
          chat.after_tool_result do |result|
            on_message(Message::ToolResult.new(content: result))
          end
        end

        # Single entry point for the normalized stream. Subclasses
        # typically dispatch with +case message in Message::X(...)+.
        # The default is a no-op so a listener interested in only a
        # subset can let everything else fall through.
        #
        # @param message [Message::User, Message::Thinking,
        #   Message::Assistant, Message::ToolCall, Message::ToolResult,
        #   Message::FallbackNotice]
        # @return [void]
        def on_message(message); end

        # Hook for provider-reported token usage; fires once per assistant
        # turn (including tool-call-only turns) from
        # {#dispatch_chat_message}. No-op by default — {TokenLog}
        # overrides it to log and accumulate.
        #
        # @param tokens [Agent::Tokens]
        # @return [void]
        def on_tokens(tokens); end

        private

        # Normalize one +after_message+ payload: emit {Message::Thinking}
        # and {Message::Assistant} (non-empty content only) via
        # {#on_message}, then an {Agent::Tokens} via {#on_tokens}.
        #
        # @param msg [RubyLLM::Message]
        # @return [void]
        def dispatch_chat_message(msg)
          return unless msg.role == :assistant

          reasoning = msg.thinking&.text
          on_message(Message::Thinking.new(content: reasoning)) if reasoning && !reasoning.empty?

          body = msg.content
          on_message(Message::Assistant.new(content: body)) if body.is_a?(String) && !body.empty?

          on_tokens(build_tokens(msg))
        end

        # Assemble the per-turn usage snapshot from the provider payload.
        #
        # @param msg [RubyLLM::Message]
        # @return [Agent::Tokens]
        def build_tokens(msg)
          Agent::Tokens.new(
            input: msg.input_tokens,
            output: msg.output_tokens,
            cached: msg.cached_tokens,
            cache_creation: msg.cache_creation_tokens,
            thinking: msg.thinking_tokens,
            model_id: msg.model_id
          )
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Pikuri
  class Agent
    module Listener
      # Standalone listener capping tool calls per +Agent#run_loop+
      # invocation. Not a +MessageListener+ — it exists for control flow
      # (raising on overrun), not for surfacing events.
      #
      # ruby_llm ships no step budget of its own, so this listener counts
      # +before_tool_call+ events on the underlying +RubyLLM::Chat+ and
      # raises {Exceeded} once the cap is crossed. +Agent#run_loop+ lets
      # that escape +Chat#ask+; the step-exhaustion synthesizer rescues
      # it to salvage the run.
      class StepLimit
        # Raised by the +before_tool_call+ callback once the tool-call
        # count exceeds +max+. Carries the tripped budget so rescue
        # clauses can report it to the user.
        class Exceeded < StandardError
          # @return [Integer]
          attr_reader :max_steps

          # @param max_steps [Integer]
          def initialize(max_steps)
            @max_steps = max_steps
            super("Agent loop exceeded #{max_steps} steps")
          end
        end

        # @param max [Integer] hard cap on tool-call rounds; must be
        #   positive
        # @raise [ArgumentError] if +max+ is zero or negative
        def initialize(max:)
          raise ArgumentError, "max must be positive, got #{max}" if max <= 0

          @max = max
          @step = 0
        end

        # Register on +chat+'s +before_tool_call+ callback: increment the
        # counter on every tool call, raise {Exceeded} past the cap.
        #
        # @param chat [RubyLLM::Chat]
        # @return [void]
        def attach(chat)
          chat.before_tool_call do |_tool_call|
            @step += 1
            raise Exceeded, @max if @step > @max
          end
        end

        # Listener protocol hook. An {Agent::Message::User} marks the
        # start of a new turn by definition, so it doubles as the reset
        # signal for the counter; every other variant is ignored.
        #
        # @param message [Agent::Message]
        # @return [void]
        def on_message(message)
          case message
          in Agent::Message::User then @step = 0
          else nil # other variants carry no lifecycle meaning here
          end
        end

        # Sub-agent variant: a fresh +StepLimit+ at the caller-supplied
        # +max_steps:+, falling back to the receiver's own cap when the
        # key is absent. The counter and callback registration are
        # per-chat, so each sub-agent needs its own instance — the
        # parent's cannot govern a sub-agent's chat.
        #
        # The +@max+ default keeps a bare +.for_sub_agent+ call valid:
        # callers that don't care about the cap inherit it instead of
        # hitting an exception.
        #
        # @param max_steps [Integer] positive step cap for the sub-agent;
        #   defaults to the receiver's current cap
        # @return [StepLimit]
        # @raise [ArgumentError] if +max_steps+ is non-positive
        def for_sub_agent(max_steps: @max, **) = self.class.new(max: max_steps)

        # @return [String] short config dump for {Agent#to_s}
        def to_s
          "StepLimit(max=#{@max})"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true

require 'rainbow'
require 'tty-markdown'

module Pikuri
  class Agent
    module Listener
      # Terminal renderer for the normalized event stream: dim grey
      # reasoning, Markdown-rendered assistant content, cyan tool-call
      # and tool-result lines. {Message::User} stays silent — the user
      # typed it, so echoing it on the CLI adds nothing. This is the
      # default listener attached by {Agent#initialize}.
      #
      # A fixed number of leading spaces can be prepended to every
      # rendered line via +padding:+. Sub-agents receive a fresh padded
      # instance through {#for_sub_agent} (dispatched by
      # {Agent::ListenerList#for_sub_agent}) so their output sits
      # visually indented under the parent's stream.
      class Terminal < MessageListener
        LOGGER = Pikuri.logger_for('Terminal')

        # Character cap on tool-result content shown in the terminal.
        # Longer content is cut and suffixed with its original byte
        # size, so a 200-char return is distinguishable from a 50KB
        # HTML dump.
        MAX_TOOL_RESULT_CHARS = 200

        # Indent for a sub-agent's rendered stream. Absolute rather
        # than additive — sub-agent recursion is blocked (the sub-agent
        # tool set excludes +sub_agent+), so depth never exceeds 1 and
        # a fixed indent reads cleanly.
        SUB_AGENT_PADDING = 2

        # @return [Integer] current padding width; public so callers
        #   (and tests) can introspect it.
        attr_reader :padding

        # @param padding [Integer] non-negative count of leading spaces
        #   prepended to every rendered line. Defaults to 0; sub-agents
        #   get {SUB_AGENT_PADDING} via {#for_sub_agent}.
        def initialize(padding: 0)
          super()
          @padding = padding
        end

        # Sub-agent variant: a fresh +Terminal+ at {SUB_AGENT_PADDING}.
        # Invoked by {Agent::ListenerList#for_sub_agent}; any forwarded
        # params are ignored — Terminal has no caller-tunable knobs.
        #
        # @return [Terminal]
        def for_sub_agent(**)
          self.class.new(padding: SUB_AGENT_PADDING)
        end

        # Render one event. Dispatches on the {Agent::Message} variant.
        #
        # @param message [Agent::Message]
        # @return [void]
        def on_message(message)
          case message
          in Message::Thinking(content:)
            puts indent(Rainbow(content).color(85, 85, 85))
          in Message::Assistant(content:)
            puts indent(render_markdown(content))
          in Message::ToolCall(name:, arguments:)
            rendered = arguments.map { |key, value| "#{key}=#{value.inspect}" }.join(', ')
            puts indent(Rainbow("→ #{name}(#{rendered})").cyan)
          in Message::ToolResult(content:)
            puts indent(Rainbow("= #{truncate_tool_result(content)}").cyan)
          in Message::FallbackNotice(reason:)
            puts indent(Rainbow("! #{reason}").yellow)
          in Message::User
            # Deliberately silent: re-rendering the user's own input is noise.
          end
        end

        # @return [String] short label for {Agent#to_s}; the padding
        #   suffix appears only when non-zero so the default form stays
        #   +"Terminal"+
        def to_s
          return 'Terminal' if @padding.zero?

          "Terminal(padding=#{@padding})"
        end

        private

        # Prepend +@padding+ spaces to every line of +text+. Works on
        # the line array rather than a +gsub+ trick so a trailing
        # newline never yields a stray padded blank line — +puts+
        # supplies the final newline when missing, as before.
        #
        # @param text [String]
        # @return [String]
        def indent(text)
          return text if @padding.zero?

          pad = ' ' * @padding
          text.to_s.lines.map { |row| pad + row }.join
        end

        # Render assistant Markdown, degrading to the raw string when
        # the renderer raises. tty-markdown / strings have known bugs
        # around ANSI inside tables (e.g. +Strings::Wrap.insert_ansi+
        # raising +IndexError+); ugly Markdown beats aborting an
        # in-flight conversation.
        #
        # @param content [String] assistant Markdown
        # @return [String] ANSI text, or +content+ unchanged on failure
        def render_markdown(content)
          TTY::Markdown.parse(content)
        rescue StandardError => e
          LOGGER.warn("TTY::Markdown render failed (#{e.class}: #{e.message}); falling back to raw text")
          content
        end

        # Flatten whitespace and cap at {MAX_TOOL_RESULT_CHARS}. The cap
        # keeps multi-screen dumps (rendered HTML, PDF text) from
        # drowning the stream; the byte-count suffix on truncation
        # separates "the tool returned exactly this" from "you are
        # seeing a slice".
        #
        # @param content [String] tool observation
        # @return [String] single-line display form, possibly truncated
        def truncate_tool_result(content)
          text = content.to_s
          flat = text.gsub(/\s+/, ' ').strip
          return flat if flat.length <= MAX_TOOL_RESULT_CHARS

          "#{flat[0, MAX_TOOL_RESULT_CHARS]}… (#{text.bytesize} bytes total)"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Pikuri
  class Agent
    module Listener
      # Logs context-window consumption per assistant turn through
      # +Pikuri.logger_for('Tokens')+ by overriding
      # {MessageListener#on_tokens}; one +INFO+ line per turn.
      #
      # Purpose: surface context-window growth before the provider
      # raises +RubyLLM::ContextLengthExceededError+. +ctx+ is the
      # headline — tokens consumed by the conversation *through* this
      # turn (prompt plus reply, both re-processed next turn). +Δ+ is
      # the climb between turns; +↑+ / +↓+ are per-turn input / output
      # (Claude Code's convention: +↑+ sent to the model, +↓+ back).
      #
      # == State and scope
      #
      # The latest snapshot lives on the instance, so one +TokenLog+ is
      # per-conversation. Sub-agents (own +RubyLLM::Chat+) get a fresh
      # instance via {#for_sub_agent} (dispatched by
      # {ListenerList#for_sub_agent}) so their counts log against their
      # own chat. The synthesizer rescue keeps the parent's instance —
      # synth turns extend the same conversation.
      #
      # == Log line shape
      #
      # Without a cap (no detection source — see
      # {Agent::ContextWindowDetector}):
      #
      #   msg #1: ctx=6.8k Δ+6.8k ↑6.8k ↓0.0k
      #   msg #2: ctx=8.4k Δ+1.6k ↑1.0k ↓0.6k
      #
      # With a cap of e.g. 32k (set by {Agent#initialize} after
      # detection):
      #
      #   msg #1: ctx=6.8k/32.0k Δ+6.8k ↑6.8k ↓0.0k
      #
      # When the owning {Agent} has a non-empty {Agent#name} (a
      # sub-agent), lines carry a +[name] + prefix:
      #
      #   [sub_agent 0] msg #1: ctx=4.2k Δ+4.2k ↑4.2k ↓0.0k
      #
      # +ctx+ is +input + cached + cache_creation + output+ (see
      # {Agent::Tokens}), optionally suffixed +/<cap>+ when
      # {#context_window_cap} is set. Output is included so this turn's
      # reply shows immediately instead of hiding until the next turn
      # pulls it in as cached prompt. +Δ+ is signed (+Δ-X+ is
      # legitimate if ruby_llm ever prunes between turns); on the first
      # message the zero baseline makes +Δ+ equal +ctx+. Sizes are
      # 1024-scaled with one decimal and a +k+ suffix.
      class TokenLog < MessageListener
        LOGGER = Pikuri.logger_for('Tokens')

        # Tokens consumed by the conversation through the latest turn —
        # prompt plus reply, the bulk of what the next turn will
        # re-process. Equals +input + cached + cache_creation + output+
        # from the latest {#on_tokens}; zero before the first one.
        #
        # @return [Integer]
        attr_reader :context_window_size

        # Model's context-window cap, or +nil+ when no source could
        # supply one (see {Agent::ContextWindowDetector}). When set,
        # +ctx=+ renders as +ctx=<used>/<cap>+. {Agent#initialize}
        # pushes the detected value via
        # {ListenerList#context_window_cap=}; a bare instance (tests,
        # {#for_sub_agent}) defaults to +nil+.
        #
        # @return [Integer, nil]
        attr_accessor :context_window_cap

        # @return [String] owning agent's identifier. Empty for the
        #   main agent; set by {#for_sub_agent} from the sub-agent's
        #   generated name so lines get a +[<name>] + prefix.
        #   Read-only — sub-agents get a fresh instance instead.
        attr_reader :name

        # @param name [String] agent identifier prepended to each log
        #   line as +[<name>] + when non-empty; +""+ for the main agent.
        def initialize(name: '')
          super()
          @name = name
          @msg = 0
          @context_window_size = 0
          @context_window_cap = nil
        end

        # Sub-agent variant: a fresh +TokenLog+ with a zeroed snapshot
        # so readings track the sub-agent's own +RubyLLM::Chat+ rather
        # than continuing the parent's. Picks +name:+ out of the
        # forwarded params for the +[<name>] + prefix, defaulting to
        # +""+. The cap stays +nil+ here — the sub-agent's
        # {Agent#initialize} pushes the resolved cap onto every
        # +TokenLog+ via {ListenerList#context_window_cap=} right after
        # construction.
        #
        # @param name [String] sub-agent's identifier
        # @return [TokenLog]
        def for_sub_agent(name: '', **) = self.class.new(name: name)

        # Update the context-window snapshot, then log one +INFO+ line.
        #
        # @param tokens [Agent::Tokens]
        # @return [void]
        def on_tokens(tokens)
          @msg += 1
          input = tokens.input.to_i
          output = tokens.output.to_i

          previous = @context_window_size
          @context_window_size =
            input + tokens.cached.to_i + tokens.cache_creation.to_i + output

          LOGGER.info(format_line(input, output, @context_window_size - previous))
        end

        # @return [String] short label for {Agent#to_s}, carrying the
        #   current context-window size (with cap suffix when set)
        def to_s = "TokenLog(ctx=#{format_ctx})"

        private

        # Compose one log line from the per-turn numbers.
        def format_line(input, output, delta)
          label = @name.empty? ? '' : "[#{@name}] "
          sign = delta.negative? ? '-' : '+'
          "#{label}msg ##{@msg}: ctx=#{format_ctx} Δ#{sign}#{format_k(delta.abs)} ↑#{format_k(input)} ↓#{format_k(output)}"
        end

        # +<used>+ without a cap, +<used>/<cap>+ with one. Shared by
        # {#format_line} and {#to_s} so the headline reads identically
        # in the log stream and in {Agent#to_s} banners.
        def format_ctx
          used = format_k(@context_window_size)
          @context_window_cap.nil? ? used : "#{used}/#{format_k(@context_window_cap)}"
        end

        # Format a token count as a 1024-scaled +k+-suffixed string:
        # +nil+ → +0.0k+; +0+ → +0.0k+; 12_453 → +12.2k+. The uniform
        # format keeps lines scannable even when tiny per-turn outputs
        # show as +↓0.0k+.
        #
        # @param count [Integer, nil]
        # @return [String]
        def format_k(count) = format('%.1fk', count.to_i / 1024.0)
      end
    end
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Pikuri
  class Agent
    # Listener-list value object owned by an {Agent}. Speaks the same
    # +attach(chat)+ / +on_message(msg)+ protocol as a single listener —
    # each call fans out to the wrapped list — so the +Agent+ never
    # handles a raw +Array+ or spells out "for each listener, do X"
    # inline.
    #
    # == Why a class, not an Array
    #
    # Two operations deserve one home instead of scattered helpers:
    #
    # 1. {#for_sub_agent} derives a list for a sub-agent run by
    #    forwarding to each listener's own +for_sub_agent(**params)+
    #    hook (identity for listeners without one). The dispatch lives
    #    on the listener — +Terminal+ swaps in a padded fresh instance,
    #    +TokenLog+ resets its snapshot, +StepLimit+ reads +max_steps+
    #    from the params — so this class doesn't grow a method per new
    #    listener type. The synthesizer rescue reuses the hook with
    #    +max_steps: 1+, a step-exhausted synth being just another
    #    fresh-context run.
    # 2. {#attach} / {#on_message} replace +each { |l| ... }+ at the
    #    call sites, making the "a listener list is a thing the agent
    #    owns" seam visible.
    class ListenerList
      # @param listeners [Array] objects honoring the duck-typed
      #   +attach(chat)+ / +on_message(msg)+ protocol
      def initialize(listeners)
        @listeners = listeners.dup
      end

      # Wire every listener into +chat+'s callback API; forwarded
      # verbatim to each listener's +#attach+ — see
      # {Listener::MessageListener#attach} and
      # {Listener::StepLimit#attach} for what each registers.
      #
      # @param chat [RubyLLM::Chat]
      # @return [void]
      def attach(chat)
        @listeners.each { |listener| listener.attach(chat) }
      end

      # Fan one message out to every listener.
      #
      # @param message [Agent::Message]
      # @return [void]
      def on_message(message)
        @listeners.each { |listener| listener.on_message(message) }
      end

      # Build a new {ListenerList} where every listener has been asked
      # for its sub-agent variant. A listener defining
      # +for_sub_agent(**params)+ gets the forwarded +params+ and
      # returns +self+ or a replacement; one without the method is kept
      # by reference, so output, structured capture, and other stateful
      # flows continue into the parent's instances.
      #
      # Dispatch lives on the listener, so new listener types with
      # sub-agent behavior never touch this class — see
      # {Listener::Terminal#for_sub_agent} (fresh padded instance),
      # {Listener::TokenLog#for_sub_agent} (fresh zeroed snapshot), and
      # {Listener::StepLimit#for_sub_agent} (fresh cap from
      # +max_steps:+).
      #
      # +params+ is a flat hash forwarded as kwargs to every hook; each
      # listener consumes the keys it cares about and discards the rest
      # through its +**+ catch-all. A no-params call is always valid —
      # every hook treats its consumed keys as optional (+StepLimit+
      # falls back to its own cap when +max_steps:+ is absent).
      #
      # @param params [Hash{Symbol => Object}] kwargs forwarded to each
      #   listener's +for_sub_agent+; +max_steps:+ is currently the
      #   only consumed key.
      # @return [ListenerList]
      def for_sub_agent(**params)
        derived = @listeners.map do |listener|
          next listener unless listener.respond_to?(:for_sub_agent)

          listener.for_sub_agent(**params)
        end
        self.class.new(derived)
      end

      # Push the context-window cap onto every {Listener::TokenLog} in
      # the list. {Agent#initialize} calls this once
      # {Agent::ContextWindowDetector} resolves a value, lighting up
      # the +ctx=<used>/<cap>+ form across all token loggers without
      # the caller knowing which listeners were registered.
      #
      # Listeners that aren't +TokenLog+ are untouched — they carry no
      # cap.
      #
      # @param cap [Integer, nil] cap to apply; +nil+ is allowed (and
      #   keeps the +ctx=<used>+ form)
      # @return [void]
      def context_window_cap=(cap)
        @listeners.each do |listener|
          listener.context_window_cap = cap if listener.is_a?(Listener::TokenLog)
        end
      end

      # @example
      #   list.to_s # => "[Terminal, StepLimit(max=20)]"
      #
      # @return [String]
      def to_s
        "[#{@listeners.join(', ')}]"
      end
    end
  end
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Pikuri
  class Agent
    # Sealed value-object hierarchy for single events in the +Agent+'s
    # normalized stream. Listeners (Terminal renderer, in-memory
    # recorder, future web sink) receive these through one
    # +Listener::MessageListener#on_message+ entry point and dispatch on
    # the variant's class.
    #
    # Each variant is a +Data.define+ carrying only the fields it needs;
    # value equality and pattern-matching support come for free.
    #
    # == Provenance of each variant
    #
    # * {User} — synthesized by {Agent#run_loop} before the turn goes to
    #   +Chat#ask+; never appears in ruby_llm's listener stream.
    # * {Thinking} / {Assistant} — extracted from a +Chat#after_message+
    #   payload with role +:assistant+. Empty +thinking.text+ and empty
    #   +content+ are filtered at the dispatch site, so listeners never
    #   see vacuous events.
    # * {ToolCall} — emitted on +Chat#before_tool_call+ with the tool's
    #   name and the LLM-supplied argument hash.
    # * {ToolResult} — emitted on +Chat#after_tool_result+ with the
    #   observation string the tool produced.
    #
    # Provider-reported token usage is *not* a {Message} variant — it is
    # metadata about an exchange rather than an event within it; see
    # {Agent::Tokens} and {Listener::MessageListener#on_tokens}.
    module Message
      # The user's input for a turn, as handed to {Agent#run_loop}.
      User = Data.define(:content)

      # An assistant reasoning ("thinking") block, taken from the
      # +thinking.text+ field of a +RubyLLM::Message+ whose role is
      # +:assistant+.
      Thinking = Data.define(:content)

      # Assistant Markdown content from a +RubyLLM::Message+ whose role
      # is +:assistant+.
      Assistant = Data.define(:content)

      # A tool invocation the LLM requested but has not yet observed.
      # +arguments+ is the raw hash ruby_llm parsed from the model's
      # +tool_calls+ JSON — no validation has run at this point.
      ToolCall = Data.define(:name, :arguments)

      # The observation a tool produced, as returned by {Tool#run}.
      # Per the pikuri error convention, recoverable failures arrive as
      # +"Error: ..."+ strings rather than exceptions.
      ToolResult = Data.define(:content)

      # Out-of-band notice that the agent took a rescue path — currently
      # emitted by {Agent#run_loop} when {Listener::StepLimit} trips and
      # the synthesizer fallback runs. Lets listeners (Terminal, future
      # web UI) surface the divergence before the synthesizer's own
      # assistant output flows through.
      FallbackNotice = Data.define(:reason)
    end
  end
end