pikuri-core 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +67 -0
- data/lib/pikuri/agent/chat_transport.rb +41 -0
- data/lib/pikuri/agent/configurator.rb +270 -0
- data/lib/pikuri/agent/context_window_detector.rb +111 -0
- data/lib/pikuri/agent/control/cancellable.rb +128 -0
- data/lib/pikuri/agent/control/interloper.rb +167 -0
- data/lib/pikuri/agent/control/step_limit.rb +93 -0
- data/lib/pikuri/agent/control.rb +45 -0
- data/lib/pikuri/agent/event.rb +190 -0
- data/lib/pikuri/agent/extension.rb +82 -0
- data/lib/pikuri/agent/listener/in_memory_event_list.rb +34 -0
- data/lib/pikuri/agent/listener/rate_limited.rb +172 -0
- data/lib/pikuri/agent/listener/terminal.rb +264 -0
- data/lib/pikuri/agent/listener/token_log.rb +216 -0
- data/lib/pikuri/agent/listener.rb +54 -0
- data/lib/pikuri/agent/listener_list.rb +102 -0
- data/lib/pikuri/agent/synthesizer.rb +145 -0
- data/lib/pikuri/agent.rb +731 -0
- data/lib/pikuri/subprocess.rb +166 -0
- data/lib/pikuri/tool/calculator.rb +82 -0
- data/lib/pikuri/tool/fetch.rb +171 -0
- data/lib/pikuri/tool/parameters.rb +314 -0
- data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
- data/lib/pikuri/tool/scraper/html.rb +285 -0
- data/lib/pikuri/tool/scraper/pdf.rb +54 -0
- data/lib/pikuri/tool/scraper/simple.rb +183 -0
- data/lib/pikuri/tool/search/brave.rb +184 -0
- data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
- data/lib/pikuri/tool/search/engines.rb +163 -0
- data/lib/pikuri/tool/search/exa.rb +217 -0
- data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
- data/lib/pikuri/tool/search/result.rb +29 -0
- data/lib/pikuri/tool/sub_agent.rb +150 -0
- data/lib/pikuri/tool/web_scrape.rb +121 -0
- data/lib/pikuri/tool/web_search.rb +38 -0
- data/lib/pikuri/tool.rb +118 -0
- data/lib/pikuri/url_cache.rb +112 -0
- data/lib/pikuri/version.rb +10 -0
- data/lib/pikuri-core.rb +177 -0
- data/prompts/pikuri-chat.txt +15 -0
- metadata +251 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pikuri
|
|
4
|
+
class Agent
|
|
5
|
+
module Listener
|
|
6
|
+
# Decorator that coalesces high-frequency streaming deltas
|
|
7
|
+
# ({Event::AssistantDelta}, {Event::ThinkingDelta}) into at most
|
|
8
|
+
# +fps+ events per second before handing them off to an inner
|
|
9
|
+
# listener. Non-delta events flow through unmodified, with any
|
|
10
|
+
# pending coalesced content either emitted ahead of them
|
|
11
|
+
# (default) or dropped silently — see
|
|
12
|
+
# +flush_pending_on_non_delta:+.
|
|
13
|
+
#
|
|
14
|
+
# Useful for renderers whose per-delta repaint cost is higher
|
|
15
|
+
# than the wire-arrival rate of provider chunks: a fast provider
|
|
16
|
+
# can emit dozens of deltas per second, and a Markdown-rendering
|
|
17
|
+
# UI doesn't need every one of them on-screen. +fps: 15+ is a
|
|
18
|
+
# reasonable starting point for a TUI.
|
|
19
|
+
#
|
|
20
|
+
# == Tick alignment
|
|
21
|
+
#
|
|
22
|
+
# The first flush is anchored to a monotonic-clock reference
|
|
23
|
+
# taken at construction, not to the arrival of the first delta.
|
|
24
|
+
# This keeps stream-startup latency from leaking into the
|
|
25
|
+
# cadence: with +fps: 1+, if the first delta arrives 0.9s after
|
|
26
|
+
# construction, it flushes immediately (the construction-time
|
|
27
|
+
# tick has already passed) and the next tick fires 0.1s later
|
|
28
|
+
# — preserving the configured rate. After a flush, the schedule
|
|
29
|
+
# advances by exactly +1/fps+ per missed tick; a long-idle
|
|
30
|
+
# stream doesn't backlog ticks against itself.
|
|
31
|
+
#
|
|
32
|
+
# == Per-stream buffering
|
|
33
|
+
#
|
|
34
|
+
# Assistant and thinking deltas are buffered independently. A
|
|
35
|
+
# tick flushes whichever buffers are non-empty as separate
|
|
36
|
+
# coalesced delta events; providers practically don't interleave
|
|
37
|
+
# the two modalities within a single tick, but the
|
|
38
|
+
# implementation doesn't rely on that.
|
|
39
|
+
#
|
|
40
|
+
# == Non-delta handling
|
|
41
|
+
#
|
|
42
|
+
# +flush_pending_on_non_delta: true+ (default) emits any pending
|
|
43
|
+
# coalesced content as a delta event ahead of the non-delta
|
|
44
|
+
# event so the inner listener sees the complete stream — the
|
|
45
|
+
# lossless choice. +false+ drops the pending buffers silently:
|
|
46
|
+
# appropriate for inner listeners that re-render the
|
|
47
|
+
# authoritative final content on a bookend ({Event::Assistant} /
|
|
48
|
+
# {Event::Thinking}), where rendering the trailing 0–66 ms of
|
|
49
|
+
# deltas would be wasted CPU before the redraw.
|
|
50
|
+
#
|
|
51
|
+
# == Threading
|
|
52
|
+
#
|
|
53
|
+
# No threads, no timers. The decorator advances its tick state
|
|
54
|
+
# only when an event arrives, so it's safe to install on any
|
|
55
|
+
# listener regardless of which thread the agent emits on, as
|
|
56
|
+
# long as delivery is sequential (which the {ListenerList}
|
|
57
|
+
# contract guarantees).
|
|
58
|
+
class RateLimited < Base
|
|
59
|
+
# @param inner [Listener::Base] the listener to forward
|
|
60
|
+
# (possibly coalesced) events to.
|
|
61
|
+
# @param fps [Integer, Float] frames-per-second cap on the
|
|
62
|
+
# coalesced delta stream. Must be positive.
|
|
63
|
+
# @param flush_pending_on_non_delta [Boolean] +true+
|
|
64
|
+
# (default) emits any pending coalesced content as a delta
|
|
65
|
+
# before forwarding the non-delta event; +false+ drops the
|
|
66
|
+
# pending buffers silently.
|
|
67
|
+
# @param clock [Proc] zero-arg returning monotonic
|
|
68
|
+
# seconds-since-some-epoch as a +Float+. Injection seam for
|
|
69
|
+
# deterministic specs; production uses
|
|
70
|
+
# +Process.clock_gettime(Process::CLOCK_MONOTONIC)+.
|
|
71
|
+
# @raise [ArgumentError] if +fps+ is not positive.
|
|
72
|
+
def initialize(inner, fps:, flush_pending_on_non_delta: true,
               clock: -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) })
  # Builds the decorator around +inner+ and anchors the first tick to the
  # clock reading taken here, not to the arrival of the first delta.
  #
  # Raises ArgumentError unless +fps+ is a real, finite, positive number.
  super()
  # Reject everything that cannot yield a usable, non-zero tick period:
  # non-numeric values (nil, strings), NaN, zero / negative rates, and
  # infinity. 1.0 / Float::INFINITY is 0.0, and dividing by a zero period
  # in #tick produces NaN / Infinity, whose #floor raises FloatDomainError
  # on the first delta instead of failing here at construction.
  usable = fps.is_a?(Numeric) && fps.real? && fps.finite? && fps.positive?
  raise ArgumentError, "fps must be positive, got #{fps.inspect}" unless usable

  @inner = inner
  @fps = fps
  # Seconds between two consecutive flush opportunities.
  @period = 1.0 / fps
  @flush_on_non_delta = flush_pending_on_non_delta
  @clock = clock
  # The first tick is due immediately, so the first delta flushes at once.
  @next_tick_at = clock.call
  # Unfrozen buffers: the file is under frozen_string_literal, and these
  # are appended to and cleared in place.
  @assistant_buffer = String.new
  @thinking_buffer = String.new
end
|
|
86
|
+
|
|
87
|
+
# @param event [Agent::Event] one of the {Agent::Event}
|
|
88
|
+
# variants; see {Agent::Event} for the full list
|
|
89
|
+
# @return [void]
|
|
90
|
+
def on_event(event)
  # Deltas are only accumulated here; #tick decides whether the
  # accumulated text reaches the inner listener right now.
  pending = case event
            when Event::AssistantDelta then @assistant_buffer
            when Event::ThinkingDelta then @thinking_buffer
            end
  unless pending.nil?
    pending << event.content
    return tick
  end

  # Any other event: settle the buffered text according to the configured
  # policy (emit it first, or discard it), then forward the event itself
  # unchanged.
  if @flush_on_non_delta
    flush_thinking
    flush_assistant
  else
    [@assistant_buffer, @thinking_buffer].each(&:clear)
  end
  @inner.on_event(event)
end
|
|
109
|
+
|
|
110
|
+
# Sub-agent variant: wraps the inner listener's sub-agent
|
|
111
|
+
# variant (or the inner listener itself when it doesn't define
|
|
112
|
+
# +for_sub_agent+) in a fresh +RateLimited+ with the same
|
|
113
|
+
# knobs. Returns +nil+ when the inner's +for_sub_agent+
|
|
114
|
+
# returned +nil+ — there's nothing to wrap.
|
|
115
|
+
#
|
|
116
|
+
# @param params [Hash{Symbol => Object}] forwarded to the
|
|
117
|
+
# inner listener's +for_sub_agent+, same protocol as
|
|
118
|
+
# {ListenerList#for_sub_agent}
|
|
119
|
+
# @return [RateLimited, nil]
|
|
120
|
+
def for_sub_agent(**params)
  # An inner listener without its own sub-agent hook is reused as-is.
  child = if @inner.respond_to?(:for_sub_agent)
            @inner.for_sub_agent(**params)
          else
            @inner
          end
  # The inner listener opted out for sub-agents: nothing to decorate.
  return if child.nil?

  self.class.new(child, fps: @fps, flush_pending_on_non_delta: @flush_on_non_delta, clock: @clock)
end
|
|
128
|
+
|
|
129
|
+
# @return [String] short label for {Agent#to_s}; embeds the
|
|
130
|
+
# inner listener's label so the chain reads end-to-end in
|
|
131
|
+
# {ListenerList#to_s}
|
|
132
|
+
def to_s
  # The inner listener's own label is interpolated so a decorated chain
  # prints as one nested expression.
  "RateLimited(#{@fps}fps, #{@flush_on_non_delta ? 'flush' : 'drop'}, #{@inner})"
end
|
|
136
|
+
|
|
137
|
+
private
|
|
138
|
+
|
|
139
|
+
# Advance to the next tick strictly after +now+, in one shot.
|
|
140
|
+
# Handles the case where deltas paused for several periods —
|
|
141
|
+
# a long-idle stream doesn't backlog ticks against the next
|
|
142
|
+
# stream's first delta.
|
|
143
|
+
# @return [void]
|
|
144
|
+
def tick
  now = @clock.call
  # Still inside the current frame: keep buffering.
  return if now < @next_tick_at

  flush_thinking
  flush_assistant
  # Count the whole periods that elapsed since the due tick and jump past
  # all of them at once, so the schedule lands on the first boundary after
  # +now+ rather than replaying every missed tick.
  elapsed_periods = ((now - @next_tick_at) / @period).floor
  @next_tick_at += (elapsed_periods + 1) * @period
end
|
|
153
|
+
|
|
154
|
+
# @return [void]
|
|
155
|
+
def flush_assistant
  unless @assistant_buffer.empty?
    # The event gets its own copy of the text; the buffer object itself is
    # emptied in place and reused for the next batch.
    coalesced = Event::AssistantDelta.new(content: @assistant_buffer.dup)
    @inner.on_event(coalesced)
    @assistant_buffer.clear
  end
end
|
|
161
|
+
|
|
162
|
+
# @return [void]
|
|
163
|
+
def flush_thinking
  unless @thinking_buffer.empty?
    # The event gets its own copy of the text; the buffer object itself is
    # emptied in place and reused for the next batch.
    coalesced = Event::ThinkingDelta.new(content: @thinking_buffer.dup)
    @inner.on_event(coalesced)
    @thinking_buffer.clear
  end
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
end
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rainbow'
|
|
4
|
+
require 'tty-markdown'
|
|
5
|
+
|
|
6
|
+
module Pikuri
|
|
7
|
+
class Agent
|
|
8
|
+
module Listener
|
|
9
|
+
# Terminal renderer for the normalized event stream: dim grey
|
|
10
|
+
# reasoning, Markdown-rendered assistant content, cyan tool-
|
|
11
|
+
# call and tool-result lines, yellow fallback notice, red
|
|
12
|
+
# cancelled notice. {Event::UserTurn} is intentionally silent
|
|
13
|
+
# (the terminal user just typed the message, so re-rendering
|
|
14
|
+
# it adds nothing); {Event::Tokens} and {Event::ContextCap}
|
|
15
|
+
# are silent too (their consumer is {TokenLog}).
|
|
16
|
+
#
|
|
17
|
+
# Optionally prepends a fixed number of leading spaces to
|
|
18
|
+
# every rendered line via the +padding:+ kwarg. Sub-agents
|
|
19
|
+
# get a fresh padded instance through {#for_sub_agent}
|
|
20
|
+
# (dispatched by {ListenerList#for_sub_agent}) so their
|
|
21
|
+
# output is visually indented under the parent's stream.
|
|
22
|
+
#
|
|
23
|
+
# == Streaming mode
|
|
24
|
+
#
|
|
25
|
+
# When constructed with +streaming: true+ (typically because
|
|
26
|
+
# the host's {Agent} was constructed with the matching flag):
|
|
27
|
+
#
|
|
28
|
+
# - {Event::ThinkingDelta} fragments print live in the same
|
|
29
|
+
# dim grey as the non-streaming {Event::Thinking}, with no
|
|
30
|
+
# trailing newline so the next fragment continues the line.
|
|
31
|
+
# - {Event::AssistantDelta} fragments print live, *raw* — no
|
|
32
|
+
# Markdown render. tty-markdown can't render half-finished
|
|
33
|
+
# Markdown (broken code blocks, half-rendered tables), so
|
|
34
|
+
# the live stream gives up formatting in exchange for
|
|
35
|
+
# liveness.
|
|
36
|
+
# - {Event::Thinking} and {Event::Assistant} bookends print
|
|
37
|
+
# a single blank line as a stream terminator, not their
|
|
38
|
+
# content (the content already landed via the deltas). The
|
|
39
|
+
# terminator gives the next event (tool call, next round,
|
|
40
|
+
# final REPL prompt) a clean line to start on.
|
|
41
|
+
#
|
|
42
|
+
# In non-streaming mode (+streaming: false+, the default),
|
|
43
|
+
# the deltas are silently ignored and the bookend events
|
|
44
|
+
# render the full text the way they always have.
|
|
45
|
+
class Terminal < Base
|
|
46
|
+
# Subsystem logger; set its level with +PIKURI_LOG_TERMINAL+
|
|
47
|
+
# or the global +PIKURI_LOG+. Used for the narrow rescue
|
|
48
|
+
# around third-party rendering (+tty-markdown+ choking on
|
|
49
|
+
# assistant output) — see the CLAUDE.md "secondary to the
|
|
50
|
+
# loop" carve-out.
|
|
51
|
+
#
|
|
52
|
+
# @return [Logger]
|
|
53
|
+
LOGGER = Pikuri.logger_for('Terminal')
|
|
54
|
+
|
|
55
|
+
# Cap, in characters, applied to tool-result content
|
|
56
|
+
# rendered to the terminal. Anything longer is truncated
|
|
57
|
+
# with a marker that reports the original byte size so the
|
|
58
|
+
# reader can tell a 200-char return apart from a 50KB HTML
|
|
59
|
+
# dump.
|
|
60
|
+
MAX_TOOL_RESULT_CHARS = 200
|
|
61
|
+
|
|
62
|
+
# Padding applied to a sub-agent's rendered stream.
|
|
63
|
+
# Absolute, not additive — sub-agent recursion is blocked
|
|
64
|
+
# (the sub-agent's tool set excludes +sub_agent+ itself),
|
|
65
|
+
# so the depth never exceeds 1 in practice and a fixed
|
|
66
|
+
# indent reads cleanly.
|
|
67
|
+
SUB_AGENT_PADDING = 2
|
|
68
|
+
|
|
69
|
+
# @param padding [Integer] non-negative number of leading
|
|
70
|
+
# spaces prepended to every rendered line. Defaults to 0;
|
|
71
|
+
# sub-agents get a fresh instance with
|
|
72
|
+
# {SUB_AGENT_PADDING} via {#for_sub_agent}.
|
|
73
|
+
# @param streaming [Boolean] render the chunk-level delta
|
|
74
|
+
# stream live. See the class header's "Streaming mode"
|
|
75
|
+
# section for the behavior swap. Defaults to +false+.
|
|
76
|
+
def initialize(padding: 0, streaming: false)
  super()
  @padding, @streaming = padding, streaming
  # Nothing has been printed by this instance yet, so the first streamed
  # fragment is treated as starting a line (and gets the padding prefix).
  @at_line_start = true
end
|
|
82
|
+
|
|
83
|
+
# @return [Integer] current padding width; exposed so
|
|
84
|
+
# callers can introspect it (and so tests can assert it).
|
|
85
|
+
attr_reader :padding
|
|
86
|
+
|
|
87
|
+
# @return [Boolean] +true+ when this Terminal is in
|
|
88
|
+
# streaming mode (deltas rendered live; bookends emit a
|
|
89
|
+
# terminator newline only).
|
|
90
|
+
attr_reader :streaming
|
|
91
|
+
|
|
92
|
+
# Sub-agent variant: a fresh +Terminal+ at
|
|
93
|
+
# {SUB_AGENT_PADDING}, carrying the same +streaming+ flag
|
|
94
|
+
# the parent had, so sub-agent output is visually indented
|
|
95
|
+
# under the parent's stream and the stream mode stays
|
|
96
|
+
# consistent across the agent tree. Called by
|
|
97
|
+
# {ListenerList#for_sub_agent}; ignores any params it's
|
|
98
|
+
# handed (Terminal has no caller-provided knobs).
|
|
99
|
+
#
|
|
100
|
+
# @return [Terminal]
|
|
101
|
+
def for_sub_agent(**)
  # The indent is the fixed SUB_AGENT_PADDING, not this instance's padding
  # plus an increment; the streaming flag is copied from this instance.
  child_options = { padding: SUB_AGENT_PADDING, streaming: @streaming }
  self.class.new(**child_options)
end
|
|
104
|
+
|
|
105
|
+
# @param event [Agent::Event] one of the {Agent::Event}
|
|
106
|
+
# variants; see {Agent::Event} for the full list
|
|
107
|
+
# @return [void]
|
|
108
|
+
def on_event(event)
  case event
  in Event::Thinking(content:)
    # In streaming mode the text was already shown through the deltas, so
    # the bookend only closes the line.
    return terminate_stream if @streaming

    println(indent(Rainbow(content).color(85, 85, 85)))
  in Event::Assistant(content:)
    return terminate_stream if @streaming

    println(indent(render_markdown(content)))
  in Event::ThinkingDelta(content:)
    return unless @streaming

    stream_fragment(Rainbow(content).color(85, 85, 85))
  in Event::AssistantDelta(content:)
    return unless @streaming

    # Printed raw: no Markdown rendering on partial text.
    stream_fragment(content)
  in Event::ToolCall(name:, arguments:)
    rendered_args = arguments.map { |key, value| "#{key}=#{value.inspect}" }
    println(indent(Rainbow("→ #{name}(#{rendered_args.join(', ')})").cyan))
  in Event::ToolResult(content:)
    println(indent(Rainbow("= #{truncate_tool_result(content)}").cyan))
  in Event::FallbackNotice(reason:)
    println(indent(Rainbow("! #{reason}").yellow))
  in Event::Cancelled
    println(indent(Rainbow('! cancelled').red))
  else
    # Every remaining variant renders nothing on the terminal. The explicit
    # else also keeps case/in from raising NoMatchingPatternError.
    nil
  end
end
|
|
142
|
+
|
|
143
|
+
# @return [String] short label for {Agent#to_s}; appends
|
|
144
|
+
# each non-default knob (+padding+, +streaming+) so the
|
|
145
|
+
# default form stays +"Terminal"+
|
|
146
|
+
def to_s
  # Only knobs that differ from their defaults are listed.
  details = [
    ("padding=#{@padding}" unless @padding.zero?),
    ('streaming' if @streaming)
  ].compact
  return 'Terminal' if details.empty?

  "Terminal(#{details.join(', ')})"
end
|
|
152
|
+
|
|
153
|
+
private
|
|
154
|
+
|
|
155
|
+
# +puts+ wrapper that also resets the streaming line-state
|
|
156
|
+
# to "at line start" — every full-line print necessarily
|
|
157
|
+
# leaves the cursor at column 0, so the next fragment
|
|
158
|
+
# printed in streaming mode knows to insert padding before
|
|
159
|
+
# its first character. Used by every branch in
|
|
160
|
+
# {#on_event} except the delta branches.
|
|
161
|
+
#
|
|
162
|
+
# @param text [String]
|
|
163
|
+
# @return [void]
|
|
164
|
+
def println(text)
  # Prints +text+ as a full line and records that the cursor is back at
  # column 0.
  #
  # If a streamed fragment left the cursor mid-line (tracked through
  # @at_line_start by #stream_fragment), break that line first. Otherwise
  # a full-line notice that arrives before any stream terminator would be
  # glued onto the unfinished streamed text. In non-streaming mode the
  # flag is always true, so the output there is unchanged.
  # NOTE(review): a fragment whose visible text ends in a newline but is
  # followed by trailing bytes leaves the flag false, so one extra newline
  # may be emitted in that case.
  puts unless @at_line_start
  puts text
  @at_line_start = true
end
|
|
168
|
+
|
|
169
|
+
# Emit a single newline as a stream terminator and reset
|
|
170
|
+
# the line-state. Called on {Event::Thinking} /
|
|
171
|
+
# {Event::Assistant} bookends in streaming mode, where the
|
|
172
|
+
# content has already been rendered via the delta stream
|
|
173
|
+
# and what's left to do is give the next event a clean
|
|
174
|
+
# line to start on.
|
|
175
|
+
#
|
|
176
|
+
# @return [void]
|
|
177
|
+
def terminate_stream
  # Unconditional newline: it is written even when the cursor is already
  # at column 0, and the next fragment is then treated as starting a line.
  $stdout.puts
  @at_line_start = true
end
|
|
181
|
+
|
|
182
|
+
# Print one streaming fragment to stdout *without* a
|
|
183
|
+
# trailing newline (so the next fragment continues the
|
|
184
|
+
# line) and flush so the bytes actually reach the
|
|
185
|
+
# terminal. Threads padding through any mid-fragment
|
|
186
|
+
# newlines: a fragment that contains "foo\nbar" with
|
|
187
|
+
# padding 2 prints +" foo\n bar"+ when the cursor was
|
|
188
|
+
# at line start.
|
|
189
|
+
#
|
|
190
|
+
# @param text [String] the fragment to emit; +nil+ / empty
|
|
191
|
+
# short-circuits
|
|
192
|
+
# @return [void]
|
|
193
|
+
def stream_fragment(text)
  return if text.nil? || text.empty?

  if @padding.zero?
    print(text)
    @at_line_start = text.end_with?("\n")
  else
    margin = ' ' * @padding
    padded = String.new
    # Walk the fragment line by line: a piece gets the margin only when it
    # begins at the start of a terminal line, which is either the state
    # carried over from the previous fragment or the piece before it
    # having ended in a newline.
    text.each_line("\n") do |piece|
      padded << margin if @at_line_start
      padded << piece
      @at_line_start = piece.end_with?("\n")
    end
    print(padded)
  end
  # No trailing newline is written, so flush explicitly.
  $stdout.flush
end
|
|
211
|
+
|
|
212
|
+
# Prepend +@padding+ spaces to every line of +text+. Splits
|
|
213
|
+
# on +each_line+ rather than a +gsub+ trick so a trailing
|
|
214
|
+
# newline in the input doesn't produce a stray padded blank
|
|
215
|
+
# line at the end — +puts+ adds the final newline if
|
|
216
|
+
# missing, same as before.
|
|
217
|
+
#
|
|
218
|
+
# @param text [String]
|
|
219
|
+
# @return [String]
|
|
220
|
+
def indent(text)
  # With no padding the argument is handed back as-is, not copied.
  return text if @padding.zero?

  margin = ' ' * @padding
  # Going line by line keeps a trailing newline from producing an extra
  # padded blank line at the end.
  text.to_s.lines.map { |line| "#{margin}#{line}" }.join
end
|
|
226
|
+
|
|
227
|
+
# Render assistant Markdown for the terminal, degrading to
|
|
228
|
+
# the raw string when the renderer raises. tty-markdown /
|
|
229
|
+
# strings have known bugs around ANSI inside tables (e.g.
|
|
230
|
+
# +Strings::Wrap.insert_ansi+ raising +IndexError+); we'd
|
|
231
|
+
# rather show ugly Markdown than abort an in-flight
|
|
232
|
+
# conversation.
|
|
233
|
+
#
|
|
234
|
+
# @param content [String] assistant Markdown
|
|
235
|
+
# @return [String] rendered ANSI text, or +content+
|
|
236
|
+
# unchanged on render failure
|
|
237
|
+
def render_markdown(content)
  begin
    TTY::Markdown.parse(content)
  rescue StandardError => e
    # A renderer failure is logged and the unrendered input is returned
    # instead of letting the error propagate.
    LOGGER.warn("TTY::Markdown render failed (#{e.class}: #{e.message}); falling back to raw text")
    content
  end
end
|
|
243
|
+
|
|
244
|
+
# Flatten whitespace and cap to {MAX_TOOL_RESULT_CHARS}. The
|
|
245
|
+
# cap keeps multi-screen dumps (rendered HTML, PDF text)
|
|
246
|
+
# from drowning the terminal stream; the byte-count suffix
|
|
247
|
+
# on a truncated result distinguishes "tool returned
|
|
248
|
+
# exactly this" from "tool returned much more, you're
|
|
249
|
+
# seeing a slice."
|
|
250
|
+
#
|
|
251
|
+
# @param content [String] tool observation
|
|
252
|
+
# @return [String] single-line display form, possibly
|
|
253
|
+
# truncated
|
|
254
|
+
def truncate_tool_result(content)
  raw = content.to_s
  # Collapse every whitespace run (newlines included) into one space so the
  # result fits on a single terminal line.
  one_line = raw.gsub(/\s+/, ' ').strip
  return one_line unless one_line.length > MAX_TOOL_RESULT_CHARS

  # The cut is counted in characters of the flattened text; the suffix
  # reports the byte size of the untouched original.
  "#{one_line.slice(0, MAX_TOOL_RESULT_CHARS)}… (#{raw.bytesize} bytes total)"
end
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
end
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pikuri
|
|
4
|
+
class Agent
|
|
5
|
+
module Listener
|
|
6
|
+
# Logs the conversation's context-window consumption per
|
|
7
|
+
# assistant turn via +Pikuri.logger_for('Tokens')+. Consumes
|
|
8
|
+
# {Event::Tokens} (one log line per emission) and
|
|
9
|
+
# {Event::ContextCap} (one-shot cap, picked off and cached);
|
|
10
|
+
# every other event variant is a no-op.
|
|
11
|
+
#
|
|
12
|
+
# Existence rationale: catch context-window growth before the
|
|
13
|
+
# provider raises +RubyLLM::ContextLengthExceededError+.
|
|
14
|
+
# +ctx+ is the headline number — tokens consumed by the
|
|
15
|
+
# conversation *through* this turn: this turn's prompt plus
|
|
16
|
+
# its reply, both of which the model will re-process on the
|
|
17
|
+
# next turn. The +Δ+ field shows the climb between turns;
|
|
18
|
+
# +↑+ / +↓+ are the per-turn input / output sizes that drove
|
|
19
|
+
# it (matching Claude Code's convention: +↑+ is what's sent
|
|
20
|
+
# to the model, +↓+ is what comes back).
|
|
21
|
+
#
|
|
22
|
+
# == State and scope
|
|
23
|
+
#
|
|
24
|
+
# The latest snapshot lives on the instance, so one
|
|
25
|
+
# +TokenLog+ is per-conversation. Sub-agents (with their own
|
|
26
|
+
# +RubyLLM::Chat+) get a fresh instance via {#for_sub_agent}
|
|
27
|
+
# (dispatched by {ListenerList#for_sub_agent}) so their
|
|
28
|
+
# counts log against their own chat. The synthesizer rescue
|
|
29
|
+
# gets a derived list via the same hook because its synth
|
|
30
|
+
# chat is also fresh.
|
|
31
|
+
#
|
|
32
|
+
# == Log line shape
|
|
33
|
+
#
|
|
34
|
+
# Without a cap (no {Event::ContextCap} carrying a value):
|
|
35
|
+
#
|
|
36
|
+
# msg #1: ctx=6.8k Δ+6.8k ↑6.8k ↓0.0k
|
|
37
|
+
# msg #2: ctx=8.4k Δ+1.6k ↑1.0k ↓0.6k
|
|
38
|
+
#
|
|
39
|
+
# With a cap of e.g. 32k (an {Event::ContextCap} with
|
|
40
|
+
# +cap: 32_768+ was emitted at {Agent#initialize}):
|
|
41
|
+
#
|
|
42
|
+
# msg #1: ctx=6.8k/32.0k Δ+6.8k ↑6.8k ↓0.0k
|
|
43
|
+
#
|
|
44
|
+
# When the owning {Agent} has a non-empty {Agent#name} (i.e.
|
|
45
|
+
# a sub-agent), the line is prefixed with +[name] +:
|
|
46
|
+
#
|
|
47
|
+
# [sub_agent 0] msg #1: ctx=4.2k Δ+4.2k ↑4.2k ↓0.0k
|
|
48
|
+
#
|
|
49
|
+
# +ctx+ is the snapshot
|
|
50
|
+
# (+input + cached + cache_creation + output+; see
|
|
51
|
+
# {Event::Tokens}), optionally suffixed with +/<cap>+ when
|
|
52
|
+
# {#context_window_cap} is set so the operator can see how
|
|
53
|
+
# close the conversation is to the limit. Including +output+
|
|
54
|
+
# makes this turn's reply visible immediately — leaving it
|
|
55
|
+
# out would hide a long reply in the +ctx=+ headline until
|
|
56
|
+
# the next turn pulled it in as cached prompt. +Δ+ is
|
|
57
|
+
# signed: +Δ+X+ for growth, +Δ-X+ for shrinkage (legitimate
|
|
58
|
+
# if ruby_llm ever prunes between turns). On the first
|
|
59
|
+
# message the baseline is implicitly zero, so +Δ+ equals
|
|
60
|
+
# +ctx+. Sizes are scaled by 1024 and shown with one
|
|
61
|
+
# decimal + a +k+ suffix.
|
|
62
|
+
class TokenLog < Base
|
|
63
|
+
# Subsystem logger; the per-turn context-window line is
|
|
64
|
+
# emitted at +INFO+ level. Set its level with
|
|
65
|
+
# +PIKURI_LOG_TOKENS+ or the global +PIKURI_LOG+.
|
|
66
|
+
#
|
|
67
|
+
# @return [Logger]
|
|
68
|
+
LOGGER = Pikuri.logger_for('Tokens')
|
|
69
|
+
|
|
70
|
+
# Tokens consumed by the conversation through the most
|
|
71
|
+
# recent turn — this turn's prompt plus its reply, which
|
|
72
|
+
# together make up the bulk of what the next turn will
|
|
73
|
+
# re-process. Equals +input + cached + cache_creation +
|
|
74
|
+
# output+ from the latest {Event::Tokens}. Zero until the
|
|
75
|
+
# first {Event::Tokens} arrives.
|
|
76
|
+
#
|
|
77
|
+
# @return [Integer]
|
|
78
|
+
attr_reader :context_window_size
|
|
79
|
+
|
|
80
|
+
# Model's context-window cap, or +nil+ if no source could
|
|
81
|
+
# supply one (see {Agent::ContextWindowDetector}). When
|
|
82
|
+
# set, the +ctx=+ field renders as +ctx=<used>/<cap>+
|
|
83
|
+
# instead of just +ctx=<used>+. {Agent#initialize} emits a
|
|
84
|
+
# one-shot {Event::ContextCap} at construction; this
|
|
85
|
+
# listener picks the value off it and caches it here. An
|
|
86
|
+
# instance constructed bare (e.g. in tests, or by
|
|
87
|
+
# {#for_sub_agent}) defaults to +nil+.
|
|
88
|
+
#
|
|
89
|
+
# @return [Integer, nil]
|
|
90
|
+
attr_accessor :context_window_cap
|
|
91
|
+
|
|
92
|
+
# @return [String] owning agent's identifier. Empty by
|
|
93
|
+
# default (main agent); set by {#for_sub_agent} from the
|
|
94
|
+
# sub-agent's generated name so the log lines can be
|
|
95
|
+
# prefixed with +[<name>] +. Read-only — for a
|
|
96
|
+
# sub-agent's listener you get a fresh instance via
|
|
97
|
+
# {#for_sub_agent}.
|
|
98
|
+
attr_reader :name
|
|
99
|
+
|
|
100
|
+
# The most recent log line, in the exact format written to
|
|
101
|
+
# {LOGGER} (including any +[<name>] + prefix). Empty until
|
|
102
|
+
# the first {Event::Tokens} has been processed. Hosts that
|
|
103
|
+
# want to surface the current context-window snapshot in
|
|
104
|
+
# their own UI (e.g. a TUI status footer) read this
|
|
105
|
+
# instead of re-implementing the formatting.
|
|
106
|
+
#
|
|
107
|
+
# Thread safety: a single instance-variable read of a
|
|
108
|
+
# +String+ — safe to read from any thread; readers may
|
|
109
|
+
# briefly see the previous turn's line during an in-flight
|
|
110
|
+
# {#on_event} call, which is acceptable for a status
|
|
111
|
+
# display.
|
|
112
|
+
#
|
|
113
|
+
# @return [String]
|
|
114
|
+
attr_reader :status_line
|
|
115
|
+
|
|
116
|
+
# @param name [String] agent identifier prepended to each
|
|
117
|
+
# log line as +[<name>] + when non-empty. Defaults to
|
|
118
|
+
# +""+ for the main agent.
|
|
119
|
+
def initialize(name: '')
  super()
  @name = name
  # Nothing has been logged yet: empty line, no cap, zeroed counters.
  @status_line = ''
  @context_window_cap = nil
  @context_window_size = 0
  @msg = 0
end
|
|
127
|
+
|
|
128
|
+
# Sub-agent variant: a fresh +TokenLog+ with a zeroed
|
|
129
|
+
# snapshot so the sub-agent's context-window readings
|
|
130
|
+
# track its own +RubyLLM::Chat+ rather than continuing the
|
|
131
|
+
# parent's. Picks the sub-agent's +name:+ out of the
|
|
132
|
+
# forwarded params so its log lines carry the +[<name>] +
|
|
133
|
+
# prefix; defaults to +""+ when absent. The cap is left
|
|
134
|
+
# +nil+ here; the sub-agent's {Agent#initialize} emits a
|
|
135
|
+
# fresh {Event::ContextCap} immediately after construction
|
|
136
|
+
# and this listener picks it up off the stream.
|
|
137
|
+
#
|
|
138
|
+
# @param name [String] sub-agent's identifier
|
|
139
|
+
# @return [TokenLog]
|
|
140
|
+
def for_sub_agent(name: '', **)
  # Only the name is passed on; every other forwarded param is ignored and
  # the new instance starts from its constructor defaults.
  listener_class = self.class
  listener_class.new(name: name)
end
|
|
143
|
+
|
|
144
|
+
# @param event [Agent::Event]
|
|
145
|
+
# @return [void]
|
|
146
|
+
def on_event(event)
  case event
  in Event::Tokens => usage then log_tokens(usage)
  in Event::ContextCap(cap:) then @context_window_cap = cap
  else
    # All other variants are ignored. The explicit else also keeps case/in
    # from raising NoMatchingPatternError.
    nil
  end
end
|
|
156
|
+
|
|
157
|
+
# @return [String] short label for {Agent#to_s}, including
|
|
158
|
+
# the current context-window size (with cap suffix when
|
|
159
|
+
# set)
|
|
160
|
+
def to_s
  # Uses the same ctx formatter as the log line.
  headline = format_ctx
  "TokenLog(ctx=#{headline})"
end
|
|
163
|
+
|
|
164
|
+
private
|
|
165
|
+
|
|
166
|
+
# Update the snapshot and write one +INFO+ line to the
|
|
167
|
+
# subsystem logger.
|
|
168
|
+
#
|
|
169
|
+
# @param tokens [Event::Tokens]
|
|
170
|
+
# @return [void]
|
|
171
|
+
def log_tokens(tokens)
  @msg += 1
  sent = tokens.input.to_i
  received = tokens.output.to_i

  # New snapshot = all four counters summed; a nil counter counts as zero.
  previous = @context_window_size
  @context_window_size = [sent, tokens.cached, tokens.cache_creation, received].sum(&:to_i)

  # The formatted line is kept on the instance and also logged.
  @status_line = format_line(sent, received, @context_window_size - previous)
  LOGGER.info(@status_line)
end
|
|
184
|
+
|
|
185
|
+
def format_line(input, output, delta)
  # An empty name gets no "[name] " tag.
  tag = @name.empty? ? '' : "[#{@name}] "
  # The sign is written explicitly; a zero delta renders with "+".
  signed_delta = "#{delta.negative? ? '-' : '+'}#{format_k(delta.abs)}"
  "#{tag}msg ##{@msg}: ctx=#{format_ctx} Δ#{signed_delta} ↑#{format_k(input)} ↓#{format_k(output)}"
end
|
|
190
|
+
|
|
191
|
+
# +<used>+ when no cap is set, +<used>/<cap>+ when one is.
|
|
192
|
+
# Shared between {#format_line} and {#to_s} so the headline
|
|
193
|
+
# reads the same in the log stream and in {Agent#to_s}
|
|
194
|
+
# banners.
|
|
195
|
+
def format_ctx
  used = format_k(@context_window_size)
  # The "/<cap>" part appears only when a cap is set.
  @context_window_cap.nil? ? used : "#{used}/#{format_k(@context_window_cap)}"
end
|
|
201
|
+
|
|
202
|
+
# Format a token count as a 1024-scaled +k+-suffixed
|
|
203
|
+
# string. +nil+ → +0.0k+; +0+ → +0.0k+; 12_453 → +12.2k+.
|
|
204
|
+
# Uniform format keeps lines easy to scan at the cost of
|
|
205
|
+
# looking odd for very small per-turn outputs (+↓0.0k+ on
|
|
206
|
+
# tool-call acks).
|
|
207
|
+
#
|
|
208
|
+
# @param n [Integer, nil]
|
|
209
|
+
# @return [String]
|
|
210
|
+
def format_k(n)
  # nil.to_i is 0, so a missing count renders as "0.0k".
  kilo_tokens = n.to_i.to_f / 1024
  format('%.1fk', kilo_tokens)
end
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
end
|