pikuri 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +31 -179
- data/lib/pikuri.rb +12 -162
- metadata +45 -159
- data/CHANGELOG.md +0 -62
- data/GETTING_STARTED.md +0 -223
- data/LICENSE +0 -21
- data/lib/pikuri/agent/chat_transport.rb +0 -41
- data/lib/pikuri/agent/context_window_detector.rb +0 -101
- data/lib/pikuri/agent/listener/in_memory_message_list.rb +0 -33
- data/lib/pikuri/agent/listener/message_listener.rb +0 -93
- data/lib/pikuri/agent/listener/step_limit.rb +0 -97
- data/lib/pikuri/agent/listener/terminal.rb +0 -137
- data/lib/pikuri/agent/listener/token_log.rb +0 -166
- data/lib/pikuri/agent/listener_list.rb +0 -113
- data/lib/pikuri/agent/message.rb +0 -61
- data/lib/pikuri/agent/synthesizer.rb +0 -120
- data/lib/pikuri/agent/tokens.rb +0 -56
- data/lib/pikuri/agent.rb +0 -286
- data/lib/pikuri/subprocess.rb +0 -166
- data/lib/pikuri/tool/bash.rb +0 -272
- data/lib/pikuri/tool/calculator.rb +0 -82
- data/lib/pikuri/tool/confirmer.rb +0 -96
- data/lib/pikuri/tool/edit.rb +0 -196
- data/lib/pikuri/tool/fetch.rb +0 -167
- data/lib/pikuri/tool/glob.rb +0 -310
- data/lib/pikuri/tool/grep.rb +0 -338
- data/lib/pikuri/tool/parameters.rb +0 -314
- data/lib/pikuri/tool/read.rb +0 -254
- data/lib/pikuri/tool/scraper/fetch_error.rb +0 -16
- data/lib/pikuri/tool/scraper/html.rb +0 -285
- data/lib/pikuri/tool/scraper/pdf.rb +0 -54
- data/lib/pikuri/tool/scraper/simple.rb +0 -177
- data/lib/pikuri/tool/search/brave.rb +0 -184
- data/lib/pikuri/tool/search/duckduckgo.rb +0 -196
- data/lib/pikuri/tool/search/engines.rb +0 -154
- data/lib/pikuri/tool/search/exa.rb +0 -217
- data/lib/pikuri/tool/search/rate_limiter.rb +0 -92
- data/lib/pikuri/tool/search/result.rb +0 -29
- data/lib/pikuri/tool/skill.rb +0 -80
- data/lib/pikuri/tool/skill_catalog.rb +0 -376
- data/lib/pikuri/tool/sub_agent.rb +0 -102
- data/lib/pikuri/tool/web_scrape.rb +0 -117
- data/lib/pikuri/tool/web_search.rb +0 -38
- data/lib/pikuri/tool/workspace.rb +0 -150
- data/lib/pikuri/tool/write.rb +0 -170
- data/lib/pikuri/tool.rb +0 -118
- data/lib/pikuri/url_cache.rb +0 -106
- data/lib/pikuri/version.rb +0 -10
- data/prompts/coding-system-prompt.txt +0 -28
- data/prompts/pikuri-chat.txt +0 -15
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'faraday'
|
|
4
|
-
require 'json'
|
|
5
|
-
|
|
6
|
-
module Pikuri
|
|
7
|
-
class Agent
|
|
8
|
-
# Resolves the model's context-window cap from three sources, in order:
|
|
9
|
-
# an explicit override, the value ruby_llm reports for the model, or a
|
|
10
|
-
# llama.cpp +/props+ probe. Returns +nil+ if none of those produce a
|
|
11
|
-
# value.
|
|
12
|
-
#
|
|
13
|
-
# Used by {Agent#initialize} at construction time to feed
|
|
14
|
-
# {Listener::TokenLog} a cap it can render alongside the running
|
|
15
|
-
# context size (so the +ctx=12.2k/32.0k+ line tells the operator how
|
|
16
|
-
# close the conversation is to the limit).
|
|
17
|
-
#
|
|
18
|
-
# == Precedence
|
|
19
|
-
#
|
|
20
|
-
# 1. +override+ — the +Agent.new(context_window:)+ kwarg. Wins over
|
|
21
|
-
# everything; an explicit value is the operator's statement of
|
|
22
|
-
# truth.
|
|
23
|
-
# 2. +ruby_llm_reported+ — +RubyLLM::Model::Info#context_window+ from
|
|
24
|
-
# {Agent#chat}'s resolved model. Populated for models in ruby_llm's
|
|
25
|
-
# bundled registry (OpenAI, Anthropic, Gemini, …); +nil+ for custom
|
|
26
|
-
# local model ids that fall through to +Model::Info.default+.
|
|
27
|
-
# 3. +llama_probe_url+ — HTTP GET against llama.cpp's non-standard
|
|
28
|
-
# +/props+ endpoint. The server exposes the launched +n_ctx+ at
|
|
29
|
-
# +default_generation_settings.n_ctx+ there. Probed only when the
|
|
30
|
-
# first two are +nil+. Provider-specific to llama.cpp; the caller
|
|
31
|
-
# (typically +bin/pikuri-chat+) derives the right URL from its configured
|
|
32
|
-
# base.
|
|
33
|
-
#
|
|
34
|
-
# == Failure handling
|
|
35
|
-
#
|
|
36
|
-
# The probe is best-effort. HTTP error, timeout, non-JSON body, or a
|
|
37
|
-
# missing/invalid +n_ctx+ field all return +nil+ and log one +warn+
|
|
38
|
-
# line via +Pikuri.logger_for('ContextWindowDetector')+. This is the
|
|
39
|
-
# CLAUDE.md "secondary to the loop" carve-out — a wedged or
|
|
40
|
-
# non-llama.cpp server should not abort agent construction over a
|
|
41
|
-
# cosmetic readout.
|
|
42
|
-
class ContextWindowDetector
  LOGGER = Pikuri.logger_for('ContextWindowDetector')

  # Probe timeouts in seconds. Short on purpose: this runs synchronously
  # during +Agent.new+ and a wedged server should not stall startup
  # noticeably.
  OPEN_TIMEOUT = 2
  READ_TIMEOUT = 2

  # @param override [Integer, nil] explicit cap from the caller; wins if
  #   non-+nil+
  # @param ruby_llm_reported [Integer, nil] value off
  #   +RubyLLM::Chat#model.context_window+
  # @param llama_probe_url [String, nil] full URL to llama.cpp +/props+;
  #   +nil+ or empty string skips the probe
  def initialize(override:, ruby_llm_reported:, llama_probe_url:)
    @override = override
    @ruby_llm_reported = ruby_llm_reported
    @llama_probe_url = llama_probe_url
  end

  # Walks the three sources in precedence order: explicit override, the
  # ruby_llm-reported value, then the llama.cpp probe. The probe only runs
  # when both earlier sources are falsy and a non-empty URL was supplied.
  #
  # @return [Integer, nil] resolved cap, or +nil+ if no source produced
  #   one
  def detect
    return @override if @override
    return @ruby_llm_reported if @ruby_llm_reported
    return nil if @llama_probe_url.nil? || @llama_probe_url.empty?

    probe_llama_cpp
  end

  private

  # Issues one GET against +@llama_probe_url+ and reads
  # +default_generation_settings.n_ctx+ out of the JSON body.
  #
  # Best-effort: a non-200 status, a Faraday error, an unparseable body,
  # or a body that is valid JSON but does not carry a positive integer at
  # the expected path all log one warning and return +nil+ instead of
  # raising.
  #
  # @return [Integer, nil]
  def probe_llama_cpp
    response = Faraday.new(
      request: { open_timeout: OPEN_TIMEOUT, timeout: READ_TIMEOUT }
    ).get(@llama_probe_url) do |req|
      req.headers['Accept'] = 'application/json'
    end

    return warn_and_nil("HTTP #{response.status} from #{@llama_probe_url}") unless response.status == 200

    # +to_s+ turns a nil body into "", which JSON.parse rejects with the
    # rescued ParserError rather than an unrescued TypeError.
    n_ctx = extract_n_ctx(JSON.parse(response.body.to_s))
    return n_ctx if n_ctx

    warn_and_nil(
      "no positive integer at default_generation_settings.n_ctx in #{@llama_probe_url} response"
    )
  rescue Faraday::Error, JSON::ParserError => e
    warn_and_nil("#{e.class.name.split('::').last}: #{e.message}")
  end

  # Pulls +n_ctx+ out of a parsed +/props+ payload without assuming its
  # shape. +Hash#dig+ would raise on a top-level array/scalar or on a
  # non-Hash +default_generation_settings+, so each level is checked
  # explicitly.
  #
  # @param data [Object] whatever +JSON.parse+ returned
  # @return [Integer, nil] the positive integer +n_ctx+, or +nil+ when the
  #   payload does not contain one
  def extract_n_ctx(data)
    return nil unless data.is_a?(Hash)

    settings = data['default_generation_settings']
    return nil unless settings.is_a?(Hash)

    n_ctx = settings['n_ctx']
    n_ctx if n_ctx.is_a?(Integer) && n_ctx.positive?
  end

  # Logs a single warning describing why the probe failed.
  #
  # @param reason [String] human-readable failure description
  # @return [nil] always, so callers can +return warn_and_nil(...)+
  def warn_and_nil(reason)
    LOGGER.warn("llama.cpp /props probe failed: #{reason}")
    nil
  end
end
|
|
100
|
-
end
|
|
101
|
-
end
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pikuri
|
|
4
|
-
class Agent
|
|
5
|
-
module Listener
|
|
6
|
-
# Recording listener that appends every {Message} the agent emits
|
|
7
|
-
# to an in-memory list. Used by specs to assert on emissions
|
|
8
|
-
# without parsing stdout, and as the rough shape a future
|
|
9
|
-
# structured consumer (web sink, telemetry pipe) would take.
|
|
10
|
-
class InMemoryMessageList < MessageListener
  # Messages recorded so far, oldest first. Starts out as an empty array,
  # so it is never +nil+.
  #
  # @return [Array<Agent::Message>]
  attr_reader :events

  # Builds a listener with an empty recording buffer.
  def initialize
    super()
    @events = []
  end

  # Appends +message+ to the end of {#events}.
  #
  # @param message [Agent::Message]
  # @return [void]
  def on_message(message)
    @events.push(message)
  end

  # @return [String] fixed label used when the listener is printed
  def to_s
    'InMemoryMessageList'
  end
end
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
end
|
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pikuri
|
|
4
|
-
class Agent
|
|
5
|
-
module Listener
|
|
6
|
-
# Abstract base for listeners that consume the +Agent+'s normalized
|
|
7
|
-
# event stream as {Message} value objects. Concrete subclasses
|
|
8
|
-
# override the single {#on_message} hook and pattern-match on the
|
|
9
|
-
# variant.
|
|
10
|
-
#
|
|
11
|
-
# +#attach(chat)+ wires the three +RubyLLM::Chat+ callbacks once,
|
|
12
|
-
# builds the appropriate {Message} variant from each payload, and
|
|
13
|
-
# forwards it into {#on_message}. Empty +thinking+ and +assistant+
|
|
14
|
-
# content is filtered here so subclasses never receive vacuous
|
|
15
|
-
# events.
|
|
16
|
-
#
|
|
17
|
-
# Provider-reported token usage is a separate channel: every
|
|
18
|
-
# assistant +after_message+ event (including pure tool-call turns
|
|
19
|
-
# with empty content) emits an {Agent::Tokens} through {#on_tokens}
|
|
20
|
-
# so listeners that track context-window growth see every round
|
|
21
|
-
# trip. {Message::User} is not a +Chat+ event — {Agent#run_loop}
|
|
22
|
-
# constructs it directly and calls {#on_message}.
|
|
23
|
-
class MessageListener
  # Registers this listener on +chat+'s three callbacks:
  #
  # * +after_message+ is routed through {#dispatch_chat_message};
  # * +before_tool_call+ becomes a +Message::ToolCall+ built from the
  #   call's +name+ and +arguments+;
  # * +after_tool_result+ becomes a +Message::ToolResult+ wrapping the
  #   raw result.
  #
  # @param chat [RubyLLM::Chat]
  # @return [void]
  def attach(chat)
    chat.after_message do |chat_message|
      dispatch_chat_message(chat_message)
    end

    chat.before_tool_call do |tool_call|
      on_message(Message::ToolCall.new(name: tool_call.name, arguments: tool_call.arguments))
    end

    chat.after_tool_result do |result|
      on_message(Message::ToolResult.new(content: result))
    end
  end

  # Receives every event of the normalized stream. The base
  # implementation does nothing; subclasses override it and branch on
  # the message variant they care about.
  #
  # @param message [Message::User, Message::Thinking,
  #   Message::Assistant, Message::ToolCall, Message::ToolResult,
  #   Message::FallbackNotice]
  # @return [void]
  def on_message(message)
    # Intentionally empty: the default listener ignores all messages.
  end

  # Receives the token usage built by {#dispatch_chat_message} for each
  # assistant message, including ones whose content is empty. The base
  # implementation does nothing.
  #
  # @param tokens [Agent::Tokens]
  # @return [void]
  def on_tokens(tokens)
    # Intentionally empty: the default listener ignores token usage.
  end

  private

  # Translates one +after_message+ payload into listener events.
  #
  # Non-assistant messages are dropped. For an assistant message this
  # emits, in order: a +Message::Thinking+ when the thinking text is
  # present and non-empty, a +Message::Assistant+ when the content is a
  # non-empty String, and always an +Agent::Tokens+ via {#on_tokens}.
  #
  # @param msg [RubyLLM::Message]
  # @return [void]
  def dispatch_chat_message(msg)
    return if msg.role != :assistant

    reasoning = msg.thinking&.text
    if reasoning && !reasoning.empty?
      on_message(Message::Thinking.new(content: reasoning))
    end

    reply = msg.content
    if reply.is_a?(String) && !reply.empty?
      on_message(Message::Assistant.new(content: reply))
    end

    usage = Agent::Tokens.new(
      input: msg.input_tokens,
      output: msg.output_tokens,
      cached: msg.cached_tokens,
      cache_creation: msg.cache_creation_tokens,
      thinking: msg.thinking_tokens,
      model_id: msg.model_id
    )
    on_tokens(usage)
  end
end
|
|
91
|
-
end
|
|
92
|
-
end
|
|
93
|
-
end
|
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pikuri
|
|
4
|
-
class Agent
|
|
5
|
-
module Listener
|
|
6
|
-
# Standalone listener that caps the number of tool calls per
|
|
7
|
-
# +Agent#run_loop+ invocation. Not a +MessageListener+ — its job is
|
|
8
|
-
# control flow (raising on overrun), not surfacing events.
|
|
9
|
-
#
|
|
10
|
-
# ruby_llm has no built-in step budget, so this listener counts
|
|
11
|
-
# +before_tool_call+ events on the underlying +RubyLLM::Chat+ and
|
|
12
|
-
# raises {Exceeded} once the cap is crossed. +Agent#run_loop+
|
|
13
|
-
# forwards that out of +Chat#ask+; the step-exhaustion synthesizer
|
|
14
|
-
# rescues it to salvage the run.
|
|
15
|
-
class StepLimit
  # Error raised from the +before_tool_call+ callback when the number of
  # tool calls goes past the configured cap. Exposes the cap so rescuers
  # can mention it in their own messages.
  class Exceeded < StandardError
    # @return [Integer] the budget that was overrun
    attr_reader :max_steps

    # @param max_steps [Integer]
    def initialize(max_steps)
      super("Agent loop exceeded #{max_steps} steps")
      @max_steps = max_steps
    end
  end

  # @param max [Integer] hard cap on tool-call rounds; must be
  #   positive
  # @raise [ArgumentError] if +max+ is zero or negative
  def initialize(max:)
    if max <= 0
      raise ArgumentError, "max must be positive, got #{max}"
    end

    @step = 0
    @max = max
  end

  # Registers a +before_tool_call+ callback on +chat+. Each invocation
  # bumps the step counter and raises {Exceeded} as soon as the counter
  # is greater than the cap.
  #
  # @param chat [RubyLLM::Chat]
  # @return [void]
  def attach(chat)
    chat.before_tool_call do |_tool_call|
      @step += 1
      raise Exceeded.new(@max) if @step > @max
    end
  end

  # Listener protocol hook. An {Agent::Message::User} zeroes the step
  # counter (a user message starts a new turn); any other message is
  # ignored.
  #
  # @param message [Agent::Message]
  # @return [void]
  def on_message(message)
    case message
    when Agent::Message::User then @step = 0
    end
  end

  # Builds an independent +StepLimit+ for a sub-agent. The counter lives
  # on the instance, so a sub-agent's chat needs its own object rather
  # than sharing the receiver.
  #
  # When +max_steps:+ is omitted the new instance inherits the
  # receiver's cap, which lets callers invoke this with no arguments.
  # Any other keywords are accepted and ignored.
  #
  # @param max_steps [Integer] positive step cap for the sub-agent;
  #   defaults to the receiver's current cap
  # @return [StepLimit]
  # @raise [ArgumentError] if +max_steps+ is non-positive
  def for_sub_agent(max_steps: @max, **)
    self.class.new(max: max_steps)
  end

  # @return [String] short config dump showing the cap
  def to_s
    "StepLimit(max=#{@max})"
  end
end
|
|
95
|
-
end
|
|
96
|
-
end
|
|
97
|
-
end
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rainbow'
|
|
4
|
-
require 'tty-markdown'
|
|
5
|
-
|
|
6
|
-
module Pikuri
|
|
7
|
-
class Agent
|
|
8
|
-
module Listener
|
|
9
|
-
# Terminal renderer for the normalized event stream: dim grey
|
|
10
|
-
# reasoning, Markdown-rendered assistant content, cyan tool-call
|
|
11
|
-
# and tool-result lines. {Message::User} is intentionally silent —
|
|
12
|
-
# the user typed it, so re-rendering it adds nothing on the CLI.
|
|
13
|
-
# This is the default listener attached by {Agent#initialize}.
|
|
14
|
-
#
|
|
15
|
-
# Optionally prepends a fixed number of leading spaces to every
|
|
16
|
-
# rendered line via the +padding:+ kwarg. Sub-agents get a fresh
|
|
17
|
-
# padded instance through this listener's {#for_sub_agent} hook
|
|
18
|
-
# (dispatched by {Agent::ListenerList#for_sub_agent}) so their
|
|
19
|
-
# output is visually indented under the parent's stream.
|
|
20
|
-
class Terminal < MessageListener
  LOGGER = Pikuri.logger_for('Terminal')

  # Maximum number of characters of a flattened tool result shown on
  # the terminal. Longer results are cut and followed by a marker that
  # states the original size in bytes.
  MAX_TOOL_RESULT_CHARS = 200

  # Indent given to the listener produced by {#for_sub_agent}. It is a
  # fixed value, not added to the receiver's own padding.
  SUB_AGENT_PADDING = 2

  # @return [Integer] number of leading spaces put in front of every
  #   rendered line
  attr_reader :padding

  # @param padding [Integer] non-negative number of leading spaces
  #   prepended to every rendered line; 0 by default
  def initialize(padding: 0)
    super()
    @padding = padding
  end

  # Builds the listener a sub-agent should use: a new instance of the
  # receiver's class indented by {SUB_AGENT_PADDING}. All keyword
  # arguments are accepted and ignored.
  #
  # @return [Terminal]
  def for_sub_agent(**)
    self.class.new(padding: SUB_AGENT_PADDING)
  end

  # Prints one message to stdout, styled per variant: grey thinking
  # text, Markdown-rendered assistant text, cyan tool calls and tool
  # results, yellow fallback notices. User messages print nothing.
  #
  # @param message [Agent::Message] one of the {Agent::Message}
  #   variants
  # @return [void]
  def on_message(message)
    case message
    in Message::Thinking(content:)
      write_indented(Rainbow(content).color(85, 85, 85))
    in Message::Assistant(content:)
      write_indented(render_markdown(content))
    in Message::ToolCall(name:, arguments:)
      rendered_args = arguments.map { |key, value| "#{key}=#{value.inspect}" }.join(', ')
      write_indented(Rainbow("→ #{name}(#{rendered_args})").cyan)
    in Message::ToolResult(content:)
      write_indented(Rainbow("= #{truncate_tool_result(content)}").cyan)
    in Message::FallbackNotice(reason:)
      write_indented(Rainbow("! #{reason}").yellow)
    in Message::User
      nil # Deliberately silent: the user's own input is not echoed back.
    end
  end

  # @return [String] +"Terminal"+ when the padding is zero, otherwise
  #   +"Terminal(padding=N)"+
  def to_s
    return 'Terminal' if @padding.zero?

    "Terminal(padding=#{@padding})"
  end

  private

  # Indents +text+ and writes it with +puts+.
  #
  # @param text [String]
  # @return [nil]
  def write_indented(text)
    puts indent(text)
  end

  # Puts +@padding+ spaces in front of each line of +text+. With zero
  # padding the argument is returned untouched. Working line by line
  # means a trailing newline does not yield an extra padded blank line.
  #
  # @param text [String]
  # @return [String]
  def indent(text)
    return text if @padding.zero?

    pad = ' ' * @padding
    text.to_s.lines.map { |line| "#{pad}#{line}" }.join
  end

  # Renders Markdown through +TTY::Markdown+. If the renderer raises any
  # +StandardError+, a warning is logged and the raw Markdown is
  # returned so the conversation keeps going.
  #
  # @param content [String] assistant Markdown
  # @return [String] rendered text, or +content+ unchanged on render
  #   failure
  def render_markdown(content)
    begin
      TTY::Markdown.parse(content)
    rescue StandardError => error
      LOGGER.warn("TTY::Markdown render failed (#{error.class}: #{error.message}); falling back to raw text")
      content
    end
  end

  # Collapses every whitespace run to a single space, trims the ends,
  # and caps the result at {MAX_TOOL_RESULT_CHARS} characters. A capped
  # result ends with an ellipsis and the byte size of the original,
  # unflattened content.
  #
  # @param content [String] tool observation
  # @return [String] single-line display form, possibly truncated
  def truncate_tool_result(content)
    raw = content.to_s
    one_line = raw.gsub(/\s+/, ' ').strip
    return one_line unless one_line.length > MAX_TOOL_RESULT_CHARS

    "#{one_line[0, MAX_TOOL_RESULT_CHARS]}… (#{raw.bytesize} bytes total)"
  end
end
|
|
135
|
-
end
|
|
136
|
-
end
|
|
137
|
-
end
|
|
@@ -1,166 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pikuri
|
|
4
|
-
class Agent
|
|
5
|
-
module Listener
|
|
6
|
-
# Logs the conversation's context-window consumption per assistant
|
|
7
|
-
# turn via +Pikuri.logger_for('Tokens')+. Overrides
|
|
8
|
-
# {MessageListener#on_tokens} and emits one +INFO+ line per turn.
|
|
9
|
-
#
|
|
10
|
-
# Existence rationale: catch context-window growth before the
|
|
11
|
-
# provider raises +RubyLLM::ContextLengthExceededError+. +ctx+ is
|
|
12
|
-
# the headline number — tokens consumed by the conversation
|
|
13
|
-
# *through* this turn: this turn's prompt plus its reply, both of
|
|
14
|
-
# which the model will re-process on the next turn. The +Δ+ field
|
|
15
|
-
# shows the climb between turns; +↑+ / +↓+ are the per-turn input
|
|
16
|
-
# / output sizes that drove it (matching Claude Code's convention:
|
|
17
|
-
# +↑+ is what's sent to the model, +↓+ is what comes back).
|
|
18
|
-
#
|
|
19
|
-
# == State and scope
|
|
20
|
-
#
|
|
21
|
-
# The latest snapshot lives on the instance, so one +TokenLog+ is
|
|
22
|
-
# per-conversation. Sub-agents (with their own +RubyLLM::Chat+) get
|
|
23
|
-
# a fresh instance via {#for_sub_agent} (dispatched by
|
|
24
|
-
# {ListenerList#for_sub_agent}) so their counts log against their
|
|
25
|
-
# own chat. The synthesizer rescue keeps using the parent's
|
|
26
|
-
# instance because synth turns extend the same conversation.
|
|
27
|
-
#
|
|
28
|
-
# == Log line shape
|
|
29
|
-
#
|
|
30
|
-
# Without a cap (no detection source — see {Agent::ContextWindowDetector}):
|
|
31
|
-
#
|
|
32
|
-
# msg #1: ctx=6.8k Δ+6.8k ↑6.8k ↓0.0k
|
|
33
|
-
# msg #2: ctx=8.4k Δ+1.6k ↑1.0k ↓0.6k
|
|
34
|
-
#
|
|
35
|
-
# With a cap of e.g. 32k (set by {Agent#initialize} after detection):
|
|
36
|
-
#
|
|
37
|
-
# msg #1: ctx=6.8k/32.0k Δ+6.8k ↑6.8k ↓0.0k
|
|
38
|
-
#
|
|
39
|
-
# When the owning {Agent} has a non-empty {Agent#name} (i.e. a
|
|
40
|
-
# sub-agent), the line is prefixed with +[name] +:
|
|
41
|
-
#
|
|
42
|
-
# [sub_agent 0] msg #1: ctx=4.2k Δ+4.2k ↑4.2k ↓0.0k
|
|
43
|
-
#
|
|
44
|
-
# +ctx+ is the snapshot (+input + cached + cache_creation + output+;
|
|
45
|
-
# see {Agent::Tokens}), optionally suffixed with +/<cap>+ when
|
|
46
|
-
# {#context_window_cap} is set so the operator can see how close
|
|
47
|
-
# the conversation is to the limit. Including +output+ makes this
|
|
48
|
-
# turn's reply visible immediately — leaving it out would hide a
|
|
49
|
-
# long reply in the +ctx=+ headline until the next turn pulled it
|
|
50
|
-
# in as cached prompt. +Δ+ is signed: +Δ+X+ for growth,
|
|
51
|
-
# +Δ-X+ for shrinkage (legitimate if ruby_llm ever prunes between
|
|
52
|
-
# turns). On the first message the baseline is implicitly zero, so
|
|
53
|
-
# +Δ+ equals +ctx+. Sizes are scaled by 1024 and shown with one
|
|
54
|
-
# decimal + a +k+ suffix.
|
|
55
|
-
class TokenLog < MessageListener
  LOGGER = Pikuri.logger_for('Tokens')

  # Snapshot taken by the latest {#on_tokens}: the sum of that turn's
  # +input+, +cached+, +cache_creation+ and +output+ counts. Zero until
  # {#on_tokens} has run once.
  #
  # @return [Integer]
  attr_reader :context_window_size

  # Optional context-window cap. +nil+ on a freshly built instance;
  # when set, the +ctx=+ field renders as +<used>/<cap>+ instead of
  # just +<used>+.
  #
  # @return [Integer, nil]
  attr_accessor :context_window_cap

  # @return [String] identifier of the owning agent; when non-empty,
  #   log lines are prefixed with +[<name>] +
  attr_reader :name

  # @param name [String] agent identifier used for the log-line prefix;
  #   defaults to +""+, which produces no prefix
  def initialize(name: '')
    super()
    @name = name
    @context_window_cap = nil
    @context_window_size = 0
    @msg = 0
  end

  # Builds a listener for a sub-agent: a new instance of the receiver's
  # class with a zeroed snapshot, no cap, and the given +name+. Other
  # keyword arguments are accepted and ignored.
  #
  # @param name [String] sub-agent's identifier
  # @return [TokenLog]
  def for_sub_agent(name: '', **)
    self.class.new(name: name)
  end

  # Advances the message counter, replaces the context-window snapshot
  # with this turn's total, and logs one +INFO+ line carrying the new
  # size, the change since the previous snapshot, and the turn's
  # input/output counts. Missing counts are treated as zero.
  #
  # @param tokens [Agent::Tokens]
  # @return [void]
  def on_tokens(tokens)
    @msg += 1

    prompt = tokens.input.to_i
    cache_read = tokens.cached.to_i
    cache_write = tokens.cache_creation.to_i
    reply = tokens.output.to_i

    previous = @context_window_size
    @context_window_size = [prompt, cache_read, cache_write, reply].sum

    LOGGER.info(format_line(prompt, reply, @context_window_size - previous))
  end

  # @return [String] short label embedding the current context-window
  #   size (with the cap suffix when one is set)
  def to_s
    "TokenLog(ctx=#{format_ctx})"
  end

  private

  # Assembles one log line: optional +[name] + prefix, message number,
  # +ctx+ headline, signed delta, and the input/output sizes.
  #
  # @param input [Integer] this turn's input token count
  # @param output [Integer] this turn's output token count
  # @param delta [Integer] change in snapshot size; may be negative
  # @return [String]
  def format_line(input, output, delta)
    label = @name.empty? ? '' : "[#{@name}] "
    signed_delta = "#{delta.negative? ? '-' : '+'}#{format_k(delta.abs)}"

    "#{label}msg ##{@msg}: ctx=#{format_ctx} Δ#{signed_delta} ↑#{format_k(input)} ↓#{format_k(output)}"
  end

  # Renders the snapshot as +<used>+, or +<used>/<cap>+ when a cap is
  # set. Used by both {#format_line} and {#to_s}.
  #
  # @return [String]
  def format_ctx
    used = format_k(@context_window_size)
    @context_window_cap.nil? ? used : "#{used}/#{format_k(@context_window_cap)}"
  end

  # Formats a token count divided by 1024 with one decimal and a +k+
  # suffix. +nil+ and +0+ both give +0.0k+.
  #
  # @param n [Integer, nil]
  # @return [String]
  def format_k(n)
    kilo = n.to_i / 1024.0
    format('%.1fk', kilo)
  end
end
|
|
164
|
-
end
|
|
165
|
-
end
|
|
166
|
-
end
|