pikuri 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +31 -179
- data/lib/pikuri.rb +12 -162
- metadata +45 -159
- data/CHANGELOG.md +0 -62
- data/GETTING_STARTED.md +0 -223
- data/LICENSE +0 -21
- data/lib/pikuri/agent/chat_transport.rb +0 -41
- data/lib/pikuri/agent/context_window_detector.rb +0 -101
- data/lib/pikuri/agent/listener/in_memory_message_list.rb +0 -33
- data/lib/pikuri/agent/listener/message_listener.rb +0 -93
- data/lib/pikuri/agent/listener/step_limit.rb +0 -97
- data/lib/pikuri/agent/listener/terminal.rb +0 -137
- data/lib/pikuri/agent/listener/token_log.rb +0 -166
- data/lib/pikuri/agent/listener_list.rb +0 -113
- data/lib/pikuri/agent/message.rb +0 -61
- data/lib/pikuri/agent/synthesizer.rb +0 -120
- data/lib/pikuri/agent/tokens.rb +0 -56
- data/lib/pikuri/agent.rb +0 -286
- data/lib/pikuri/subprocess.rb +0 -166
- data/lib/pikuri/tool/bash.rb +0 -272
- data/lib/pikuri/tool/calculator.rb +0 -82
- data/lib/pikuri/tool/confirmer.rb +0 -96
- data/lib/pikuri/tool/edit.rb +0 -196
- data/lib/pikuri/tool/fetch.rb +0 -167
- data/lib/pikuri/tool/glob.rb +0 -310
- data/lib/pikuri/tool/grep.rb +0 -338
- data/lib/pikuri/tool/parameters.rb +0 -314
- data/lib/pikuri/tool/read.rb +0 -254
- data/lib/pikuri/tool/scraper/fetch_error.rb +0 -16
- data/lib/pikuri/tool/scraper/html.rb +0 -285
- data/lib/pikuri/tool/scraper/pdf.rb +0 -54
- data/lib/pikuri/tool/scraper/simple.rb +0 -177
- data/lib/pikuri/tool/search/brave.rb +0 -184
- data/lib/pikuri/tool/search/duckduckgo.rb +0 -196
- data/lib/pikuri/tool/search/engines.rb +0 -154
- data/lib/pikuri/tool/search/exa.rb +0 -217
- data/lib/pikuri/tool/search/rate_limiter.rb +0 -92
- data/lib/pikuri/tool/search/result.rb +0 -29
- data/lib/pikuri/tool/skill.rb +0 -80
- data/lib/pikuri/tool/skill_catalog.rb +0 -376
- data/lib/pikuri/tool/sub_agent.rb +0 -102
- data/lib/pikuri/tool/web_scrape.rb +0 -117
- data/lib/pikuri/tool/web_search.rb +0 -38
- data/lib/pikuri/tool/workspace.rb +0 -150
- data/lib/pikuri/tool/write.rb +0 -170
- data/lib/pikuri/tool.rb +0 -118
- data/lib/pikuri/url_cache.rb +0 -106
- data/lib/pikuri/version.rb +0 -10
- data/prompts/coding-system-prompt.txt +0 -28
- data/prompts/pikuri-chat.txt +0 -15
data/lib/pikuri/tool/edit.rb
DELETED
|
@@ -1,196 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pikuri
|
|
4
|
-
class Tool
|
|
5
|
-
# The +edit+ tool — exact-string replacement on an existing file.
|
|
6
|
-
# Instantiating +Tool::Edit.new(workspace: ws)+ produces a tool whose
|
|
7
|
-
# {Tool#to_ruby_llm_tool} wiring is identical to any bundled tool's.
|
|
8
|
-
# Same shape as {Tool::Read} (workspace captured by +execute+; no
|
|
9
|
-
# confirmer needed).
|
|
10
|
-
#
|
|
11
|
-
# == Why no confirmer
|
|
12
|
-
#
|
|
13
|
-
# The +old_string+ argument is itself an implicit read-check: the
|
|
14
|
-
# model can't write a correct +old_string+ without having seen the
|
|
15
|
-
# file (via {Tool::Read} or out-of-band), so the blast radius of any
|
|
16
|
-
# Edit is bounded by the model's actual knowledge of file state.
|
|
17
|
-
# That makes Edit safe to execute without prompting — by contrast,
|
|
18
|
-
# {Tool::Write} requires a confirmer because a hallucinated 500-line
|
|
19
|
-
# +content+ could clobber unread bytes.
|
|
20
|
-
#
|
|
21
|
-
# == Matching is strict (no fuzz cascade)
|
|
22
|
-
#
|
|
23
|
-
# +old_string+ must match the file byte-for-byte. v1 ships *no*
|
|
24
|
-
# fallback replacer (no whitespace-normalized, line-trimmed, block-
|
|
25
|
-
# anchor, etc.). Predictability beats fuzz: when an Edit fails, the
|
|
26
|
-
# model re-reads with {Tool::Read} and retries — clear failure mode,
|
|
27
|
-
# no compounding-heuristic risk. opencode runs a 9-replacer cascade
|
|
28
|
-
# under the hood despite its own description saying "must match
|
|
29
|
-
# exactly"; pi stays strict. We match pi.
|
|
30
|
-
#
|
|
31
|
-
# == Line endings get normalized
|
|
32
|
-
#
|
|
33
|
-
# The one structural exception to "strict bytes": files with CRLF
|
|
34
|
-
# line endings get matched in LF space, and the original line ending
|
|
35
|
-
# is restored on write. Reason: {Tool::Read} renders content via
|
|
36
|
-
# +each_line+ + +chomp+, which strips +\r\n+ to +\n+ in what the
|
|
37
|
-
# model sees. A pure strict byte-match would then never succeed on
|
|
38
|
-
# CRLF files because the model can only ever supply LF. opencode and
|
|
39
|
-
# pi both do this normalization for the same reason.
|
|
40
|
-
#
|
|
41
|
-
# Algorithm:
|
|
42
|
-
#
|
|
43
|
-
# 1. Detect whether the file contains +\r\n+ anywhere (treat as CRLF).
|
|
44
|
-
# 2. Normalize content, +old_string+, and +new_string+ to LF.
|
|
45
|
-
# 3. Match + replace in LF space.
|
|
46
|
-
# 4. If the file was CRLF, convert +\n+ → +\r\n+ on the way back out.
|
|
47
|
-
#
|
|
48
|
-
# Caveat: a mixed-line-ending file is treated as CRLF, which means
|
|
49
|
-
# any pre-existing bare-LF lines get converted on write. Rare in
|
|
50
|
-
# practice; acceptable for v1.
|
|
51
|
-
#
|
|
52
|
-
# == Refusals
|
|
53
|
-
#
|
|
54
|
-
# All returned as +"Error: ..."+ observations the LLM can react to:
|
|
55
|
-
#
|
|
56
|
-
# * Empty +old_string+ → "use the write tool" (keeps Edit/Write roles
|
|
57
|
-
# non-overlapping).
|
|
58
|
-
# * +old_string+ == +new_string+ → no-op error.
|
|
59
|
-
# * +old_string+ not found in file → "must match exactly" error
|
|
60
|
-
# pointing at the read tool.
|
|
61
|
-
# * +old_string+ found multiple times without +replace_all+ →
|
|
62
|
-
# multi-match error suggesting more context or +replace_all+.
|
|
63
|
-
# * File missing / is a directory / is binary → respective error.
|
|
64
|
-
# * Workspace boundary violation / EACCES → standard rescue path.
|
|
65
|
-
class Edit < Tool
|
|
66
|
-
# Description shown to the LLM. Follows the opencode-shape (summary
|
|
67
|
-
# + +Usage:+ bullets) prescribed by the project's tool-description
|
|
68
|
-
# convention. Per-parameter constraints live in the parameter
|
|
69
|
-
# descriptions.
|
|
70
|
-
#
|
|
71
|
-
# @return [String]
|
|
72
|
-
DESCRIPTION = <<~DESC
|
|
73
|
-
Edit a file by exact-string replacement.
|
|
74
|
-
|
|
75
|
-
Usage:
|
|
76
|
-
- Use for partial changes to an existing file; for full rewrites or new files use `write` instead.
|
|
77
|
-
- `old_string` must match the file byte-for-byte (whitespace and indentation count); re-read the file with `read` if uncertain.
|
|
78
|
-
- `old_string` and `new_string` must differ.
|
|
79
|
-
- If `old_string` matches multiple times the call fails — add surrounding context to make the match unique, or set `replace_all: true`.
|
|
80
|
-
- Cannot create files (rejects empty `old_string` and missing files).
|
|
81
|
-
- Binary files are refused.
|
|
82
|
-
- CRLF files are matched in LF space; the original line endings are preserved on write.
|
|
83
|
-
DESC
|
|
84
|
-
|
|
85
|
-
# @param workspace [Tool::Workspace] captured for path resolution;
|
|
86
|
-
# all reads/writes route through +workspace.resolve_for_write+
|
|
87
|
-
# (Edit modifies, so it uses the write-set even though it doesn't
|
|
88
|
-
# create files).
|
|
89
|
-
# @return [Edit]
|
|
90
|
-
def initialize(workspace:)
  # Declare the argument schema up front so the +super+ call below stays
  # short. Description strings are what the LLM sees for each parameter.
  schema = Parameters.build do |params|
    params.required_string(
      :path,
      'Path to the file to edit. Relative paths ' \
      'resolve against the workspace root, e.g. ' \
      '"lib/foo.rb".'
    )
    params.required_string(
      :old_string,
      'Exact text to find in the file. Must match ' \
      'byte-for-byte (whitespace counts); must be ' \
      'unique unless replace_all is true. Example: ' \
      '"def foo\n bar\nend".'
    )
    params.required_string(
      :new_string,
      'Replacement text. Must differ from ' \
      'old_string. Example: "def foo\n baz\nend".'
    )
    params.optional_boolean(
      :replace_all,
      'Replace every occurrence of old_string ' \
      'instead of failing on multiple matches. ' \
      'Defaults to false, e.g. true.'
    )
  end

  # The lambda closes over +workspace+, so each tool call is forwarded to
  # {Edit.edit} with the workspace this instance was built with.
  runner = lambda do |path:, old_string:, new_string:, replace_all: false|
    Edit.edit(workspace: workspace, path: path,
              old_string: old_string, new_string: new_string,
              replace_all: replace_all)
  end

  super(name: 'edit', description: DESCRIPTION, parameters: schema, execute: runner)
end
|
|
119
|
-
|
|
120
|
-
# Resolve +path+ against +workspace+, run the precondition checks
|
|
121
|
-
# (non-empty / non-identical / file exists / not directory / not
|
|
122
|
-
# binary), match +old_string+ in line-ending-normalized form, and
|
|
123
|
-
# write the result back preserving the file's original line endings.
|
|
124
|
-
#
|
|
125
|
-
# @param workspace [Tool::Workspace]
|
|
126
|
-
# @param path [String] raw path as supplied by the LLM
|
|
127
|
-
# @param old_string [String] text to find
|
|
128
|
-
# @param new_string [String] text to substitute in
|
|
129
|
-
# @param replace_all [Boolean] when true, every occurrence is
|
|
130
|
-
# replaced; when false (default) multiple matches are an error
|
|
131
|
-
# @return [String] tool observation
|
|
132
|
-
# Apply an exact-string replacement to the file at +path+.
#
# Order of operations:
#
# 1. Reject an empty +old_string+ and an +old_string+ equal to
#    +new_string+.
# 2. Normalize both strings to LF and reject the edit if they are equal
#    *after* normalization (they differed only in line endings, which
#    are never matched literally, so the edit could not change anything).
# 3. Resolve +path+ through +workspace.resolve_for_write+ and refuse
#    missing files, directories, and content that
#    +Tool::Read.binary?+ flags as binary.
# 4. Match in LF space; a file containing any +\r\n+ is treated as CRLF
#    and gets +\n+ → +\r\n+ applied to the whole result before writing.
#
# @param workspace [Tool::Workspace]
# @param path [String] raw path as supplied by the LLM
# @param old_string [String] text to find
# @param new_string [String] text to substitute in
# @param replace_all [Boolean] when true, every occurrence is replaced;
#   when false, more than one match is an error
# @return [String] tool observation: a success summary or an
#   +"Error: ..."+ string
def self.edit(workspace:, path:, old_string:, new_string:, replace_all:)
  return 'Error: old_string is empty; use the write tool to create or overwrite a file.' if old_string.empty?
  return 'Error: old_string and new_string are identical — this edit is a no-op.' if old_string == new_string

  needle = normalize_lf(old_string)
  patch = normalize_lf(new_string)
  # Both sides are matched and written in LF space, so a pair that only
  # differs in \r\n vs \n would "succeed" without changing anything.
  if needle == patch
    return 'Error: old_string and new_string differ only in line endings, which are ' \
           'normalized before matching — this edit is a no-op.'
  end

  resolved = workspace.resolve_for_write(path)
  return "Error: file not found: #{path}" unless resolved.exist?
  return "Error: #{path} is a directory" if resolved.directory?

  raw = resolved.binread
  sample = raw.byteslice(0, Tool::Read::BINARY_SAMPLE_BYTES)
  return "Error: cannot edit binary file: #{path}" if Tool::Read.binary?(sample)

  crlf = raw.include?("\r\n")
  content = crlf ? raw.gsub("\r\n", "\n") : raw

  occurrences = content.scan(needle).size
  if occurrences.zero?
    return "Error: old_string not found in #{path}. It must match the file " \
           'exactly, including whitespace and indentation; re-read with the ' \
           'read tool if uncertain.'
  end
  if occurrences > 1 && !replace_all
    return "Error: old_string matches #{occurrences} times in #{path}. " \
           'Provide more surrounding context to make the match unique, ' \
           'or set replace_all=true to replace all occurrences.'
  end

  replaced = replace_all ? occurrences : 1
  # A String pattern is matched literally, and the block form bypasses the
  # \1 / \& interpolation applied to a replacement String — we want
  # literal substitution in both branches.
  new_content =
    if replace_all
      content.gsub(needle) { patch }
    else
      content.sub(needle) { patch }
    end

  final = crlf ? new_content.gsub("\n", "\r\n") : new_content
  resolved.write(final)

  "Edited #{path}: replaced #{replaced} occurrence#{replaced == 1 ? '' : 's'}."
rescue Tool::Workspace::Error => e
  "Error: #{e.message}"
rescue Errno::EACCES => e
  "Error: cannot edit #{path}: #{e.message}"
end

# Return a BINARY-encoded copy of +str+ with every +\r\n+ collapsed to
# +\n+. Used on +old_string+ and +new_string+ so they are compared
# byte-wise against file content that was read with +binread+.
#
# @param str [String]
# @return [String] BINARY-encoded, CRLF-collapsed copy
def self.normalize_lf(str)
  str.b.gsub("\r\n", "\n")
end
|
|
193
|
-
private_class_method :normalize_lf
|
|
194
|
-
end
|
|
195
|
-
end
|
|
196
|
-
end
|
data/lib/pikuri/tool/fetch.rb
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pikuri
|
|
4
|
-
class Tool
|
|
5
|
-
# Truncation policy and Tool spec for the +fetch+ tool. The HTTP work
|
|
6
|
-
# lives in {Tool::Scraper::Simple.fetch}; this module is a thin
|
|
7
|
-
# wrapper that accepts only textual content-types, applies a character
|
|
8
|
-
# cap so the LLM doesn't drown in long-form bodies, and exposes the
|
|
9
|
-
# result to the agent loop in OpenAI tool-call shape.
|
|
10
|
-
#
|
|
11
|
-
# Sister of {Tool::WebScrape}, but without HTML→Markdown or PDF→text
|
|
12
|
-
# extraction: bodies are returned verbatim. Useful for raw textual
|
|
13
|
-
# data — JSON APIs, CSV files, +robots.txt+, sitemaps, source files —
|
|
14
|
-
# where any rendering pass would corrupt the payload.
|
|
15
|
-
module Fetch
|
|
16
|
-
# @return [Integer] default character cap on the body returned by
|
|
17
|
-
# {.fetch}. Smaller than {Tool::WebScrape::DEFAULT_MAX_CHARS}
|
|
18
|
-
# because fetch's content profile is bimodal — most JSON/XML/CSV
|
|
19
|
-
# responses are tiny, and the long-tail (large data dumps) is
|
|
20
|
-
# better re-requested deliberately than padded into every default.
|
|
21
|
-
DEFAULT_MAX_CHARS = 5_000
|
|
22
|
-
|
|
23
|
-
# @return [Integer] hard ceiling on the +max_chars+ argument to
|
|
24
|
-
# {.fetch}. Matches {Tool::WebScrape::MAX_MAX_CHARS}.
|
|
25
|
-
MAX_MAX_CHARS = 100_000
|
|
26
|
-
|
|
27
|
-
# Application content-types that are textual in practice and so
|
|
28
|
-
# safe to return verbatim to the LLM, despite their +application/+
|
|
29
|
-
# prefix making them fail the +text/*+ check. Anything outside
|
|
30
|
-
# +text/*+ and this allowlist is refused.
|
|
31
|
-
# @return [Array<String>]
|
|
32
|
-
TEXTUAL_APPLICATION_TYPES = %w[
|
|
33
|
-
application/json
|
|
34
|
-
application/xml
|
|
35
|
-
application/javascript
|
|
36
|
-
application/xhtml+xml
|
|
37
|
-
application/rss+xml
|
|
38
|
-
application/atom+xml
|
|
39
|
-
].freeze
|
|
40
|
-
|
|
41
|
-
# On-disk cache used by {.fetch} to memoize downloads. Defined as a
|
|
42
|
-
# method so specs can swap it for an isolated cache or
|
|
43
|
-
# {UrlCache::NULL} without touching the shared instance. Lives in
|
|
44
|
-
# its own subdir under {UrlCache::ROOT_DIR} so a +fetch+ on a URL
|
|
45
|
-
# and a +web_scrape+ on the same URL cannot collide on the same
|
|
46
|
-
# cache file (one returns the raw body, the other returns extracted
|
|
47
|
-
# Markdown).
|
|
48
|
-
#
|
|
49
|
-
# @return [UrlCache, #fetch]
|
|
50
|
-
CACHE = UrlCache.new(ttl: UrlCache::DEFAULT_TTL, dir: "#{UrlCache::ROOT_DIR}/fetch")
|
|
51
|
-
def self.cache
|
|
52
|
-
CACHE
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Download +url+ via {Tool::Scraper::Simple.fetch} and return the
|
|
56
|
-
# response body verbatim, provided the content-type is one we deem
|
|
57
|
-
# textual (any +text/*+, plus the formats listed in
|
|
58
|
-
# {TEXTUAL_APPLICATION_TYPES}). Anything else — PDFs, images, other
|
|
59
|
-
# binaries — produces an +"Error: ..."+ string in the calculator-
|
|
60
|
-
# style convention so the agent loop feeds the failure back to the
|
|
61
|
-
# model as the next observation.
|
|
62
|
-
#
|
|
63
|
-
# The body is cached on disk via {.cache}, keyed by URL, so repeat
|
|
64
|
-
# fetches within the cache TTL skip the network. +max_chars+ is not
|
|
65
|
-
# part of the cache key — different values for the same URL share
|
|
66
|
-
# one entry, and truncation runs after the cache lookup. The cache
|
|
67
|
-
# is only populated on success: {Scraper::FetchError} (HTTP non-2xx,
|
|
68
|
-
# network failure, redirect-loop exhaustion, refused content-type)
|
|
69
|
-
# is caught outside the +cache.fetch+ block, so failure strings are
|
|
70
|
-
# never persisted and a retry on the next call hits the network
|
|
71
|
-
# again. Other exceptions (parser bugs in our own code) bubble up
|
|
72
|
-
# unchanged.
|
|
73
|
-
#
|
|
74
|
-
# @param url [String] absolute HTTP(S) URL to download
|
|
75
|
-
# @param max_chars [Integer] character cap on the returned body.
|
|
76
|
-
# Clamped to +[1, {MAX_MAX_CHARS}]+; defaults to
|
|
77
|
-
# {DEFAULT_MAX_CHARS}. When the body exceeds the cap, output is
|
|
78
|
-
# cut and a marker noting the original length is appended.
|
|
79
|
-
# @return [String] response body, possibly truncated, or
|
|
80
|
-
# +"Error: ..."+ on a recoverable failure
|
|
81
|
-
# Return the (possibly truncated) body for +url+, going through {.cache}
# so {.download} only runs on a cache miss. +max_chars+ is clamped to
# +1..MAX_MAX_CHARS+ before truncation. A {Scraper::FetchError} raised
# while fetching is converted into an +"Error: ..."+ string; any other
# exception propagates.
#
# @param url [String]
# @param max_chars [Integer]
# @return [String]
def self.fetch(url, max_chars: DEFAULT_MAX_CHARS)
  limit = max_chars.clamp(1, MAX_MAX_CHARS)

  begin
    truncate(cache.fetch(url) { download(url) }, limit)
  rescue Scraper::FetchError => failure
    "Error: #{failure.message}"
  end
end
|
|
88
|
-
|
|
89
|
-
# GET +url+ and verify the response's content-type is textual.
|
|
90
|
-
# Caller is responsible for caching and truncation; this method
|
|
91
|
-
# always hits the network.
|
|
92
|
-
#
|
|
93
|
-
# @param url [String]
|
|
94
|
-
# @return [String] response body
|
|
95
|
-
# @raise [Scraper::FetchError] on HTTP non-2xx, network failure,
|
|
96
|
-
# redirect-loop exhaustion, missing +Location+ on a 3xx, or a
|
|
97
|
-
# non-textual content-type
|
|
98
|
-
# Fetch +url+ through {Scraper::Simple.fetch} and hand back the body,
# but only when {.textual?} accepts the response's content-type.
#
# @param url [String]
# @return [String] response body
# @raise [Scraper::FetchError] when the content-type is not textual
#   (errors raised by {Scraper::Simple.fetch} itself pass through)
def self.download(url)
  response = Scraper::Simple.fetch(url)
  kind = response.content_type

  unless textual?(kind)
    raise Scraper::FetchError,
          "refused to fetch #{url}: content-type #{kind.inspect} " \
          'is not textual (use web_scrape for PDFs or rendered pages)'
  end

  response.body
end
|
|
106
|
-
|
|
107
|
-
# @param content_type [String] normalized content-type (no +charset+
|
|
108
|
-
# parameter, lowercased) as produced by {Scraper::Simple.fetch}
|
|
109
|
-
# @return [Boolean] true when the content-type is +text/*+ or one
|
|
110
|
-
# of {TEXTUAL_APPLICATION_TYPES}
|
|
111
|
-
# Decide whether a content-type may be returned verbatim: anything under
# +text/+, or an exact entry of {TEXTUAL_APPLICATION_TYPES}.
#
# @param content_type [String] compared as-is (prefix / exact match)
# @return [Boolean]
def self.textual?(content_type)
  return true if content_type.start_with?('text/')

  TEXTUAL_APPLICATION_TYPES.include?(content_type)
end
|
|
115
|
-
|
|
116
|
-
# Cut +body+ to at most +max_chars+ characters, appending a marker
|
|
117
|
-
# describing the original length when truncation actually happens.
|
|
118
|
-
# Returns +body+ unchanged if it already fits. Same shape as
|
|
119
|
-
# {Tool::WebScrape.truncate} so the LLM sees a consistent
|
|
120
|
-
# truncation marker across both tools.
|
|
121
|
-
#
|
|
122
|
-
# @param body [String] full response body
|
|
123
|
-
# @param max_chars [Integer] character cap; assumed already clamped
|
|
124
|
-
# @return [String]
|
|
125
|
-
# Limit +body+ to +max_chars+ characters. A body that already fits is
# returned as the same object; otherwise the first +max_chars+ characters
# are kept and followed by a marker stating the cap and the full length.
#
# @param body [String] full response body
# @param max_chars [Integer] character cap
# @return [String]
def self.truncate(body, max_chars)
  full_length = body.length
  return body unless full_length > max_chars

  kept = body[0, max_chars]
  "#{kept}\n\n" \
    "... [truncated at #{max_chars} of #{full_length} chars; " \
    'call again with a larger `max_chars` to see more]'
end
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
# Verbatim URL download tool. Thin wrapper over {Tool::Fetch.fetch}
|
|
135
|
-
# that exposes it to the agent loop in OpenAI tool-call shape. Use for
|
|
136
|
-
# raw textual payloads (JSON APIs, CSV files, +robots.txt+, source
|
|
137
|
-
# files); use {Tool::WEB_SCRAPE} for rendered web pages or PDFs where
|
|
138
|
-
# readability extraction makes the result usable.
|
|
139
|
-
#
|
|
140
|
-
# @return [Tool]
|
|
141
|
-
# Argument schema for the +fetch+ tool; the description strings are what
# the LLM sees for each parameter.
fetch_parameters = Parameters.build do |params|
  params.required_string(
    :url,
    'Absolute URL to download, including the scheme, ' \
    'e.g. "https://example.com/data.json".'
  )
  params.optional_integer(
    :max_chars,
    'Maximum number of characters of the body to ' \
    'return. Defaults to 5000; hard-capped at ' \
    '100000. When the body is longer than this, ' \
    'output is cut and a marker reports the full ' \
    'length.'
  )
end

# Tool instance that forwards each call to {Fetch.fetch}.
FETCH = new(
  name: 'fetch',
  description: <<~DESC,
    Downloads the given URL and returns its body verbatim.

    Usage:
    - Use for raw textual payloads: JSON APIs, CSV files, robots.txt, sitemaps, source files — anywhere a rendering pass would corrupt the data.
    - For rendered HTML pages or PDFs, use web_scrape — it extracts readable content; fetch returns the raw HTML/PDF bytes unchanged.
    - Accepts text/* and common textual application/* types (JSON, XML, JS, XHTML, RSS, Atom). Refuses PDFs, images, and other binaries.
  DESC
  parameters: fetch_parameters,
  execute: lambda { |url:, max_chars: Fetch::DEFAULT_MAX_CHARS|
    Fetch.fetch(url, max_chars: max_chars)
  }
)
|
|
166
|
-
end
|
|
167
|
-
end
|
data/lib/pikuri/tool/glob.rb
DELETED
|
@@ -1,310 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pikuri
|
|
4
|
-
class Tool
|
|
5
|
-
# The +glob+ tool — list files matching a glob pattern via
|
|
6
|
-
# +rg --files+, sorted by modification time (newest first).
|
|
7
|
-
# Instantiating +Tool::Glob.new(workspace: ws)+ produces a tool
|
|
8
|
-
# whose {Tool#to_ruby_llm_tool} wiring is identical to any bundled
|
|
9
|
-
# tool's. Same shape as {Tool::Grep} (workspace captured by the
|
|
10
|
-
# +execute+ closure, no confirmer — read-only).
|
|
11
|
-
#
|
|
12
|
-
# == Why a separate tool from Grep
|
|
13
|
-
#
|
|
14
|
-
# The unique capability is *mtime-descending sort* — "what's been
|
|
15
|
-
# touched recently" is a common navigation move and Grep can't
|
|
16
|
-
# express it. The rest (filter by name, default to listing all
|
|
17
|
-
# matching files) is theoretically reachable through Grep with
|
|
18
|
-
# +pattern="."+, but Glob avoids that hack and keeps Read / Grep /
|
|
19
|
-
# Glob as three clean roles: read one file, search content, list
|
|
20
|
-
# files by name.
|
|
21
|
-
#
|
|
22
|
-
# == ripgrep dependency
|
|
23
|
-
#
|
|
24
|
-
# Hard dependency: {.check_binaries!} runs in +initialize+ and
|
|
25
|
-
# raises if +rg+ isn't on +PATH+. Each tool owns its own probe so
|
|
26
|
-
# construction order doesn't matter — Glob doesn't lean on Grep's
|
|
27
|
-
# check.
|
|
28
|
-
#
|
|
29
|
-
# == Argv & filter pipeline
|
|
30
|
-
#
|
|
31
|
-
# rg --files --color=never --hidden --glob '!.git/*' \
|
|
32
|
-
# -- <relative-path-or-dot>
|
|
33
|
-
# # …then filter the result list in Ruby with File.fnmatch?
|
|
34
|
-
#
|
|
35
|
-
# Why not pass the user pattern as +--glob+ to rg? Because rg's
|
|
36
|
-
# +--glob+ documentation says *"This always overrides any other
|
|
37
|
-
# ignore logic"* — so +--glob '**/*.rb'+ would re-include
|
|
38
|
-
# +.gitignore+'d Ruby files, breaking our gitignore-respect
|
|
39
|
-
# promise. We let rg produce the full gitignore-respecting file
|
|
40
|
-
# list and filter to the user's pattern in Ruby with
|
|
41
|
-
# +File.fnmatch?(pattern, p, FNM_PATHNAME | FNM_EXTGLOB |
|
|
42
|
-
# FNM_DOTMATCH)+. The three flags together cover the common rg
|
|
43
|
-
# glob cases: +**+ recursion (+FNM_PATHNAME+), +{a,b}+ alternation
|
|
44
|
-
# (+FNM_EXTGLOB+), and dotfile inclusion (+FNM_DOTMATCH+, matching
|
|
45
|
-
# rg's +--hidden+ behavior). The +.git/+ exclusion stays on the rg
|
|
46
|
-
# side so its contents never even reach the Ruby filter.
|
|
47
|
-
#
|
|
48
|
-
# * +--hidden+ → search dotfiles (still respects +.gitignore+).
|
|
49
|
-
# * No +--sort+ flag: we re-sort by mtime in Ruby on the way out.
|
|
50
|
-
# * Output paths come back as +./...+ when the search path is +.+;
|
|
51
|
-
# the leading +./+ is stripped post-rg so the model sees clean
|
|
52
|
-
# workspace-relative paths.
|
|
53
|
-
#
|
|
54
|
-
# == Sort
|
|
55
|
-
#
|
|
56
|
-
# mtime-descending in Ruby after rg returns, with path-ascending
|
|
57
|
-
# as a tiebreaker for files with equal mtimes (the common case in
|
|
58
|
-
# fresh checkouts). Cost: one +stat+ per result. Broad patterns
|
|
59
|
-
# can make this expensive, but in practice rg's +.gitignore+ filter
|
|
60
|
-
# keeps result sets bounded; if real friction shows up later we can
|
|
61
|
-
# cap pre-sort.
|
|
62
|
-
#
|
|
63
|
-
# == Truncation
|
|
64
|
-
#
|
|
65
|
-
# Total output head-truncated to {MAX_BYTES} *after* mtime sort, so
|
|
66
|
-
# the kept rows are the newest. Matches {Tool::Grep}'s budget and
|
|
67
|
-
# head-bias.
|
|
68
|
-
#
|
|
69
|
-
# == Exit codes
|
|
70
|
-
#
|
|
71
|
-
# * +0+ → at least one file; format with footer.
|
|
72
|
-
# * +1+ → no files; return +"No files match pattern '...'"+.
|
|
73
|
-
# * +2+ → rg error (bad path, bad glob); return
|
|
74
|
-
# +"Error: ripgrep: ..."+.
|
|
75
|
-
#
|
|
76
|
-
# == Refusals
|
|
77
|
-
#
|
|
78
|
-
# All returned as +"Error: ..."+ observations:
|
|
79
|
-
#
|
|
80
|
-
# * Empty +pattern+ → fast reject.
|
|
81
|
-
# * +path+ is a regular file → fast reject pointing at the +read+
|
|
82
|
-
# tool.
|
|
83
|
-
# * +path+ not found → +"Error: path not found: <path>"+.
|
|
84
|
-
# * +path+ outside the workspace → caught from
|
|
85
|
-
# {Tool::Workspace::Error}.
|
|
86
|
-
class Glob < Tool
|
|
87
|
-
# @return [Integer] hard byte cap on combined rg output. Same
|
|
88
|
-
# value as {Tool::Grep::MAX_BYTES} so the two file-touching
|
|
89
|
-
# tools share a budget shape. Re-declared here rather than
|
|
90
|
-
# referenced cross-file because Zeitwerk's eager-load order
|
|
91
|
-
# isn't guaranteed between siblings.
|
|
92
|
-
MAX_BYTES = 50 * 1024
|
|
93
|
-
|
|
94
|
-
# @return [String] human-readable form of {MAX_BYTES} for the
|
|
95
|
-
# truncation marker.
|
|
96
|
-
MAX_BYTES_LABEL = "#{MAX_BYTES / 1024} KB"
|
|
97
|
-
|
|
98
|
-
# Description shown to the LLM. opencode-shape (summary +
|
|
99
|
-
# +Usage:+ bullets). Per-parameter constraints live in parameter
|
|
100
|
-
# descriptions.
|
|
101
|
-
#
|
|
102
|
-
# @return [String]
|
|
103
|
-
DESCRIPTION = <<~DESC
|
|
104
|
-
List files matching a glob pattern, sorted by modification time (newest first).
|
|
105
|
-
|
|
106
|
-
Usage:
|
|
107
|
-
- `.gitignore` is respected; for unfiltered listing use bash `rg --no-ignore --files -g <pattern>`.
|
|
108
|
-
- Glob syntax: `**` matches any number of directories, `*` matches any filename chars (not `/`), `{a,b}` is alternation.
|
|
109
|
-
- Default search root is the workspace root; pass `path` to narrow to a subdirectory.
|
|
110
|
-
- Use `glob` to find files by name; use `grep` to find files by content.
|
|
111
|
-
- Output is sorted by mtime descending — recently-touched files come first, so broad patterns still surface relevant files near the top.
|
|
112
|
-
- Output is truncated to #{MAX_BYTES_LABEL}; refine the pattern or narrow `path` if the response ends in a truncation marker.
|
|
113
|
-
DESC
|
|
114
|
-
|
|
115
|
-
# @param workspace [Tool::Workspace] captured for path resolution
|
|
116
|
-
# and as +chdir+ for rg. All path arguments route through
|
|
117
|
-
# +workspace.resolve_for_read+.
|
|
118
|
-
# @raise [RuntimeError] if +rg+ isn't on +PATH+; fail-loud at
|
|
119
|
-
# construction rather than the first tool call.
|
|
120
|
-
# @return [Glob]
|
|
121
|
-
def initialize(workspace:)
  # Probe for +rg+ before anything else so a missing binary fails at
  # construction time. +send+ is needed because the probe is private.
  Glob.send(:check_binaries!)

  schema = Parameters.build do |params|
    params.required_string(
      :pattern,
      'Glob pattern (** matches any number of ' \
      'directories; {a,b} alternation), e.g. ' \
      '"**/*.rb" or "lib/**/*_spec.rb".'
    )
    params.optional_string(
      :path,
      'Directory to search in. Relative paths resolve ' \
      'against the workspace root. Defaults to the ' \
      'workspace root, e.g. "lib/" or "spec/".'
    )
  end

  # Closes over +workspace+; every call is forwarded to {Glob.search}.
  runner = lambda do |pattern:, path: nil|
    Glob.search(workspace: workspace, pattern: pattern, path: path)
  end

  super(name: 'glob', description: DESCRIPTION, parameters: schema, execute: runner)
end
|
|
141
|
-
|
|
142
|
-
# Validate inputs, resolve the path against the workspace, spawn
|
|
143
|
-
# rg, mtime-sort, head-truncate, render. Returns either the
|
|
144
|
-
# formatted listing, a "no files match" message, or
|
|
145
|
-
# +"Error: ..."+.
|
|
146
|
-
#
|
|
147
|
-
# @param workspace [Tool::Workspace]
|
|
148
|
-
# @param pattern [String]
|
|
149
|
-
# @param path [String, nil]
|
|
150
|
-
# @return [String]
|
|
151
|
-
# Entry point for the +glob+ tool.
#
# Rejects an empty +pattern+, resolves an optional +path+ through
# +workspace.resolve_for_read+ (it must exist and must not be a regular
# file), runs the argv from +build_argv+ with the workspace cwd as
# +chdir+, and maps the exit status: 0 → +format_output+, 1 →
# +no_match_message+, anything else → +"Error: ripgrep: ..."+ built from
# the process output (or the exit code when the output is blank).
#
# @param workspace [Tool::Workspace]
# @param pattern [String]
# @param path [String, nil]
# @return [String]
def self.search(workspace:, pattern:, path:)
  return 'Error: empty pattern.' if pattern.empty?

  search_root = '.'
  if path
    location = workspace.resolve_for_read(path)
    return "Error: path not found: #{path}" unless location.exist?
    return "Error: #{path} is a file, not a directory; use the read tool to view it." if location.file?

    # rg runs with chdir = workspace cwd, so hand it a cwd-relative path.
    search_root = location.relative_path_from(workspace.cwd).to_s
  end

  command = build_argv(path: search_root)
  run = Pikuri::Subprocess.spawn(*command, chdir: workspace.cwd.to_s).wait
  code = run.status.exitstatus

  return format_output(run.output, workspace: workspace, pattern: pattern, path: path) if code == 0
  return no_match_message(pattern: pattern, path: path) if code == 1

  detail = run.output.strip
  detail = "exited #{code}" if detail.empty?
  "Error: ripgrep: #{detail}"
rescue Tool::Workspace::Error => e
  "Error: #{e.message}"
end
|
|
184
|
-
|
|
185
|
-
# @return [Integer] flags for {File.fnmatch?}: +FNM_PATHNAME+ for
|
|
186
|
-
# +**+ recursion + path-aware +/+ matching, +FNM_EXTGLOB+ for
|
|
187
|
-
# +{a,b}+ alternation, +FNM_DOTMATCH+ to match dotfiles (rg
|
|
188
|
-
# does this when +--hidden+ is set).
|
|
189
|
-
FNMATCH_FLAGS = File::FNM_PATHNAME | File::FNM_EXTGLOB | File::FNM_DOTMATCH
|
|
190
|
-
|
|
191
|
-
# Build the +rg+ argv. User pattern is NOT passed to rg — see
|
|
192
|
-
# the class header for why (rg's +--glob+ overrides
|
|
193
|
-
# +.gitignore+).
|
|
194
|
-
#
|
|
195
|
-
# @return [Array<String>]
|
|
196
|
-
# Assemble the +rg --files+ command line for +path+. Only fixed flags and
# the +.git+ exclusion are passed; +path+ goes after +--+ so it is never
# parsed as an option.
#
# @param path [String] search root handed to rg
# @return [Array<String>]
def self.build_argv(path:)
  %w[rg --files --color=never --hidden --glob !.git/* --] << path
end
|
|
206
|
-
private_class_method :build_argv
|
|
207
|
-
|
|
208
|
-
# Strip the +./+ prefix rg adds when invoked with +.+ as the
|
|
209
|
-
# search path, filter to the user pattern with +fnmatch+,
|
|
210
|
-
# mtime-sort descending (path ascending as tiebreaker),
|
|
211
|
-
# head-truncate at {MAX_BYTES}, append a footer summarizing the
|
|
212
|
-
# count.
|
|
213
|
-
#
|
|
214
|
-
# @return [String]
|
|
215
|
-
# Turn rg's raw file listing into the tool observation.
#
# Blank lines are dropped, a leading +./+ is removed from each path, and
# only paths matching +pattern+ under {FNMATCH_FLAGS} are kept. The
# survivors are ordered by +mtime_sort+, cut by +head_truncate+, and
# followed by a blank line plus a "Found N file(s)." footer (with the
# truncation marker appended when there is one). The count reflects the
# rows left after truncation.
#
# @param raw [String] newline-separated rg output
# @param workspace [Tool::Workspace] supplies +cwd+ for the mtime lookup
# @param pattern [String]
# @param path [String, nil] only used for the no-match message
# @return [String]
def self.format_output(raw, workspace:, pattern:, path:)
  matching = raw.split("\n").each_with_object([]) do |line, kept|
    next if line.empty?

    candidate = line.sub(%r{\A\./}, '')
    kept << candidate if File.fnmatch?(pattern, candidate, FNMATCH_FLAGS)
  end
  return no_match_message(pattern: pattern, path: path) if matching.empty?

  listing = mtime_sort(matching, workspace.cwd).join("\n") + "\n"
  kept_text, marker = head_truncate(listing)
  rows = kept_text.chomp
  summary = "Found #{pluralize(rows.split("\n").size, 'file', 'files')}."

  [rows, '', summary + marker].join("\n")
end
|
|
229
|
-
private_class_method :format_output
|
|
230
|
-
|
|
231
|
-
# mtime descending; path ascending for stable order on ties.
|
|
232
|
-
#
|
|
233
|
-
# @return [Array<String>]
|
|
234
|
-
# Order +paths+ newest-first by the mtime of +cwd + path+, breaking ties
# by ascending path so equal timestamps still yield a stable order.
#
# @param paths [Array<String>] paths relative to +cwd+
# @param cwd [Pathname] base joined with each path via +
# @return [Array<String>]
def self.mtime_sort(paths, cwd)
  # sort_by evaluates the key once per element, so each path is looked
  # up exactly once.
  paths.sort_by { |relative| [-mtime_of(cwd + relative), relative] }
end
|
|
240
|
-
private_class_method :mtime_sort
|
|
241
|
-
|
|
242
|
-
# @return [Float] epoch-seconds mtime; 0 for paths we can't stat
|
|
243
|
-
# (race between rg listing and our stat, deleted symlinks,
|
|
244
|
-
# etc.). The fallback puts unstattable entries at the bottom.
|
|
245
|
-
# Modification time of +absolute+ as epoch seconds.
#
# Any OS-level failure to stat the path (+ENOENT+ from a file deleted
# after rg listed it, but also +ENOTDIR+, +EACCES+, +ELOOP+, ...) yields
# 0.0 instead of raising, so one unstattable entry cannot abort the
# whole listing; with a newest-first sort such entries end up last.
#
# @param absolute [String, Pathname] path to stat
# @return [Float] epoch-seconds mtime, or 0.0 when the path can't be stat'ed
def self.mtime_of(absolute)
  File.mtime(absolute).to_f
rescue SystemCallError
  # Errno::* classes all descend from SystemCallError.
  0.0
end
|
|
250
|
-
private_class_method :mtime_of
|
|
251
|
-
|
|
252
|
-
# Head-truncate +raw+ to {MAX_BYTES}, cutting at the last newline
|
|
253
|
-
# boundary so the final row is never partial. Returns the
|
|
254
|
-
# truncated content and a marker String (empty if no truncation).
|
|
255
|
-
#
|
|
256
|
-
# @return [Array(String, String)]
|
|
257
|
-
# Head-truncate +raw+ to at most +limit+ bytes, cutting at the last
# newline inside that window so the final row is never partial.
#
# All offsets are computed in bytes: the newline is located on a BINARY
# view of the window, because +String#rindex+ on a UTF-8 string returns
# a *character* index, which is smaller than the byte offset as soon as
# a path contains a multibyte character. When the window holds no
# newline at all, the window itself is kept, minus any trailing bytes of
# a character split by the cut.
#
# @param raw [String] newline-terminated listing
# @param limit [Integer] byte budget; defaults to {MAX_BYTES}
# @return [Array(String, String)] the kept content and a marker String
#   (empty when nothing was cut)
def self.head_truncate(raw, limit: MAX_BYTES)
  total = raw.bytesize
  return [raw, ''] if total <= limit

  window = raw.byteslice(0, limit)
  last_nl = window.b.rindex("\n")
  head = last_nl ? raw.byteslice(0, last_nl) : window.scrub('')

  omitted = total - head.bytesize
  marker = "\n\n... [#{omitted} bytes omitted; total was #{total} bytes; " \
           'refine pattern or path] ...'
  [head, marker]
end
|
|
269
|
-
private_class_method :head_truncate
|
|
270
|
-
|
|
271
|
-
# @return [String]
|
|
272
|
-
# Build the "nothing matched" observation, mentioning +path+ only when
# one was given.
#
# @param pattern [String]
# @param path [String, nil]
# @return [String]
def self.no_match_message(pattern:, path:)
  scope = path ? " in #{path}" : ''
  "No files match pattern '#{pattern}'#{scope}."
end
|
|
277
|
-
private_class_method :no_match_message
|
|
278
|
-
|
|
279
|
-
# @return [String] +"1 file"+ / +"2 files"+
|
|
280
|
-
# Render a count with the matching noun form: +sing+ when +n+ is 1,
# +plural+ otherwise (including 0).
#
# @param n [Integer]
# @param sing [String]
# @param plural [String]
# @return [String] e.g. +"1 file"+ / +"2 files"+
def self.pluralize(n, sing, plural)
  noun = n == 1 ? sing : plural
  "#{n} #{noun}"
end
|
|
283
|
-
private_class_method :pluralize
|
|
284
|
-
|
|
285
|
-
# Verify +rg+ is reachable on +PATH+. Routed through
|
|
286
|
-
# {Pikuri::Subprocess.spawn} to honor the subprocess seam. rg
|
|
287
|
-
# missing surfaces as +Errno::ENOENT+; an installed rg returns
|
|
288
|
-
# exit 0 from +--version+.
|
|
289
|
-
#
|
|
290
|
-
# @return [void]
|
|
291
|
-
# @raise [RuntimeError] if rg is missing
|
|
292
|
-
# Probe for ripgrep by running +rg --version+ through
# {Pikuri::Subprocess.spawn}. Returns quietly when the probe exits
# successfully; an unsuccessful exit or an +Errno::ENOENT+ from the spawn
# raises a RuntimeError carrying +install_hint+.
#
# @return [void]
# @raise [RuntimeError] if the probe fails
def self.check_binaries!
  probe = Pikuri::Subprocess.spawn('rg', '--version', chdir: '/').wait
  raise install_hint unless probe.status.success?
rescue Errno::ENOENT
  raise install_hint
end
|
|
300
|
-
private_class_method :check_binaries!
|
|
301
|
-
|
|
302
|
-
# @return [String]
|
|
303
|
-
# Message raised when the ripgrep probe fails: names the missing binary
# and suggests installing it through the system package manager.
#
# @return [String]
def self.install_hint
  [
    "Tool::Glob requires 'rg' (ripgrep) on PATH;",
    "install via your distro's package manager",
    "(e.g. 'apt install ripgrep')."
  ].join(' ')
end
|
|
307
|
-
private_class_method :install_hint
|
|
308
|
-
end
|
|
309
|
-
end
|
|
310
|
-
end
|