pikuri 0.0.1 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/README.md +43 -179
  3. data/lib/pikuri.rb +16 -162
  4. metadata +66 -123
  5. data/CHANGELOG.md +0 -62
  6. data/GETTING_STARTED.md +0 -223
  7. data/LICENSE +0 -21
  8. data/lib/pikuri/agent/chat_transport.rb +0 -41
  9. data/lib/pikuri/agent/context_window_detector.rb +0 -101
  10. data/lib/pikuri/agent/listener/in_memory_message_list.rb +0 -33
  11. data/lib/pikuri/agent/listener/message_listener.rb +0 -93
  12. data/lib/pikuri/agent/listener/step_limit.rb +0 -97
  13. data/lib/pikuri/agent/listener/terminal.rb +0 -137
  14. data/lib/pikuri/agent/listener/token_log.rb +0 -166
  15. data/lib/pikuri/agent/listener_list.rb +0 -113
  16. data/lib/pikuri/agent/message.rb +0 -61
  17. data/lib/pikuri/agent/synthesizer.rb +0 -120
  18. data/lib/pikuri/agent/tokens.rb +0 -56
  19. data/lib/pikuri/agent.rb +0 -286
  20. data/lib/pikuri/subprocess.rb +0 -166
  21. data/lib/pikuri/tool/bash.rb +0 -272
  22. data/lib/pikuri/tool/calculator.rb +0 -82
  23. data/lib/pikuri/tool/confirmer.rb +0 -96
  24. data/lib/pikuri/tool/edit.rb +0 -196
  25. data/lib/pikuri/tool/fetch.rb +0 -167
  26. data/lib/pikuri/tool/glob.rb +0 -310
  27. data/lib/pikuri/tool/grep.rb +0 -338
  28. data/lib/pikuri/tool/parameters.rb +0 -314
  29. data/lib/pikuri/tool/read.rb +0 -254
  30. data/lib/pikuri/tool/scraper/fetch_error.rb +0 -16
  31. data/lib/pikuri/tool/scraper/html.rb +0 -285
  32. data/lib/pikuri/tool/scraper/pdf.rb +0 -54
  33. data/lib/pikuri/tool/scraper/simple.rb +0 -177
  34. data/lib/pikuri/tool/search/brave.rb +0 -184
  35. data/lib/pikuri/tool/search/duckduckgo.rb +0 -196
  36. data/lib/pikuri/tool/search/engines.rb +0 -154
  37. data/lib/pikuri/tool/search/exa.rb +0 -217
  38. data/lib/pikuri/tool/search/rate_limiter.rb +0 -92
  39. data/lib/pikuri/tool/search/result.rb +0 -29
  40. data/lib/pikuri/tool/skill.rb +0 -80
  41. data/lib/pikuri/tool/skill_catalog.rb +0 -376
  42. data/lib/pikuri/tool/sub_agent.rb +0 -102
  43. data/lib/pikuri/tool/web_scrape.rb +0 -117
  44. data/lib/pikuri/tool/web_search.rb +0 -38
  45. data/lib/pikuri/tool/workspace.rb +0 -150
  46. data/lib/pikuri/tool/write.rb +0 -170
  47. data/lib/pikuri/tool.rb +0 -118
  48. data/lib/pikuri/url_cache.rb +0 -106
  49. data/lib/pikuri/version.rb +0 -10
  50. data/prompts/coding-system-prompt.txt +0 -28
  51. data/prompts/pikuri-chat.txt +0 -15
@@ -1,196 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Pikuri
4
- class Tool
5
- # The +edit+ tool — exact-string replacement on an existing file.
6
- # Instantiating +Tool::Edit.new(workspace: ws)+ produces a tool whose
7
- # {Tool#to_ruby_llm_tool} wiring is identical to any bundled tool's.
8
- # Same shape as {Tool::Read} (workspace captured by +execute+; no
9
- # confirmer needed).
10
- #
11
- # == Why no confirmer
12
- #
13
- # The +old_string+ argument is itself an implicit read-check: the
14
- # model can't write a correct +old_string+ without having seen the
15
- # file (via {Tool::Read} or out-of-band), so the blast radius of any
16
- # Edit is bounded by the model's actual knowledge of file state.
17
- # That makes Edit safe to execute without prompting — by contrast,
18
- # {Tool::Write} requires a confirmer because a hallucinated 500-line
19
- # +content+ could clobber unread bytes.
20
- #
21
- # == Matching is strict (no fuzz cascade)
22
- #
23
- # +old_string+ must match the file byte-for-byte. v1 ships *no*
24
- # fallback replacer (no whitespace-normalized, line-trimmed, block-
25
- # anchor, etc.). Predictability beats fuzz: when an Edit fails, the
26
- # model re-reads with {Tool::Read} and retries — clear failure mode,
27
- # no compounding-heuristic risk. opencode runs a 9-replacer cascade
28
- # under the hood despite its own description saying "must match
29
- # exactly"; pi stays strict. We match pi.
30
- #
31
- # == Line endings get normalized
32
- #
33
- # The one structural exception to "strict bytes": files with CRLF
34
- # line endings get matched in LF space, and the original line ending
35
- # is restored on write. Reason: {Tool::Read} renders content via
36
- # +each_line+ + +chomp+, which strips +\r\n+ to +\n+ in what the
37
- # model sees. A pure strict byte-match would then never succeed on
38
- # CRLF files because the model can only ever supply LF. opencode and
39
- # pi both do this normalization for the same reason.
40
- #
41
- # Algorithm:
42
- #
43
- # 1. Detect whether the file contains +\r\n+ anywhere (treat as CRLF).
44
- # 2. Normalize content, +old_string+, and +new_string+ to LF.
45
- # 3. Match + replace in LF space.
46
- # 4. If the file was CRLF, convert +\n+ → +\r\n+ on the way back out.
47
- #
48
- # Caveat: a mixed-line-ending file is treated as CRLF, which means
49
- # any pre-existing bare-LF lines get converted on write. Rare in
50
- # practice; acceptable for v1.
51
- #
52
- # == Refusals
53
- #
54
- # All returned as +"Error: ..."+ observations the LLM can react to:
55
- #
56
- # * Empty +old_string+ → "use the write tool" (keeps Edit/Write roles
57
- # non-overlapping).
58
- # * +old_string+ == +new_string+ → no-op error.
59
- # * +old_string+ not found in file → "must match exactly" error
60
- # pointing at the read tool.
61
- # * +old_string+ found multiple times without +replace_all+ →
62
- # multi-match error suggesting more context or +replace_all+.
63
- # * File missing / is a directory / is binary → respective error.
64
- # * Workspace boundary violation / EACCES → standard rescue path.
65
- class Edit < Tool
66
- # Description shown to the LLM. Follows the opencode-shape (summary
67
- # + +Usage:+ bullets) prescribed by the project's tool-description
68
- # convention. Per-parameter constraints live in the parameter
69
- # descriptions.
70
- #
71
- # @return [String]
72
- DESCRIPTION = <<~DESC
73
- Edit a file by exact-string replacement.
74
-
75
- Usage:
76
- - Use for partial changes to an existing file; for full rewrites or new files use `write` instead.
77
- - `old_string` must match the file byte-for-byte (whitespace and indentation count); re-read the file with `read` if uncertain.
78
- - `old_string` and `new_string` must differ.
79
- - If `old_string` matches multiple times the call fails — add surrounding context to make the match unique, or set `replace_all: true`.
80
- - Cannot create files (rejects empty `old_string` and missing files).
81
- - Binary files are refused.
82
- - CRLF files are matched in LF space; the original line endings are preserved on write.
83
- DESC
84
-
85
- # @param workspace [Tool::Workspace] captured for path resolution;
86
- # all reads/writes route through +workspace.resolve_for_write+
87
- # (Edit modifies, so it uses the write-set even though it doesn't
88
- # create files).
89
- # @return [Edit]
90
- def initialize(workspace:)
91
- super(
92
- name: 'edit',
93
- description: DESCRIPTION,
94
- parameters: Parameters.build { |p|
95
- p.required_string :path,
96
- 'Path to the file to edit. Relative paths ' \
97
- 'resolve against the workspace root, e.g. ' \
98
- '"lib/foo.rb".'
99
- p.required_string :old_string,
100
- 'Exact text to find in the file. Must match ' \
101
- 'byte-for-byte (whitespace counts); must be ' \
102
- 'unique unless replace_all is true. Example: ' \
103
- '"def foo\n bar\nend".'
104
- p.required_string :new_string,
105
- 'Replacement text. Must differ from ' \
106
- 'old_string. Example: "def foo\n baz\nend".'
107
- p.optional_boolean :replace_all,
108
- 'Replace every occurrence of old_string ' \
109
- 'instead of failing on multiple matches. ' \
110
- 'Defaults to false, e.g. true.'
111
- },
112
- execute: ->(path:, old_string:, new_string:, replace_all: false) {
113
- Edit.edit(workspace: workspace, path: path,
114
- old_string: old_string, new_string: new_string,
115
- replace_all: replace_all)
116
- }
117
- )
118
- end
119
-
120
- # Resolve +path+ against +workspace+, run the precondition checks
121
- # (non-empty / non-identical / file exists / not directory / not
122
- # binary), match +old_string+ in line-ending-normalized form, and
123
- # write the result back preserving the file's original line endings.
124
- #
125
- # @param workspace [Tool::Workspace]
126
- # @param path [String] raw path as supplied by the LLM
127
- # @param old_string [String] text to find
128
- # @param new_string [String] text to substitute in
129
- # @param replace_all [Boolean] when true, every occurrence is
130
- # replaced; when false (default) multiple matches are an error
131
- # @return [String] tool observation
132
- def self.edit(workspace:, path:, old_string:, new_string:, replace_all:)
133
- return 'Error: old_string is empty; use the write tool to create or overwrite a file.' if old_string.empty?
134
- return 'Error: old_string and new_string are identical — this edit is a no-op.' if old_string == new_string
135
-
136
- resolved = workspace.resolve_for_write(path)
137
- return "Error: file not found: #{path}" unless resolved.exist?
138
- return "Error: #{path} is a directory" if resolved.directory?
139
-
140
- raw = resolved.binread
141
- sample = raw.byteslice(0, Tool::Read::BINARY_SAMPLE_BYTES)
142
- return "Error: cannot edit binary file: #{path}" if Tool::Read.binary?(sample)
143
-
144
- crlf = raw.include?("\r\n")
145
- content = crlf ? raw.gsub("\r\n", "\n") : raw
146
- needle = normalize_lf(old_string)
147
- patch = normalize_lf(new_string)
148
-
149
- occurrences = content.scan(needle).size
150
- if occurrences.zero?
151
- return "Error: old_string not found in #{path}. It must match the file " \
152
- 'exactly, including whitespace and indentation; re-read with the ' \
153
- 'read tool if uncertain.'
154
- end
155
- if occurrences > 1 && !replace_all
156
- return "Error: old_string matches #{occurrences} times in #{path}. " \
157
- 'Provide more surrounding context to make the match unique, ' \
158
- 'or set replace_all=true to replace all occurrences.'
159
- end
160
-
161
- replaced = replace_all ? occurrences : 1
162
- new_content =
163
- if replace_all
164
- # Block form bypasses gsub's \1 / \& interpolation on the
165
- # replacement String — we want literal substitution.
166
- content.gsub(needle) { patch }
167
- else
168
- idx = content.index(needle)
169
- content.byteslice(0, idx) + patch + content.byteslice(idx + needle.bytesize, content.bytesize - idx - needle.bytesize)
170
- end
171
-
172
- final = crlf ? new_content.gsub("\n", "\r\n") : new_content
173
- resolved.write(final)
174
-
175
- "Edited #{path}: replaced #{replaced} occurrence#{replaced == 1 ? '' : 's'}."
176
- rescue Tool::Workspace::Error => e
177
- "Error: #{e.message}"
178
- rescue Errno::EACCES => e
179
- "Error: cannot edit #{path}: #{e.message}"
180
- end
181
-
182
- # Force a String to BINARY encoding and collapse +\r\n+ → +\n+ so
183
- # all matching/replacement happens in LF space with byte-stable
184
- # comparisons. Applied to the file content, +old_string+, and
185
- # +new_string+ alike — symmetric normalization keeps the byte-match
186
- # semantics consistent across all three inputs.
187
- #
188
- # @param str [String]
189
- # @return [String] BINARY-encoded, CRLF-collapsed copy
190
- def self.normalize_lf(str)
191
- str.b.gsub("\r\n", "\n")
192
- end
193
- private_class_method :normalize_lf
194
- end
195
- end
196
- end
@@ -1,167 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Pikuri
4
- class Tool
5
- # Truncation policy and Tool spec for the +fetch+ tool. The HTTP work
6
- # lives in {Tool::Scraper::Simple.fetch}; this module is a thin
7
- # wrapper that accepts only textual content-types, applies a character
8
- # cap so the LLM doesn't drown in long-form bodies, and exposes the
9
- # result to the agent loop in OpenAI tool-call shape.
10
- #
11
- # Sister of {Tool::WebScrape}, but without HTML→Markdown or PDF→text
12
- # extraction: bodies are returned verbatim. Useful for raw textual
13
- # data — JSON APIs, CSV files, +robots.txt+, sitemaps, source files —
14
- # where any rendering pass would corrupt the payload.
15
- module Fetch
16
- # @return [Integer] default character cap on the body returned by
17
- # {.fetch}. Smaller than {Tool::WebScrape::DEFAULT_MAX_CHARS}
18
- # because fetch's content profile is bimodal — most JSON/XML/CSV
19
- # responses are tiny, and the long-tail (large data dumps) is
20
- # better re-requested deliberately than padded into every default.
21
- DEFAULT_MAX_CHARS = 5_000
22
-
23
- # @return [Integer] hard ceiling on the +max_chars+ argument to
24
- # {.fetch}. Matches {Tool::WebScrape::MAX_MAX_CHARS}.
25
- MAX_MAX_CHARS = 100_000
26
-
27
- # Application content-types that are textual in practice and so
28
- # safe to return verbatim to the LLM, despite their +application/+
29
- # prefix making them fail the +text/*+ check. Anything outside
30
- # +text/*+ and this allowlist is refused.
31
- # @return [Array<String>]
32
- TEXTUAL_APPLICATION_TYPES = %w[
33
- application/json
34
- application/xml
35
- application/javascript
36
- application/xhtml+xml
37
- application/rss+xml
38
- application/atom+xml
39
- ].freeze
40
-
41
- # On-disk cache used by {.fetch} to memoize downloads. Defined as a
42
- # method so specs can swap it for an isolated cache or
43
- # {UrlCache::NULL} without touching the shared instance. Lives in
44
- # its own subdir under {UrlCache::ROOT_DIR} so a +fetch+ on a URL
45
- # and a +web_scrape+ on the same URL cannot collide on the same
46
- # cache file (one returns the raw body, the other returns extracted
47
- # Markdown).
48
- #
49
- # @return [UrlCache, #fetch]
50
- CACHE = UrlCache.new(ttl: UrlCache::DEFAULT_TTL, dir: "#{UrlCache::ROOT_DIR}/fetch")
51
- def self.cache
52
- CACHE
53
- end
54
-
55
- # Download +url+ via {Tool::Scraper::Simple.fetch} and return the
56
- # response body verbatim, provided the content-type is one we deem
57
- # textual (any +text/*+, plus the formats listed in
58
- # {TEXTUAL_APPLICATION_TYPES}). Anything else — PDFs, images, other
59
- # binaries — produces an +"Error: ..."+ string in the calculator-
60
- # style convention so the agent loop feeds the failure back to the
61
- # model as the next observation.
62
- #
63
- # The body is cached on disk via {.cache}, keyed by URL, so repeat
64
- # fetches within the cache TTL skip the network. +max_chars+ is not
65
- # part of the cache key — different values for the same URL share
66
- # one entry, and truncation runs after the cache lookup. The cache
67
- # is only populated on success: {Scraper::FetchError} (HTTP non-2xx,
68
- # network failure, redirect-loop exhaustion, refused content-type)
69
- # is caught outside the +cache.fetch+ block, so failure strings are
70
- # never persisted and a retry on the next call hits the network
71
- # again. Other exceptions (parser bugs in our own code) bubble up
72
- # unchanged.
73
- #
74
- # @param url [String] absolute HTTP(S) URL to download
75
- # @param max_chars [Integer] character cap on the returned body.
76
- # Clamped to +[1, {MAX_MAX_CHARS}]+; defaults to
77
- # {DEFAULT_MAX_CHARS}. When the body exceeds the cap, output is
78
- # cut and a marker noting the original length is appended.
79
- # @return [String] response body, possibly truncated, or
80
- # +"Error: ..."+ on a recoverable failure
81
- def self.fetch(url, max_chars: DEFAULT_MAX_CHARS)
82
- max_chars = max_chars.clamp(1, MAX_MAX_CHARS)
83
- body = cache.fetch(url) { download(url) }
84
- truncate(body, max_chars)
85
- rescue Scraper::FetchError => e
86
- "Error: #{e.message}"
87
- end
88
-
89
- # GET +url+ and verify the response's content-type is textual.
90
- # Caller is responsible for caching and truncation; this method
91
- # always hits the network.
92
- #
93
- # @param url [String]
94
- # @return [String] response body
95
- # @raise [Scraper::FetchError] on HTTP non-2xx, network failure,
96
- # redirect-loop exhaustion, missing +Location+ on a 3xx, or a
97
- # non-textual content-type
98
- def self.download(url)
99
- fetched = Scraper::Simple.fetch(url)
100
- return fetched.body if textual?(fetched.content_type)
101
-
102
- raise Scraper::FetchError,
103
- "refused to fetch #{url}: content-type #{fetched.content_type.inspect} " \
104
- 'is not textual (use web_scrape for PDFs or rendered pages)'
105
- end
106
-
107
- # @param content_type [String] normalized content-type (no +charset+
108
- # parameter, lowercased) as produced by {Scraper::Simple.fetch}
109
- # @return [Boolean] true when the content-type is +text/*+ or one
110
- # of {TEXTUAL_APPLICATION_TYPES}
111
- def self.textual?(content_type)
112
- content_type.start_with?('text/') ||
113
- TEXTUAL_APPLICATION_TYPES.include?(content_type)
114
- end
115
-
116
- # Cut +body+ to at most +max_chars+ characters, appending a marker
117
- # describing the original length when truncation actually happens.
118
- # Returns +body+ unchanged if it already fits. Same shape as
119
- # {Tool::WebScrape.truncate} so the LLM sees a consistent
120
- # truncation marker across both tools.
121
- #
122
- # @param body [String] full response body
123
- # @param max_chars [Integer] character cap; assumed already clamped
124
- # @return [String]
125
- def self.truncate(body, max_chars)
126
- return body if body.length <= max_chars
127
-
128
- "#{body[0, max_chars]}\n\n" \
129
- "... [truncated at #{max_chars} of #{body.length} chars; " \
130
- 'call again with a larger `max_chars` to see more]'
131
- end
132
- end
133
-
134
- # Verbatim URL download tool. Thin wrapper over {Tool::Fetch.fetch}
135
- # that exposes it to the agent loop in OpenAI tool-call shape. Use for
136
- # raw textual payloads (JSON APIs, CSV files, +robots.txt+, source
137
- # files); use {Tool::WEB_SCRAPE} for rendered web pages or PDFs where
138
- # readability extraction makes the result usable.
139
- #
140
- # @return [Tool]
141
- FETCH = new(
142
- name: 'fetch',
143
- description: <<~DESC,
144
- Downloads the given URL and returns its body verbatim.
145
-
146
- Usage:
147
- - Use for raw textual payloads: JSON APIs, CSV files, robots.txt, sitemaps, source files — anywhere a rendering pass would corrupt the data.
148
- - For rendered HTML pages or PDFs, use web_scrape — it extracts readable content; fetch returns the raw HTML/PDF bytes unchanged.
149
- - Accepts text/* and common textual application/* types (JSON, XML, JS, XHTML, RSS, Atom). Refuses PDFs, images, and other binaries.
150
- DESC
151
- parameters: Parameters.build { |p|
152
- p.required_string :url,
153
- 'Absolute URL to download, including the scheme, ' \
154
- 'e.g. "https://example.com/data.json".'
155
- p.optional_integer :max_chars,
156
- 'Maximum number of characters of the body to ' \
157
- 'return. Defaults to 5000; hard-capped at ' \
158
- '100000. When the body is longer than this, ' \
159
- 'output is cut and a marker reports the full ' \
160
- 'length.'
161
- },
162
- execute: ->(url:, max_chars: Fetch::DEFAULT_MAX_CHARS) {
163
- Fetch.fetch(url, max_chars: max_chars)
164
- }
165
- )
166
- end
167
- end
@@ -1,310 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Pikuri
4
- class Tool
5
- # The +glob+ tool — list files matching a glob pattern via
6
- # +rg --files+, sorted by modification time (newest first).
7
- # Instantiating +Tool::Glob.new(workspace: ws)+ produces a tool
8
- # whose {Tool#to_ruby_llm_tool} wiring is identical to any bundled
9
- # tool's. Same shape as {Tool::Grep} (workspace captured by the
10
- # +execute+ closure, no confirmer — read-only).
11
- #
12
- # == Why a separate tool from Grep
13
- #
14
- # The unique capability is *mtime-descending sort* — "what's been
15
- # touched recently" is a common navigation move and Grep can't
16
- # express it. The rest (filter by name, default to listing all
17
- # matching files) is theoretically reachable through Grep with
18
- # +pattern="."+, but Glob avoids that hack and keeps Read / Grep /
19
- # Glob as three clean roles: read one file, search content, list
20
- # files by name.
21
- #
22
- # == ripgrep dependency
23
- #
24
- # Hard dependency: {.check_binaries!} runs in +initialize+ and
25
- # raises if +rg+ isn't on +PATH+. Each tool owns its own probe so
26
- # construction order doesn't matter — Glob doesn't lean on Grep's
27
- # check.
28
- #
29
- # == Argv & filter pipeline
30
- #
31
- # rg --files --color=never --hidden --glob '!.git/*' \
32
- # -- <relative-path-or-dot>
33
- # # …then filter the result list in Ruby with File.fnmatch?
34
- #
35
- # Why not pass the user pattern as +--glob+ to rg? Because rg's
36
- # +--glob+ documentation says *"This always overrides any other
37
- # ignore logic"* — so +--glob '**/*.rb'+ would re-include
38
- # +.gitignore+'d Ruby files, breaking our gitignore-respect
39
- # promise. We let rg produce the full gitignore-respecting file
40
- # list and filter to the user's pattern in Ruby with
41
- # +File.fnmatch?(pattern, p, FNM_PATHNAME | FNM_EXTGLOB |
42
- # FNM_DOTMATCH)+. The three flags together cover the common rg
43
- # glob cases: +**+ recursion (+FNM_PATHNAME+), +{a,b}+ alternation
44
- # (+FNM_EXTGLOB+), and dotfile inclusion (+FNM_DOTMATCH+, matching
45
- # rg's +--hidden+ behavior). The +.git/+ exclusion stays on the rg
46
- # side so its contents never even reach the Ruby filter.
47
- #
48
- # * +--hidden+ → search dotfiles (still respects +.gitignore+).
49
- # * No +--sort+ flag: we re-sort by mtime in Ruby on the way out.
50
- # * Output paths come back as +./...+ when the search path is +.+;
51
- # the leading +./+ is stripped post-rg so the model sees clean
52
- # workspace-relative paths.
53
- #
54
- # == Sort
55
- #
56
- # mtime-descending in Ruby after rg returns, with path-ascending
57
- # as a tiebreaker for files with equal mtimes (the common case in
58
- # fresh checkouts). Cost: one +stat+ per result. Broad patterns
59
- # can make this expensive, but in practice rg's +.gitignore+ filter
60
- # keeps result sets bounded; if real friction shows up later we can
61
- # cap pre-sort.
62
- #
63
- # == Truncation
64
- #
65
- # Total output head-truncated to {MAX_BYTES} *after* mtime sort, so
66
- # the kept rows are the newest. Matches {Tool::Grep}'s budget and
67
- # head-bias.
68
- #
69
- # == Exit codes
70
- #
71
- # * +0+ → at least one file; format with footer.
72
- # * +1+ → no files; return +"No files match pattern '...'"+.
73
- # * +2+ → rg error (bad path, bad glob); return
74
- # +"Error: ripgrep: ..."+.
75
- #
76
- # == Refusals
77
- #
78
- # All returned as +"Error: ..."+ observations:
79
- #
80
- # * Empty +pattern+ → fast reject.
81
- # * +path+ is a regular file → fast reject pointing at the +read+
82
- # tool.
83
- # * +path+ not found → +"Error: path not found: <path>"+.
84
- # * +path+ outside the workspace → caught from
85
- # {Tool::Workspace::Error}.
86
- class Glob < Tool
87
- # @return [Integer] hard byte cap on combined rg output. Same
88
- # value as {Tool::Grep::MAX_BYTES} so the two file-touching
89
- # tools share a budget shape. Re-declared here rather than
90
- # referenced cross-file because Zeitwerk's eager-load order
91
- # isn't guaranteed between siblings.
92
- MAX_BYTES = 50 * 1024
93
-
94
- # @return [String] human-readable form of {MAX_BYTES} for the
95
- # truncation marker.
96
- MAX_BYTES_LABEL = "#{MAX_BYTES / 1024} KB"
97
-
98
- # Description shown to the LLM. opencode-shape (summary +
99
- # +Usage:+ bullets). Per-parameter constraints live in parameter
100
- # descriptions.
101
- #
102
- # @return [String]
103
- DESCRIPTION = <<~DESC
104
- List files matching a glob pattern, sorted by modification time (newest first).
105
-
106
- Usage:
107
- - `.gitignore` is respected; for unfiltered listing use bash `rg --no-ignore --files -g <pattern>`.
108
- - Glob syntax: `**` matches any number of directories, `*` matches any filename chars (not `/`), `{a,b}` is alternation.
109
- - Default search root is the workspace root; pass `path` to narrow to a subdirectory.
110
- - Use `glob` to find files by name; use `grep` to find files by content.
111
- - Output is sorted by mtime descending — recently-touched files come first, so broad patterns still surface relevant files near the top.
112
- - Output is truncated to #{MAX_BYTES_LABEL}; refine the pattern or narrow `path` if the response ends in a truncation marker.
113
- DESC
114
-
115
- # @param workspace [Tool::Workspace] captured for path resolution
116
- # and as +chdir+ for rg. All path arguments route through
117
- # +workspace.resolve_for_read+.
118
- # @raise [RuntimeError] if +rg+ isn't on +PATH+; fail-loud at
119
- # construction rather than the first tool call.
120
- # @return [Glob]
121
- def initialize(workspace:)
122
- Glob.send(:check_binaries!)
123
- super(
124
- name: 'glob',
125
- description: DESCRIPTION,
126
- parameters: Parameters.build { |p|
127
- p.required_string :pattern,
128
- 'Glob pattern (** matches any number of ' \
129
- 'directories; {a,b} alternation), e.g. ' \
130
- '"**/*.rb" or "lib/**/*_spec.rb".'
131
- p.optional_string :path,
132
- 'Directory to search in. Relative paths resolve ' \
133
- 'against the workspace root. Defaults to the ' \
134
- 'workspace root, e.g. "lib/" or "spec/".'
135
- },
136
- execute: lambda { |pattern:, path: nil|
137
- Glob.search(workspace: workspace, pattern: pattern, path: path)
138
- }
139
- )
140
- end
141
-
142
- # Validate inputs, resolve the path against the workspace, spawn
143
- # rg, mtime-sort, head-truncate, render. Returns either the
144
- # formatted listing, a "no files match" message, or
145
- # +"Error: ..."+.
146
- #
147
- # @param workspace [Tool::Workspace]
148
- # @param pattern [String]
149
- # @param path [String, nil]
150
- # @return [String]
151
- def self.search(workspace:, pattern:, path:)
152
- return 'Error: empty pattern.' if pattern.empty?
153
-
154
- search_target = '.'
155
- if path
156
- resolved = workspace.resolve_for_read(path)
157
- return "Error: path not found: #{path}" unless resolved.exist?
158
- if resolved.file?
159
- return "Error: #{path} is a file, not a directory; use the read tool to view it."
160
- end
161
-
162
- rel = resolved.relative_path_from(workspace.cwd).to_s
163
- search_target = rel
164
- end
165
-
166
- argv = build_argv(path: search_target)
167
- result = Pikuri::Subprocess.spawn(*argv, chdir: workspace.cwd.to_s).wait
168
- exit_code = result.status.exitstatus
169
-
170
- case exit_code
171
- when 0
172
- format_output(result.output, workspace: workspace,
173
- pattern: pattern, path: path)
174
- when 1
175
- no_match_message(pattern: pattern, path: path)
176
- else
177
- stderr = result.output.strip
178
- stderr = "exited #{exit_code}" if stderr.empty?
179
- "Error: ripgrep: #{stderr}"
180
- end
181
- rescue Tool::Workspace::Error => e
182
- "Error: #{e.message}"
183
- end
184
-
185
- # @return [Integer] flags for {File.fnmatch?}: +FNM_PATHNAME+ for
186
- # +**+ recursion + path-aware +/+ matching, +FNM_EXTGLOB+ for
187
- # +{a,b}+ alternation, +FNM_DOTMATCH+ to match dotfiles (rg
188
- # does this when +--hidden+ is set).
189
- FNMATCH_FLAGS = File::FNM_PATHNAME | File::FNM_EXTGLOB | File::FNM_DOTMATCH
190
-
191
- # Build the +rg+ argv. User pattern is NOT passed to rg — see
192
- # the class header for why (rg's +--glob+ overrides
193
- # +.gitignore+).
194
- #
195
- # @return [Array<String>]
196
- def self.build_argv(path:)
197
- [
198
- 'rg',
199
- '--files',
200
- '--color=never',
201
- '--hidden',
202
- '--glob', '!.git/*',
203
- '--', path
204
- ]
205
- end
206
- private_class_method :build_argv
207
-
208
- # Strip the +./+ prefix rg adds when invoked with +.+ as the
209
- # search path, filter to the user pattern with +fnmatch+,
210
- # mtime-sort descending (path ascending as tiebreaker),
211
- # head-truncate at {MAX_BYTES}, append a footer summarizing the
212
- # count.
213
- #
214
- # @return [String]
215
- def self.format_output(raw, workspace:, pattern:, path:)
216
- all_paths = raw.split("\n").reject(&:empty?).map { |p| p.sub(%r{\A\./}, '') }
217
- paths = all_paths.select { |p| File.fnmatch?(pattern, p, FNMATCH_FLAGS) }
218
- return no_match_message(pattern: pattern, path: path) if paths.empty?
219
-
220
- sorted = mtime_sort(paths, workspace.cwd)
221
- joined = sorted.join("\n") + "\n"
222
- content, truncation_marker = head_truncate(joined)
223
- stripped = content.chomp
224
- count = stripped.split("\n").size
225
-
226
- footer = "Found #{pluralize(count, 'file', 'files')}."
227
- [stripped, '', footer + truncation_marker].join("\n")
228
- end
229
- private_class_method :format_output
230
-
231
- # mtime descending; path ascending for stable order on ties.
232
- #
233
- # @return [Array<String>]
234
- def self.mtime_sort(paths, cwd)
235
- paths
236
- .map { |p| [p, mtime_of(cwd + p)] }
237
- .sort_by { |(p, m)| [-m, p] }
238
- .map(&:first)
239
- end
240
- private_class_method :mtime_sort
241
-
242
- # @return [Float] epoch-seconds mtime; 0 for paths we can't stat
243
- # (race between rg listing and our stat, deleted symlinks,
244
- # etc.). The fallback puts unstattable entries at the bottom.
245
- def self.mtime_of(absolute)
246
- File.mtime(absolute).to_f
247
- rescue Errno::ENOENT
248
- 0.0
249
- end
250
- private_class_method :mtime_of
251
-
252
- # Head-truncate +raw+ to {MAX_BYTES}, cutting at the last newline
253
- # boundary so the final row is never partial. Returns the
254
- # truncated content and a marker String (empty if no truncation).
255
- #
256
- # @return [Array(String, String)]
257
- def self.head_truncate(raw)
258
- total = raw.bytesize
259
- return [raw, ''] if total <= MAX_BYTES
260
-
261
- head = raw.byteslice(0, MAX_BYTES)
262
- last_nl = head.rindex("\n")
263
- head = head.byteslice(0, last_nl) if last_nl
264
- omitted = total - head.bytesize
265
- marker = "\n\n... [#{omitted} bytes omitted; total was #{total} bytes; " \
266
- 'refine pattern or path] ...'
267
- [head, marker]
268
- end
269
- private_class_method :head_truncate
270
-
271
- # @return [String]
272
- def self.no_match_message(pattern:, path:)
273
- base = "No files match pattern '#{pattern}'"
274
- base += " in #{path}" if path
275
- "#{base}."
276
- end
277
- private_class_method :no_match_message
278
-
279
- # @return [String] +"1 file"+ / +"2 files"+
280
- def self.pluralize(n, sing, plural)
281
- "#{n} #{n == 1 ? sing : plural}"
282
- end
283
- private_class_method :pluralize
284
-
285
- # Verify +rg+ is reachable on +PATH+. Routed through
286
- # {Pikuri::Subprocess.spawn} to honor the subprocess seam. rg
287
- # missing surfaces as +Errno::ENOENT+; an installed rg returns
288
- # exit 0 from +--version+.
289
- #
290
- # @return [void]
291
- # @raise [RuntimeError] if rg is missing
292
- def self.check_binaries!
293
- result = Pikuri::Subprocess.spawn('rg', '--version', chdir: '/').wait
294
- return if result.status.success?
295
-
296
- raise install_hint
297
- rescue Errno::ENOENT
298
- raise install_hint
299
- end
300
- private_class_method :check_binaries!
301
-
302
- # @return [String]
303
- def self.install_hint
304
- "Tool::Glob requires 'rg' (ripgrep) on PATH; install via your " \
305
- "distro's package manager (e.g. 'apt install ripgrep')."
306
- end
307
- private_class_method :install_hint
308
- end
309
- end
310
- end