pikuri-workspace 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +50 -0
- data/lib/pikuri/tool/confirmer.rb +96 -0
- data/lib/pikuri/tool/edit.rb +196 -0
- data/lib/pikuri/tool/glob.rb +310 -0
- data/lib/pikuri/tool/grep.rb +338 -0
- data/lib/pikuri/tool/read.rb +254 -0
- data/lib/pikuri/tool/workspace.rb +150 -0
- data/lib/pikuri/tool/write.rb +170 -0
- data/lib/pikuri-workspace.rb +27 -0
- metadata +80 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 7cd37f2cda8958c1a0098160a71b6a0a3e375cd6f8169c298909950a8c6fdb87
|
|
4
|
+
data.tar.gz: fb99c3d2b8dcb886f5f5e93bb6562bf2e87b8bad0d44cdf71da2ece883ef3270
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 4a79642151199e5e4ceeefda2130c270de29015350fa5a128d54d1a692fac9e37949363db2f4222d635d7619e09409140d6bad41c7cc841f1b9f1cde58762fc0
|
|
7
|
+
data.tar.gz: eb45fe6977711e452f399f58a9e4f0d80450e62008c1af32fc6c0d9fe04d7b3c63b8149c685f296dbed1de3e9f9cd883dea13714e23baa87681abeb28eb02f05
|
data/README.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# pikuri-workspace
|
|
2
|
+
|
|
3
|
+
Filesystem tools + Workspace/Confirmer seams for the
|
|
4
|
+
[pikuri](https://codeberg.org/mvysny/pikuri) AI-assistant toolkit.
|
|
5
|
+
|
|
6
|
+
Self-contained "operate on a directory tree" toolkit:
|
|
7
|
+
- `Pikuri::Tool::Workspace` — abstract base + bundled
|
|
8
|
+
`Workspace::Cwd` that scopes filesystem access to a chosen root,
|
|
9
|
+
rejecting `..`-escapes and symlinks that resolve outside the root.
|
|
10
|
+
- `Pikuri::Tool::Confirmer` — abstract base + `AUTO_APPROVE` /
|
|
11
|
+
`TERMINAL` for user-state mutations.
|
|
12
|
+
- Five file tools: `Pikuri::Tool::Read`, `Pikuri::Tool::Write`,
|
|
13
|
+
`Pikuri::Tool::Edit`, `Pikuri::Tool::Grep`, `Pikuri::Tool::Glob`.
|
|
14
|
+
|
|
15
|
+
No shell execution — `Pikuri::Tool::Bash` ships in
|
|
16
|
+
[`pikuri-code`](../pikuri-code/README.md) on top of these.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```ruby
|
|
21
|
+
# Gemfile
|
|
22
|
+
gem 'pikuri-workspace'
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
|
|
27
|
+
```ruby
|
|
28
|
+
require 'pikuri-core'
|
|
29
|
+
require 'pikuri-workspace'
|
|
30
|
+
|
|
31
|
+
workspace = Pikuri::Tool::Workspace::Cwd.new(root: Dir.pwd)
|
|
32
|
+
confirmer = Pikuri::Tool::Confirmer::TERMINAL
|
|
33
|
+
|
|
34
|
+
agent = Pikuri::Agent.new(
|
|
35
|
+
transport: ...,
|
|
36
|
+
system_prompt: ...,
|
|
37
|
+
) do |c|
|
|
38
|
+
c.add_tool Pikuri::Tool::Read.new(workspace: workspace)
|
|
39
|
+
c.add_tool Pikuri::Tool::Grep.new(workspace: workspace)
|
|
40
|
+
c.add_tool Pikuri::Tool::Glob.new(workspace: workspace)
|
|
41
|
+
c.add_tool Pikuri::Tool::Edit.new(workspace: workspace)
|
|
42
|
+
c.add_tool Pikuri::Tool::Write.new(workspace: workspace, confirmer: confirmer)
|
|
43
|
+
c.add_listener ...
|
|
44
|
+
end
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
`Workspace::Cwd` is the "look-but-don't-leak" guard around filesystem
|
|
48
|
+
access. Read tools route through `#resolve_for_read(path)`; mutating
|
|
49
|
+
tools route through `#resolve_for_write(path)` + the Confirmer's
|
|
50
|
+
`#confirm?(prompt:)`.
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pikuri
|
|
4
|
+
class Tool
|
|
5
|
+
# Port for asking the user to confirm a potentially destructive tool
|
|
6
|
+
# operation — currently {Tool::Bash} (every command) and
|
|
7
|
+
# {Tool::Write} (overwrite of an existing file with non-identical
|
|
8
|
+
# content). Subclass and implement {#confirm?}.
|
|
9
|
+
#
|
|
10
|
+
# == Why a Boolean return
|
|
11
|
+
#
|
|
12
|
+
# v1 returns +true+ or +false+. Two paths-not-taken worth recording
|
|
13
|
+
# so a future reader knows the design space was considered:
|
|
14
|
+
#
|
|
15
|
+
# 1. Richer return (+:once+ / +:always+ / +:reject+) — rejected
|
|
16
|
+
# because it creates decision fatigue, and the long-term answer
|
|
17
|
+
# is to make confirmations rare rather than smart (sandboxing,
|
|
18
|
+
# agentic destructiveness analysis).
|
|
19
|
+
# 2. Agentic destructive-or-not classifier — deferred to v2.
|
|
20
|
+
#
|
|
21
|
+
# The intended escape from confirmation prompts today is sandboxing
|
|
22
|
+
# (docker / dev-container) plus the +--yolo+ flag on +bin/pikuri-code+
|
|
23
|
+
# (which wires {AUTO_APPROVE} instead of {TERMINAL}).
|
|
24
|
+
#
|
|
25
|
+
# == Seam discipline
|
|
26
|
+
#
|
|
27
|
+
# Tools that need confirmation take a {Confirmer} via constructor and
|
|
28
|
+
# invoke {#confirm?} with a fully-composed prompt String. Tools do
|
|
29
|
+
# *not* call +gets+ / +puts+ directly — same lesson as listeners,
|
|
30
|
+
# keep IO at the seam so a future TUI / web client can plug a
|
|
31
|
+
# different implementation in without touching tool code.
|
|
32
|
+
class Confirmer
  # Ask whether a potentially destructive tool operation may proceed.
  #
  # @param prompt [String] the complete, human-readable question. The
  #   calling tool composes it (closing punctuation and any "(y/n)" cue
  #   included); a confirmer only renders it and interprets the reply.
  # @return [Boolean] +true+ iff the operation was approved
  # @raise [NotImplementedError] always, in this abstract base
  def confirm?(prompt:)
    raise NotImplementedError, "#{self.class}#confirm? must be implemented"
  end

  # Confirmer that talks to the user over +$stdout+ / +$stdin+.
  #
  # It emits an empty line first so the prompt always starts on a line
  # of its own, prints +prompt+, then reads replies one line at a time:
  #
  # * +"y"+ / +"yes"+ (surrounding whitespace and case ignored) → +true+
  # * +"n"+ / +"no"+ → +false+
  # * end of input (+gets+ returns +nil+, e.g. Ctrl+D) → +false+
  # * any other reply (blank line, typo, +"maybe"+) → prints
  #   "Please answer y or n: " and reads again
  #
  # There is no cap on the number of retries; only a valid answer or
  # end of input ends the loop.
  class Terminal < Confirmer
    # @param prompt [String]
    # @return [Boolean]
    def confirm?(prompt:)
      puts
      puts prompt
      $stdout.flush

      # +gets+ yields nil at EOF, which ends the loop and falls through
      # to the +false+ below — an exhausted stdin counts as a refusal.
      while (reply = $stdin.gets)
        case reply.strip.downcase
        when 'y', 'yes' then return true
        when 'n', 'no' then return false
        end

        print 'Please answer y or n: '
        $stdout.flush
      end

      false
    end
  end

  # Confirmer that says yes to every request without any IO. Per the
  # original notes it backs +bin/pikuri-code --yolo+ (docker /
  # dev-container mode) and tool specs that don't want to coordinate
  # stdin. Exposed as the shared {AUTO_APPROVE} instance.
  class AutoApprove < Confirmer
    # @param prompt [String] ignored
    # @return [true]
    def confirm?(prompt:)
      true
    end
  end

  # Shared {Terminal} instance. The class holds no instance state, so
  # one object can serve every tool and sub-agent.
  TERMINAL = Terminal.new

  # Shared {AutoApprove} instance.
  AUTO_APPROVE = AutoApprove.new
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pikuri
|
|
4
|
+
class Tool
|
|
5
|
+
# The +edit+ tool — exact-string replacement on an existing file.
|
|
6
|
+
# Instantiating +Tool::Edit.new(workspace: ws)+ produces a tool whose
|
|
7
|
+
# {Tool#to_ruby_llm_tool} wiring is identical to any bundled tool's.
|
|
8
|
+
# Same shape as {Tool::Read} (workspace captured by +execute+; no
|
|
9
|
+
# confirmer needed).
|
|
10
|
+
#
|
|
11
|
+
# == Why no confirmer
|
|
12
|
+
#
|
|
13
|
+
# The +old_string+ argument is itself an implicit read-check: the
|
|
14
|
+
# model can't write a correct +old_string+ without having seen the
|
|
15
|
+
# file (via {Tool::Read} or out-of-band), so the blast radius of any
|
|
16
|
+
# Edit is bounded by the model's actual knowledge of file state.
|
|
17
|
+
# That makes Edit safe to execute without prompting — by contrast,
|
|
18
|
+
# {Tool::Write} requires a confirmer because a hallucinated 500-line
|
|
19
|
+
# +content+ could clobber unread bytes.
|
|
20
|
+
#
|
|
21
|
+
# == Matching is strict (no fuzz cascade)
|
|
22
|
+
#
|
|
23
|
+
# +old_string+ must match the file byte-for-byte. v1 ships *no*
|
|
24
|
+
# fallback replacer (no whitespace-normalized, line-trimmed, block-
|
|
25
|
+
# anchor, etc.). Predictability beats fuzz: when an Edit fails, the
|
|
26
|
+
# model re-reads with {Tool::Read} and retries — clear failure mode,
|
|
27
|
+
# no compounding-heuristic risk. opencode runs a 9-replacer cascade
|
|
28
|
+
# under the hood despite its own description saying "must match
|
|
29
|
+
# exactly"; pi stays strict. We match pi.
|
|
30
|
+
#
|
|
31
|
+
# == Line endings get normalized
|
|
32
|
+
#
|
|
33
|
+
# The one structural exception to "strict bytes": files with CRLF
|
|
34
|
+
# line endings get matched in LF space, and the original line ending
|
|
35
|
+
# is restored on write. Reason: {Tool::Read} renders content via
|
|
36
|
+
# +each_line+ + +chomp+, which strips +\r\n+ to +\n+ in what the
|
|
37
|
+
# model sees. A pure strict byte-match would then never succeed on
|
|
38
|
+
# CRLF files because the model can only ever supply LF. opencode and
|
|
39
|
+
# pi both do this normalization for the same reason.
|
|
40
|
+
#
|
|
41
|
+
# Algorithm:
|
|
42
|
+
#
|
|
43
|
+
# 1. Detect whether the file contains +\r\n+ anywhere (treat as CRLF).
|
|
44
|
+
# 2. Normalize content, +old_string+, and +new_string+ to LF.
|
|
45
|
+
# 3. Match + replace in LF space.
|
|
46
|
+
# 4. If the file was CRLF, convert +\n+ → +\r\n+ on the way back out.
|
|
47
|
+
#
|
|
48
|
+
# Caveat: a mixed-line-ending file is treated as CRLF, which means
|
|
49
|
+
# any pre-existing bare-LF lines get converted on write. Rare in
|
|
50
|
+
# practice; acceptable for v1.
|
|
51
|
+
#
|
|
52
|
+
# == Refusals
|
|
53
|
+
#
|
|
54
|
+
# All returned as +"Error: ..."+ observations the LLM can react to:
|
|
55
|
+
#
|
|
56
|
+
# * Empty +old_string+ → "use the write tool" (keeps Edit/Write roles
|
|
57
|
+
# non-overlapping).
|
|
58
|
+
# * +old_string+ == +new_string+ → no-op error.
|
|
59
|
+
# * +old_string+ not found in file → "must match exactly" error
|
|
60
|
+
# pointing at the read tool.
|
|
61
|
+
# * +old_string+ found multiple times without +replace_all+ →
|
|
62
|
+
# multi-match error suggesting more context or +replace_all+.
|
|
63
|
+
# * File missing / is a directory / is binary → respective error.
|
|
64
|
+
# * Workspace boundary violation / EACCES → standard rescue path.
|
|
65
|
+
class Edit < Tool
|
|
66
|
+
# Description shown to the LLM. Follows the opencode-shape (summary
|
|
67
|
+
# + +Usage:+ bullets) prescribed by the project's tool-description
|
|
68
|
+
# convention. Per-parameter constraints live in the parameter
|
|
69
|
+
# descriptions.
|
|
70
|
+
#
|
|
71
|
+
# @return [String]
|
|
72
|
+
DESCRIPTION = <<~DESC
|
|
73
|
+
Edit a file by exact-string replacement.
|
|
74
|
+
|
|
75
|
+
Usage:
|
|
76
|
+
- Use for partial changes to an existing file; for full rewrites or new files use `write` instead.
|
|
77
|
+
- `old_string` must match the file byte-for-byte (whitespace and indentation count); re-read the file with `read` if uncertain.
|
|
78
|
+
- `old_string` and `new_string` must differ.
|
|
79
|
+
- If `old_string` matches multiple times the call fails — add surrounding context to make the match unique, or set `replace_all: true`.
|
|
80
|
+
- Cannot create files (rejects empty `old_string` and missing files).
|
|
81
|
+
- Binary files are refused.
|
|
82
|
+
- CRLF files are matched in LF space; the original line endings are preserved on write.
|
|
83
|
+
DESC
|
|
84
|
+
|
|
85
|
+
# Build the +edit+ tool bound to +workspace+: declares the four
# parameters shown to the LLM and hands execution off to {Edit.edit}.
#
# @param workspace [Tool::Workspace] captured by the +execute+ lambda
#   and passed to {Edit.edit}, which resolves the target through
#   +workspace.resolve_for_write+ (Edit modifies files, so it uses the
#   write-side resolver even though it never creates one).
# @return [Edit]
def initialize(workspace:)
  schema = Parameters.build do |p|
    p.required_string :path,
                      'Path to the file to edit. Relative paths ' \
                      'resolve against the workspace root, e.g. ' \
                      '"lib/foo.rb".'
    p.required_string :old_string,
                      'Exact text to find in the file. Must match ' \
                      'byte-for-byte (whitespace counts); must be ' \
                      'unique unless replace_all is true. Example: ' \
                      '"def foo\n bar\nend".'
    p.required_string :new_string,
                      'Replacement text. Must differ from ' \
                      'old_string. Example: "def foo\n baz\nend".'
    p.optional_boolean :replace_all,
                       'Replace every occurrence of old_string ' \
                       'instead of failing on multiple matches. ' \
                       'Defaults to false, e.g. true.'
  end

  # +replace_all+ is optional for the LLM, hence the default here.
  runner = lambda do |path:, old_string:, new_string:, replace_all: false|
    Edit.edit(workspace: workspace, path: path,
              old_string: old_string, new_string: new_string,
              replace_all: replace_all)
  end

  super(name: 'edit', description: DESCRIPTION, parameters: schema, execute: runner)
end
|
|
119
|
+
|
|
120
|
+
# Apply one exact-string replacement to the file at +path+.
#
# Steps, in order:
#
# 1. Reject an empty +old_string+.
# 2. Normalize +old_string+ / +new_string+ to LF and reject the call
#    when they are identical *after* normalization (e.g. +"x\r\n"+ vs
#    +"x\n"+) — such an edit cannot change the file, so reporting it
#    as a successful replacement would mislead the model.
# 3. Resolve +path+ via +workspace.resolve_for_write+; reject missing
#    files, directories, and binary files (per +Tool::Read.binary?+ on
#    the leading +Tool::Read::BINARY_SAMPLE_BYTES+ bytes).
# 4. Match in LF space; zero matches and (without +replace_all+)
#    multiple matches are errors.
# 5. Substitute literally, restore CRLF if the file contained any
#    +\r\n+, and write the bytes back.
#
# The file is read with +binread+ and written with +binwrite+ so the
# bytes round-trip untouched; a text-mode write would apply newline
# translation on platforms that have it and corrupt the restored CRLF.
#
# @param workspace [Tool::Workspace]
# @param path [String] raw path as supplied by the LLM
# @param old_string [String] text to find
# @param new_string [String] text to substitute in
# @param replace_all [Boolean] when true, every occurrence is
#   replaced; when false, multiple matches are an error
# @return [String] tool observation — a success summary, or an
#   +"Error: ..."+ message for every refusal, for
#   +Tool::Workspace::Error+ and for +Errno::EACCES+
def self.edit(workspace:, path:, old_string:, new_string:, replace_all:)
  return 'Error: old_string is empty; use the write tool to create or overwrite a file.' if old_string.empty?

  needle = normalize_lf(old_string)
  patch = normalize_lf(new_string)
  # Compared post-normalization: strings differing only in CR/LF style
  # collapse to the same bytes and would produce a no-op write.
  return 'Error: old_string and new_string are identical — this edit is a no-op.' if needle == patch

  resolved = workspace.resolve_for_write(path)
  return "Error: file not found: #{path}" unless resolved.exist?
  return "Error: #{path} is a directory" if resolved.directory?

  raw = resolved.binread
  sample = raw.byteslice(0, Tool::Read::BINARY_SAMPLE_BYTES)
  return "Error: cannot edit binary file: #{path}" if Tool::Read.binary?(sample)

  # Any CRLF marks the whole file as CRLF; matching happens in LF space.
  crlf = raw.include?("\r\n")
  content = crlf ? raw.gsub("\r\n", "\n") : raw

  occurrences = content.scan(needle).size
  if occurrences.zero?
    return "Error: old_string not found in #{path}. It must match the file " \
           'exactly, including whitespace and indentation; re-read with the ' \
           'read tool if uncertain.'
  end
  if occurrences > 1 && !replace_all
    return "Error: old_string matches #{occurrences} times in #{path}. " \
           'Provide more surrounding context to make the match unique, ' \
           'or set replace_all=true to replace all occurrences.'
  end

  replaced = replace_all ? occurrences : 1
  # A String pattern is matched literally, and the block form bypasses
  # \1 / \& interpolation in the replacement — pure literal substitution.
  new_content =
    if replace_all
      content.gsub(needle) { patch }
    else
      content.sub(needle) { patch }
    end

  final = crlf ? new_content.gsub("\n", "\r\n") : new_content
  resolved.binwrite(final)

  "Edited #{path}: replaced #{replaced} occurrence#{replaced == 1 ? '' : 's'}."
rescue Tool::Workspace::Error => e
  "Error: #{e.message}"
rescue Errno::EACCES => e
  "Error: cannot edit #{path}: #{e.message}"
end
|
|
181
|
+
|
|
182
|
+
# Return a BINARY-encoded copy of +str+ with every +\r\n+ collapsed to
# +\n+. The edit routine applies it to +old_string+ and +new_string+
# so matching and replacement happen in LF space on raw bytes. The
# argument itself is never modified.
#
# @param str [String]
# @return [String] BINARY-encoded, CRLF-collapsed copy
def self.normalize_lf(str)
  bytes = str.b
  # gsub! returns nil when nothing matched, so return the copy itself.
  bytes.gsub!(/\r\n/, "\n")
  bytes
end
|
|
193
|
+
private_class_method :normalize_lf
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
end
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pikuri
|
|
4
|
+
class Tool
|
|
5
|
+
# The +glob+ tool — list files matching a glob pattern via
|
|
6
|
+
# +rg --files+, sorted by modification time (newest first).
|
|
7
|
+
# Instantiating +Tool::Glob.new(workspace: ws)+ produces a tool
|
|
8
|
+
# whose {Tool#to_ruby_llm_tool} wiring is identical to any bundled
|
|
9
|
+
# tool's. Same shape as {Tool::Grep} (workspace captured by the
|
|
10
|
+
# +execute+ closure, no confirmer — read-only).
|
|
11
|
+
#
|
|
12
|
+
# == Why a separate tool from Grep
|
|
13
|
+
#
|
|
14
|
+
# The unique capability is *mtime-descending sort* — "what's been
|
|
15
|
+
# touched recently" is a common navigation move and Grep can't
|
|
16
|
+
# express it. The rest (filter by name, default to listing all
|
|
17
|
+
# matching files) is theoretically reachable through Grep with
|
|
18
|
+
# +pattern="."+, but Glob avoids that hack and keeps Read / Grep /
|
|
19
|
+
# Glob as three clean roles: read one file, search content, list
|
|
20
|
+
# files by name.
|
|
21
|
+
#
|
|
22
|
+
# == ripgrep dependency
|
|
23
|
+
#
|
|
24
|
+
# Hard dependency: {.check_binaries!} runs in +initialize+ and
|
|
25
|
+
# raises if +rg+ isn't on +PATH+. Each tool owns its own probe so
|
|
26
|
+
# construction order doesn't matter — Glob doesn't lean on Grep's
|
|
27
|
+
# check.
|
|
28
|
+
#
|
|
29
|
+
# == Argv & filter pipeline
|
|
30
|
+
#
|
|
31
|
+
# rg --files --color=never --hidden --glob '!.git/*' \
|
|
32
|
+
# -- <relative-path-or-dot>
|
|
33
|
+
# # …then filter the result list in Ruby with File.fnmatch?
|
|
34
|
+
#
|
|
35
|
+
# Why not pass the user pattern as +--glob+ to rg? Because rg's
|
|
36
|
+
# +--glob+ documentation says *"This always overrides any other
|
|
37
|
+
# ignore logic"* — so +--glob '**/*.rb'+ would re-include
|
|
38
|
+
# +.gitignore+'d Ruby files, breaking our gitignore-respect
|
|
39
|
+
# promise. We let rg produce the full gitignore-respecting file
|
|
40
|
+
# list and filter to the user's pattern in Ruby with
|
|
41
|
+
# +File.fnmatch?(pattern, p, FNM_PATHNAME | FNM_EXTGLOB |
|
|
42
|
+
# FNM_DOTMATCH)+. The three flags together cover the common rg
|
|
43
|
+
# glob cases: +**+ recursion (+FNM_PATHNAME+), +{a,b}+ alternation
|
|
44
|
+
# (+FNM_EXTGLOB+), and dotfile inclusion (+FNM_DOTMATCH+, matching
|
|
45
|
+
# rg's +--hidden+ behavior). The +.git/+ exclusion stays on the rg
|
|
46
|
+
# side so its contents never even reach the Ruby filter.
|
|
47
|
+
#
|
|
48
|
+
# * +--hidden+ → search dotfiles (still respects +.gitignore+).
|
|
49
|
+
# * No +--sort+ flag: we re-sort by mtime in Ruby on the way out.
|
|
50
|
+
# * Output paths come back as +./...+ when the search path is +.+;
|
|
51
|
+
# the leading +./+ is stripped post-rg so the model sees clean
|
|
52
|
+
# workspace-relative paths.
|
|
53
|
+
#
|
|
54
|
+
# == Sort
|
|
55
|
+
#
|
|
56
|
+
# mtime-descending in Ruby after rg returns, with path-ascending
|
|
57
|
+
# as a tiebreaker for files with equal mtimes (the common case in
|
|
58
|
+
# fresh checkouts). Cost: one +stat+ per result. Broad patterns
|
|
59
|
+
# can make this expensive, but in practice rg's +.gitignore+ filter
|
|
60
|
+
# keeps result sets bounded; if real friction shows up later we can
|
|
61
|
+
# cap pre-sort.
|
|
62
|
+
#
|
|
63
|
+
# == Truncation
|
|
64
|
+
#
|
|
65
|
+
# Total output head-truncated to {MAX_BYTES} *after* mtime sort, so
|
|
66
|
+
# the kept rows are the newest. Matches {Tool::Grep}'s budget and
|
|
67
|
+
# head-bias.
|
|
68
|
+
#
|
|
69
|
+
# == Exit codes
|
|
70
|
+
#
|
|
71
|
+
# * +0+ → at least one file; format with footer.
|
|
72
|
+
# * +1+ → no files; return +"No files match pattern '...'"+.
|
|
73
|
+
# * +2+ → rg error (bad path, bad glob); return
|
|
74
|
+
# +"Error: ripgrep: ..."+.
|
|
75
|
+
#
|
|
76
|
+
# == Refusals
|
|
77
|
+
#
|
|
78
|
+
# All returned as +"Error: ..."+ observations:
|
|
79
|
+
#
|
|
80
|
+
# * Empty +pattern+ → fast reject.
|
|
81
|
+
# * +path+ is a regular file → fast reject pointing at the +read+
|
|
82
|
+
# tool.
|
|
83
|
+
# * +path+ not found → +"Error: path not found: <path>"+.
|
|
84
|
+
# * +path+ outside the workspace → caught from
|
|
85
|
+
# {Tool::Workspace::Error}.
|
|
86
|
+
class Glob < Tool
|
|
87
|
+
# @return [Integer] hard byte cap on combined rg output. Same
|
|
88
|
+
# value as {Tool::Grep::MAX_BYTES} so the two file-touching
|
|
89
|
+
# tools share a budget shape. Re-declared here rather than
|
|
90
|
+
# referenced cross-file because Zeitwerk's eager-load order
|
|
91
|
+
# isn't guaranteed between siblings.
|
|
92
|
+
MAX_BYTES = 50 * 1024
|
|
93
|
+
|
|
94
|
+
# @return [String] human-readable form of {MAX_BYTES} for the
|
|
95
|
+
# truncation marker.
|
|
96
|
+
MAX_BYTES_LABEL = "#{MAX_BYTES / 1024} KB"
|
|
97
|
+
|
|
98
|
+
# Description shown to the LLM. opencode-shape (summary +
|
|
99
|
+
# +Usage:+ bullets). Per-parameter constraints live in parameter
|
|
100
|
+
# descriptions.
|
|
101
|
+
#
|
|
102
|
+
# @return [String]
|
|
103
|
+
DESCRIPTION = <<~DESC
|
|
104
|
+
List files matching a glob pattern, sorted by modification time (newest first).
|
|
105
|
+
|
|
106
|
+
Usage:
|
|
107
|
+
- `.gitignore` is respected; for unfiltered listing use bash `rg --no-ignore --files -g <pattern>`.
|
|
108
|
+
- Glob syntax: `**` matches any number of directories, `*` matches any filename chars (not `/`), `{a,b}` is alternation.
|
|
109
|
+
- Default search root is the workspace root; pass `path` to narrow to a subdirectory.
|
|
110
|
+
- Use `glob` to find files by name; use `grep` to find files by content.
|
|
111
|
+
- Output is sorted by mtime descending — recently-touched files come first, so broad patterns still surface relevant files near the top.
|
|
112
|
+
- Output is truncated to #{MAX_BYTES_LABEL}; refine the pattern or narrow `path` if the response ends in a truncation marker.
|
|
113
|
+
DESC
|
|
114
|
+
|
|
115
|
+
# Build the +glob+ tool bound to +workspace+. The ripgrep probe runs
# first, so a missing +rg+ fails at construction rather than on the
# first tool call.
#
# @param workspace [Tool::Workspace] captured by the +execute+ lambda
#   and passed to {Glob.search}, which uses it for path resolution
#   (+workspace.resolve_for_read+) and as the +chdir+ for rg.
# @raise [RuntimeError] if +rg+ isn't on +PATH+
# @return [Glob]
def initialize(workspace:)
  Glob.send(:check_binaries!)

  schema = Parameters.build do |p|
    p.required_string :pattern,
                      'Glob pattern (** matches any number of ' \
                      'directories; {a,b} alternation), e.g. ' \
                      '"**/*.rb" or "lib/**/*_spec.rb".'
    p.optional_string :path,
                      'Directory to search in. Relative paths resolve ' \
                      'against the workspace root. Defaults to the ' \
                      'workspace root, e.g. "lib/" or "spec/".'
  end

  # +path+ is optional for the LLM; nil means "search the workspace root".
  runner = lambda do |pattern:, path: nil|
    Glob.search(workspace: workspace, pattern: pattern, path: path)
  end

  super(name: 'glob', description: DESCRIPTION, parameters: schema, execute: runner)
end
|
|
141
|
+
|
|
142
|
+
# Entry point behind the +glob+ tool: validate the arguments, work out
# which directory rg should list, run rg from the workspace root and
# turn its exit status into an observation.
#
# * exit 0 → the formatted, pattern-filtered listing
# * exit 1 → the "no files match" message
# * anything else → +"Error: ripgrep: ..."+ carrying rg's output, or
#   +"exited <code>"+ when rg printed nothing
#
# @param workspace [Tool::Workspace]
# @param pattern [String] glob pattern; empty is rejected up front
# @param path [String, nil] directory to list; nil means the workspace
#   root (rg is then given +.+)
# @return [String] listing, "no files match" message, or +"Error: ..."+
def self.search(workspace:, pattern:, path:)
  return 'Error: empty pattern.' if pattern.empty?

  target =
    if path
      dir = workspace.resolve_for_read(path)
      return "Error: path not found: #{path}" unless dir.exist?
      return "Error: #{path} is a file, not a directory; use the read tool to view it." if dir.file?

      # rg runs with the workspace root as cwd, so hand it a path
      # relative to that root.
      dir.relative_path_from(workspace.cwd).to_s
    else
      '.'
    end

  command = build_argv(path: target)
  outcome = Pikuri::Subprocess.spawn(*command, chdir: workspace.cwd.to_s).wait
  code = outcome.status.exitstatus

  if code == 0
    format_output(outcome.output, workspace: workspace, pattern: pattern, path: path)
  elsif code == 1
    no_match_message(pattern: pattern, path: path)
  else
    detail = outcome.output.strip
    detail = "exited #{code}" if detail.empty?
    "Error: ripgrep: #{detail}"
  end
rescue Tool::Workspace::Error => e
  "Error: #{e.message}"
end
|
|
184
|
+
|
|
185
|
+
# @return [Integer] flags for {File.fnmatch?}: +FNM_PATHNAME+ for
|
|
186
|
+
# +**+ recursion + path-aware +/+ matching, +FNM_EXTGLOB+ for
|
|
187
|
+
# +{a,b}+ alternation, +FNM_DOTMATCH+ to match dotfiles (rg
|
|
188
|
+
# does this when +--hidden+ is set).
|
|
189
|
+
FNMATCH_FLAGS = File::FNM_PATHNAME | File::FNM_EXTGLOB | File::FNM_DOTMATCH
|
|
190
|
+
|
|
191
|
+
# Assemble the command line for the file-listing rg run. The user's
# glob pattern is deliberately absent: the listing is filtered in Ruby
# afterwards, because rg's +--glob+ would override +.gitignore+. The
# only glob passed is the +.git/+ exclusion.
#
# @param path [String] directory rg should list, placed after +--+ so
#   it can never be parsed as an option
# @return [Array<String>]
def self.build_argv(path:)
  listing_flags = ['--files', '--color=never', '--hidden']
  git_exclusion = ['--glob', '!.git/*']
  ['rg', *listing_flags, *git_exclusion, '--', path]
end
|
|
206
|
+
private_class_method :build_argv
|
|
207
|
+
|
|
208
|
+
# Turn rg's raw file listing into the observation shown to the model:
#
# 1. drop blank rows and the +./+ prefix rg emits when listing +.+;
# 2. keep only rows matching +pattern+ under {FNMATCH_FLAGS};
# 3. order them with +mtime_sort+ against the workspace cwd;
# 4. head-truncate the joined listing with +head_truncate+;
# 5. append a blank line and a "Found N file(s)." footer — N counts
#    the rows that survived truncation — followed by the truncation
#    marker, if any.
#
# @param raw [String] rg output, one path per line
# @param workspace [Tool::Workspace] supplies +cwd+ for the mtime lookup
# @param pattern [String] user glob applied via +File.fnmatch?+
# @param path [String, nil] only used for the "no files match" message
# @return [String]
def self.format_output(raw, workspace:, pattern:, path:)
  matches = raw.split("\n").each_with_object([]) do |row, kept|
    next if row.empty?

    candidate = row.sub(%r{\A\./}, '')
    kept << candidate if File.fnmatch?(pattern, candidate, FNMATCH_FLAGS)
  end
  return no_match_message(pattern: pattern, path: path) if matches.empty?

  listing = mtime_sort(matches, workspace.cwd).join("\n") + "\n"
  body, marker = head_truncate(listing)
  body = body.chomp
  shown = body.split("\n").size

  [body, '', "Found #{pluralize(shown, 'file', 'files')}.#{marker}"].join("\n")
end
|
|
229
|
+
private_class_method :format_output
|
|
230
|
+
|
|
231
|
+
# Order +paths+ newest-first by modification time, with the path
# itself (ascending) as the tiebreaker so equal mtimes still yield a
# stable order. Each path is looked up once, as +cwd + path+.
#
# @param paths [Array<String>] paths relative to +cwd+
# @param cwd [Pathname] directory the paths are relative to
# @return [Array<String>]
def self.mtime_sort(paths, cwd)
  # Negating the timestamp turns sort_by's ascending order into
  # newest-first while leaving the path component ascending.
  paths.sort_by { |rel| [-mtime_of(cwd + rel), rel] }
end
|
|
240
|
+
private_class_method :mtime_sort
|
|
241
|
+
|
|
242
|
+
# Modification time of +absolute+ as epoch seconds, or +0.0+ when the
# path cannot be stat'ed. With the newest-first ordering used by the
# listing, that fallback sinks such entries to the bottom.
#
# Every +SystemCallError+ is treated as "can't stat", not only
# +Errno::ENOENT+: a file vanishing between rg's listing and this call
# is just one way to lose the race — a parent directory replaced by a
# file (+ENOTDIR+), a symlink loop (+ELOOP+) or revoked permissions
# (+EACCES+) would otherwise abort the whole listing over one entry.
#
# @param absolute [String, Pathname] path to stat
# @return [Float] epoch-seconds mtime; 0.0 for paths we can't stat
def self.mtime_of(absolute)
  File.mtime(absolute).to_f
rescue SystemCallError
  0.0
end
|
|
250
|
+
private_class_method :mtime_of
|
|
251
|
+
|
|
252
|
+
# Head-truncate +raw+ to {MAX_BYTES}, cutting back to the last newline
# inside the budget so the final row is never partial. Returns the
# (possibly shortened) content plus a marker String that is empty when
# nothing was cut.
#
# The newline is located in *byte* space. +String#rindex+ on a
# multibyte string returns a character index, and feeding that to
# +byteslice+ (which takes byte offsets) would cut too early — in the
# middle of a row — whenever file names contain non-ASCII characters.
# Searching a binary view makes the index a byte offset, and it also
# sidesteps the half character the byte cap itself may leave at the
# end of the window.
#
# If the window contains no newline at all (a single row longer than
# the budget), the raw byte window is returned as is.
#
# @param raw [String] newline-separated rows
# @return [Array(String, String)] truncated content and marker
def self.head_truncate(raw)
  total = raw.bytesize
  return [raw, ''] if total <= MAX_BYTES

  head = raw.byteslice(0, MAX_BYTES)
  last_nl = head.b.rindex("\n") # byte offset, thanks to the binary view
  head = head.byteslice(0, last_nl) if last_nl
  omitted = total - head.bytesize
  marker = "\n\n... [#{omitted} bytes omitted; total was #{total} bytes; " \
           'refine pattern or path] ...'
  [head, marker]
end
|
|
269
|
+
private_class_method :head_truncate
|
|
270
|
+
|
|
271
|
+
# Compose the observation returned when nothing matched. The search
# directory is mentioned only when the caller supplied one.
#
# @param pattern [String] glob pattern as given
# @param path [String, nil] search directory as given, if any
# @return [String]
def self.no_match_message(pattern:, path:)
  location = path ? " in #{path}" : ''
  "No files match pattern '#{pattern}'#{location}."
end
|
|
277
|
+
private_class_method :no_match_message
|
|
278
|
+
|
|
279
|
+
# Render a count with the matching noun form: the singular for exactly
# one, the plural for everything else (zero included).
#
# @param n [Integer] the count
# @param sing [String] singular noun, e.g. +"file"+
# @param plural [String] plural noun, e.g. +"files"+
# @return [String] +"1 file"+ / +"2 files"+
def self.pluralize(n, sing, plural)
  noun = n == 1 ? sing : plural
  "#{n} #{noun}"
end
|
|
283
|
+
private_class_method :pluralize
|
|
284
|
+
|
|
285
|
+
# Probe for ripgrep by running +rg --version+ through
# {Pikuri::Subprocess.spawn}. A successful exit means rg is usable;
# both a failing exit and +Errno::ENOENT+ (raised when the binary
# can't be found) are reported as the same install hint.
#
# @return [void]
# @raise [RuntimeError] carrying the install hint when rg is missing
#   or the probe fails
def self.check_binaries!
  probe = Pikuri::Subprocess.spawn('rg', '--version', chdir: '/').wait
  raise install_hint unless probe.status.success?
rescue Errno::ENOENT
  raise install_hint
end
|
|
300
|
+
private_class_method :check_binaries!
|
|
301
|
+
|
|
302
|
+
# Message raised when the ripgrep probe fails: names the missing
# binary and suggests how to install it.
#
# @return [String]
def self.install_hint
  requirement = "Tool::Glob requires 'rg' (ripgrep) on PATH"
  remedy = "install via your distro's package manager (e.g. 'apt install ripgrep')."
  "#{requirement}; #{remedy}"
end
|
|
307
|
+
private_class_method :install_hint
|
|
308
|
+
end
|
|
309
|
+
end
|
|
310
|
+
end
|