pikuri-workspace 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +31 -16
- data/lib/pikuri/{tool → workspace}/confirmer.rb +3 -3
- data/lib/pikuri/{tool → workspace}/edit.rb +14 -14
- data/lib/pikuri/workspace/filesystem.rb +543 -0
- data/lib/pikuri/{tool → workspace}/glob.rb +29 -17
- data/lib/pikuri/{tool → workspace}/grep.rb +28 -16
- data/lib/pikuri/workspace/project_root.rb +102 -0
- data/lib/pikuri/workspace/read.rb +411 -0
- data/lib/pikuri/{tool → workspace}/write.rb +14 -14
- data/lib/pikuri-workspace.rb +16 -13
- metadata +20 -18
- data/lib/pikuri/tool/read.rb +0 -254
- data/lib/pikuri/tool/workspace.rb +0 -150
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'pathname'
|
|
4
|
+
require 'tmpdir'
|
|
5
|
+
require 'fileutils'
|
|
6
|
+
|
|
7
|
+
module Pikuri
|
|
8
|
+
module Workspace
|
|
9
|
+
# Defines which paths the agent can see and write to. Constructed with
|
|
10
|
+
# explicit +readable+ / +writable+ prefix lists; every Read/Write/Edit/
|
|
11
|
+
# Grep/Glob/Bash path the agent supplies is checked against those lists
|
|
12
|
+
# before touching the filesystem. Returned Pathnames are absolute,
|
|
13
|
+
# post-symlink-resolution.
|
|
14
|
+
#
|
|
15
|
+
# == Project root, readable, writable
|
|
16
|
+
#
|
|
17
|
+
# +project_root+ is the writable containment ceiling — automatically
|
|
18
|
+
# folded into both +readable+ and +writable+; you can read and write
|
|
19
|
+
# anywhere under the project unconditionally. It is also the base
|
|
20
|
+
# for resolving relative paths supplied by the LLM, and the chdir
|
|
21
|
+
# target tools like {Bash} / {Grep} / {Glob} pass to the subprocess.
|
|
22
|
+
# There is deliberately no separate "cwd" concept: hosts that want
|
|
23
|
+
# the agent to operate inside a specific subtree pass that subtree
|
|
24
|
+
# as +project_root+. +bin/pikuri-code+ enforces this by
|
|
25
|
+
# +Dir.chdir+'ing the Ruby process to the discovered project root
|
|
26
|
+
# at startup if it differs from the launch +Dir.pwd+, so the
|
|
27
|
+
# workspace and the surrounding process agree on one anchor.
|
|
28
|
+
#
|
|
29
|
+
# The extra +readable+ list grants read-only access to additional
|
|
30
|
+
# roots (system toolchains, dependency caches, skill catalogs);
|
|
31
|
+
# the extra +writable+ list grants read+write to additional roots
|
|
32
|
+
# (other project directories the agent should be able to touch).
|
|
33
|
+
#
|
|
34
|
+
# == Session umbrella ({#internal_temp})
|
|
35
|
+
#
|
|
36
|
+
# Every workspace owns a per-process umbrella dir at
|
|
37
|
+
# +~/.cache/pikuri/workspace-XXX/+ ({#internal_temp}). It is minted
|
|
38
|
+
# lazily on first access — workspaces that never touch it (most
|
|
39
|
+
# specs, hosts that don't want a playground and don't use the
|
|
40
|
+
# bubblewrap overlay) pay nothing — and removed by a single
|
|
41
|
+
# +at_exit+ handler when the process exits. Everything ephemeral
|
|
42
|
+
# this workspace produces lives inside the umbrella, so one
|
|
43
|
+
# +remove_entry+ at process exit cleans the lot:
|
|
44
|
+
#
|
|
45
|
+
# * {#temp} — the LLM-visible playground subdir, present only when
|
|
46
|
+
# +temp: true+ (see below).
|
|
47
|
+
# * The bubblewrap sandbox's per-toolchain overlay state
|
|
48
|
+
# (+overlay-<slug>/{upper,work}+ for +~/.gradle/caches+, +~/.m2/repository+,
|
|
49
|
+
# …), used to keep cross-project toolchain caches isolated to one
|
|
50
|
+
# pikuri-code session. See
|
|
51
|
+
# {Pikuri::Code::Bash::Sandbox::Bubblewrap}.
|
|
52
|
+
#
|
|
53
|
+
# The umbrella deliberately lives in +~/.cache/pikuri+ rather than
|
|
54
|
+
# +/tmp+: the {Pikuri::Code::Bash::Sandbox::Bubblewrap} sandbox
|
|
55
|
+
# binds {#temp} at +/tmp+ inside the sandbox (so the LLM's
|
|
56
|
+
# reflexive +/tmp+ writes persist across bash calls). With the
|
|
57
|
+
# umbrella already under +/tmp+, that bind would land on top of
|
|
58
|
+
# itself and per-call mountpoint creation would pollute the dir
|
|
59
|
+
# recursively. +~/.cache/pikuri+ avoids the collision.
|
|
60
|
+
#
|
|
61
|
+
# At gem load, {.sweep_stale_internal_temps!} prunes umbrella dirs
|
|
62
|
+
# older than seven days — a safety net for sessions that died
|
|
63
|
+
# before +at_exit+ could run (SIGKILL, OOM). Recent umbrellas are
|
|
64
|
+
# left alone so a concurrent pikuri-code in another shell isn't
|
|
65
|
+
# disturbed.
|
|
66
|
+
#
|
|
67
|
+
# == Optional temp playground
|
|
68
|
+
#
|
|
69
|
+
# When constructed with +temp: true+, the workspace adds
|
|
70
|
+
# +<internal_temp>/playground+ to +writable+ and exposes it via
|
|
71
|
+
# {#temp}. The binary advertises this path to the LLM (e.g. in the
|
|
72
|
+
# system prompt) as scratch space. Default is +false+: specs and
|
|
73
|
+
# tests that build many workspaces don't pay the mkdir cost, and
|
|
74
|
+
# hosts that don't need a playground don't get one. Either way,
|
|
75
|
+
# the umbrella is shared with everything else that wants ephemeral
|
|
76
|
+
# state — no second tempdir is minted.
|
|
77
|
+
#
|
|
78
|
+
# == Optional /tmp alias
|
|
79
|
+
#
|
|
80
|
+
# When +alias_tmp_to_temp: true+ AND +temp:+ is set, file paths
|
|
81
|
+
# supplied to {#resolve_for_read} / {#resolve_for_write} that
|
|
82
|
+
# start with +/tmp/+ (or are exactly +/tmp+) are rewritten to the
|
|
83
|
+
# host {#temp} path before containment is checked. This is the
|
|
84
|
+
# host-side counterpart to the sandbox's +--bind <temp> /tmp+: bash
|
|
85
|
+
# inside the sandbox writes to +/tmp/foo+, then the file tools
|
|
86
|
+
# (which run on the host, not in the sandbox) accept the same
|
|
87
|
+
# +/tmp/foo+ path and resolve it to the workspace temp's host
|
|
88
|
+
# path. Without this, the LLM would have to remember two paths
|
|
89
|
+
# for the same dir. Off by default; +bin/pikuri-code+ flips it on
|
|
90
|
+
# when the bubblewrap sandbox is enabled.
|
|
91
|
+
#
|
|
92
|
+
# == Read-set vs. write-set
|
|
93
|
+
#
|
|
94
|
+
# {#resolve_for_read} checks against +readable + writable+ (you can
|
|
95
|
+
# read anything writable). {#resolve_for_write} checks against
|
|
96
|
+
# +writable+ only. Tools that mutate state route through the second
|
|
97
|
+
# method; tools that only inspect route through the first.
|
|
98
|
+
#
|
|
99
|
+
# == Existence is not the workspace's concern
|
|
100
|
+
#
|
|
101
|
+
# +resolve_for_read('foo.rb')+ succeeds (returns a +Pathname+) even if
|
|
102
|
+
# +foo.rb+ doesn't exist; the caller ({Read}) errors with
|
|
103
|
+
# file-not-found when it tries to open it. +resolve_for_write+
|
|
104
|
+
# tolerates entirely non-existent paths (Write can create
|
|
105
|
+
# +lib/new/dir/foo.rb+ even when +lib/new/+ doesn't exist) — the
|
|
106
|
+
# caller is responsible for any +mkdir_p+ before writing. This split
|
|
107
|
+
# keeps the workspace narrowly responsible for *containment*, not for
|
|
108
|
+
# filesystem-state checks.
|
|
109
|
+
#
|
|
110
|
+
# == Subprocess environment ({#env})
|
|
111
|
+
#
|
|
112
|
+
# Workspaces own a {#env} +Hash<String,String>+ that subprocess-
|
|
113
|
+
# spawning tools (currently {Pikuri::Code::Bash}) thread into
|
|
114
|
+
# {Pikuri::Subprocess.spawn}. The motivating case: the bubblewrap
|
|
115
|
+
# sandbox doesn't bind-mount +~/.gitconfig+, so +git commit+ inside
|
|
116
|
+
# fails (modern git refuses rather than synthesizing a default
|
|
117
|
+
# from +/etc/passwd+ — which isn't bind-mounted either). The
|
|
118
|
+
# workspace resolves the host's effective git identity at
|
|
119
|
+
# {#project_root} (so +includeIf+ rules apply: a repo under
|
|
120
|
+
# +~/work/my/+ that's covered by a +gitdir:~/work/my/+ include
|
|
121
|
+
# gets that identity, not the global default) and exposes it as
|
|
122
|
+
# +GIT_AUTHOR_*+ / +GIT_COMMITTER_*+ env vars that override config
|
|
123
|
+
# entirely and need no file in the sandbox.
|
|
124
|
+
#
|
|
125
|
+
# The lookup is *lazy* and *memoized*: the constructor doesn't
|
|
126
|
+
# shell out, so building a workspace stays cheap (specs that
|
|
127
|
+
# never read +#env+ pay nothing). First access runs +git -C
|
|
128
|
+
# project_root config user.{name,email}+ once. Falls back to +{}+
|
|
129
|
+
# if +git+ isn't on +PATH+ or no identity is configured — the
|
|
130
|
+
# subprocess then runs unmediated and git's own "please tell me
|
|
131
|
+
# who you are" surfaces inside the sandbox.
|
|
132
|
+
#
|
|
133
|
+
# Hosts that want a different env (extra vars, no git resolution,
|
|
134
|
+
# explicit identity) pass +env:+ to the constructor; that value
|
|
135
|
+
# is used verbatim and the git lookup is skipped. Always frozen
|
|
136
|
+
# by the time it leaves the workspace.
|
|
137
|
+
#
|
|
138
|
+
# == Containment algorithm
|
|
139
|
+
#
|
|
140
|
+
# {#resolve} walks up the input path to its deepest existing ancestor,
|
|
141
|
+
# +realpath+'s that ancestor (resolving any symlinks in the existing
|
|
142
|
+
# portion), then verifies the resolved base matches one of the
|
|
143
|
+
# candidate roots. Four cases:
|
|
144
|
+
#
|
|
145
|
+
# 1. +lib/foo.rb+ (exists) → +existing+ = full path, +base+ matches a
|
|
146
|
+
# root → returns the realpath'd file.
|
|
147
|
+
# 2. +lib/new/dir/foo.rb+ (intermediates missing) → walks up to the
|
|
148
|
+
# deepest existing parent inside a root → returns the intended new
|
|
149
|
+
# path (caller +mkdir_p+s the parent before writing).
|
|
150
|
+
# 3. +lib/../../etc/passwd+ (+..+ escape) → +cleanpath+ collapses +..+
|
|
151
|
+
# syntactically, so the walk lands outside every root → {Error}.
|
|
152
|
+
# 4. +link/foo.rb+ where +link → /etc+ (symlink escape) → walks to
|
|
153
|
+
# +link+ (which exists), +realpath+ resolves through the symlink to
|
|
154
|
+
# +/etc+, outside every root → {Error}.
|
|
155
|
+
#
|
|
156
|
+
# Pure lexical normalization (+cleanpath+ + prefix check) catches cases
|
|
157
|
+
# 1–3 but misses case 4. The walk-up +realpath+ pass closes that gap.
|
|
158
|
+
#
|
|
159
|
+
# == Project-root denylist
|
|
160
|
+
#
|
|
161
|
+
# Setting +project_root+ to a system root or a user's home directory
|
|
162
|
+
# is almost always a misconfiguration: it makes the entire system or
|
|
163
|
+
# home tree writable. The constructor rejects {DENIED_PROJECT_ROOTS}
|
|
164
|
+
# (system tops: +/+, +/etc+, +/var+, …) and any directory whose
|
|
165
|
+
# parent is +/home+ (catches +/home/$USER+ and +/home/$OTHER_USER+).
|
|
166
|
+
# This is a sanity guard against fat-fingering, not a security
|
|
167
|
+
# perimeter — the real security is the +readable+/+writable+ lists.
|
|
168
|
+
# Other-OS home roots (+/Users/$USER+ on macOS) are not denied;
|
|
169
|
+
# Linux-first per +CLAUDE.md+.
|
|
170
|
+
class Filesystem
|
|
171
|
+
# Raised for any path that resolves outside the workspace. Recoverable
|
|
172
|
+
# at the tool layer — tools rescue this and emit +"Error: ..."+
|
|
173
|
+
# observations so the LLM can self-correct on the next turn. Also
|
|
174
|
+
# raised at construction time for a denied project root.
|
|
175
|
+
class Error < StandardError; end
|
|
176
|
+
|
|
177
|
+
# Parent directory under which every workspace mints its
|
|
178
|
+
# umbrella ({#internal_temp}). Honors +XDG_CACHE_HOME+ when set,
|
|
179
|
+
# else +~/.cache+; the +pikuri+ subdir is owned by us.
|
|
180
|
+
# +mkdir_p+'d lazily on first umbrella access.
|
|
181
|
+
# An empty or relative +XDG_CACHE_HOME+ is treated as unset, as the XDG
# Base Directory spec requires; otherwise the umbrella parent would
# become +/pikuri+ (empty value) or a directory under the current
# working directory (relative value).
CACHE_BASE = begin
  xdg_cache_home = ENV['XDG_CACHE_HOME'].to_s
  cache_home = xdg_cache_home.start_with?(File::SEPARATOR) ? xdg_cache_home : File.join(Dir.home, '.cache')
  File.join(cache_home, 'pikuri')
end
|
|
182
|
+
|
|
183
|
+
# Umbrella dirs older than this are reaped by
|
|
184
|
+
# {.sweep_stale_internal_temps!} at gem load. Generous enough
|
|
185
|
+
# that a long-lived pikuri session in another shell isn't
|
|
186
|
+
# disturbed; tight enough that a process killed last week
|
|
187
|
+
# doesn't leak forever.
|
|
188
|
+
INTERNAL_TEMP_STALE_SECONDS = 7 * 24 * 60 * 60
|
|
189
|
+
|
|
190
|
+
# System-root project_roots the constructor refuses. Exact-match
|
|
191
|
+
# (not prefix) — +/home/user/project+ passes, +/home/user+ is
|
|
192
|
+
# rejected by the parent-is-/home check below. Frozen list;
|
|
193
|
+
# downstream hosts with unusual layouts can subclass if they
|
|
194
|
+
# really need a different policy.
|
|
195
|
+
DENIED_PROJECT_ROOTS = %w[
|
|
196
|
+
/ /etc /var /proc /sys /dev /boot /root
|
|
197
|
+
/usr /opt /lib /lib64 /bin /sbin /tmp
|
|
198
|
+
].map { |p| Pathname.new(p) }.freeze
|
|
199
|
+
|
|
200
|
+
# @return [Pathname] project root, post-realpath. The writable
|
|
201
|
+
# containment ceiling, the base for relative-path resolution,
|
|
202
|
+
# and the chdir target for Bash/Grep/Glob — always in
|
|
203
|
+
# {#readable} and {#writable}.
|
|
204
|
+
attr_reader :project_root
|
|
205
|
+
|
|
206
|
+
# @return [Array<Pathname>] read-only roots (in addition to writable
|
|
207
|
+
# ones, which are also readable). Post-realpath, deduped.
|
|
208
|
+
attr_reader :readable
|
|
209
|
+
|
|
210
|
+
# @return [Array<Pathname>] writable roots (read+write). Includes
|
|
211
|
+
# {#project_root} and, if +temp: true+, {#temp}. Post-realpath,
|
|
212
|
+
# deduped.
|
|
213
|
+
attr_reader :writable
|
|
214
|
+
|
|
215
|
+
# @return [Pathname, nil] the LLM-visible scratch playground
|
|
216
|
+
# (writable, owned by this workspace) when constructed with
|
|
217
|
+
# +temp: true+, else +nil+. Lives at +<internal_temp>/playground+;
|
|
218
|
+
# removed transitively when the umbrella is wiped on process exit.
|
|
219
|
+
attr_reader :temp
|
|
220
|
+
|
|
221
|
+
# @param project_root [String, Pathname] absolute (or working-
|
|
222
|
+
# directory-relative) path to the project root. +realpath+'d
|
|
223
|
+
# once; must exist; must not match {DENIED_PROJECT_ROOTS} or be
|
|
224
|
+
# a direct child of +/home+.
|
|
225
|
+
# @param readable [Array<String, Pathname>] additional read-only
|
|
226
|
+
# roots. +realpath+'d at construction; missing entries raise
|
|
227
|
+
# loudly via {Pathname#realpath}.
|
|
228
|
+
# @param writable [Array<String, Pathname>] additional read+write
|
|
229
|
+
# roots. Same treatment as +readable+.
|
|
230
|
+
# @param temp [Boolean] when +true+, adds
|
|
231
|
+
# +<internal_temp>/playground+ to {#writable} and exposes it
|
|
232
|
+
# via {#temp}. Forces the umbrella to mint up-front (the
|
|
233
|
+
# playground is created eagerly so {#writable} reflects it).
|
|
234
|
+
# @param alias_tmp_to_temp [Boolean] when +true+ AND +temp:+ is
|
|
235
|
+
# set, +/tmp/*+ paths supplied to {#resolve_for_read} /
|
|
236
|
+
# {#resolve_for_write} are rewritten to point at {#temp}.
|
|
237
|
+
# Pairs with the bubblewrap sandbox's +--bind <temp> /tmp+.
|
|
238
|
+
# @param env [Hash{String=>String}, nil] subprocess environment
|
|
239
|
+
# exposed via {#env}. +nil+ (default) → lazy-derive the host
|
|
240
|
+
# git identity from {#project_root} on first access; explicit
|
|
241
|
+
# hash → use verbatim (and skip the git lookup entirely). See
|
|
242
|
+
# the class header §"Subprocess environment" for the
|
|
243
|
+
# rationale.
|
|
244
|
+
# @raise [Errno::ENOENT] if +project_root+ or any
|
|
245
|
+
# +readable+/+writable+ entry does not exist.
|
|
246
|
+
# @raise [Error] if +project_root+ is denied (system root or
|
|
247
|
+
# +/home/*+).
|
|
248
|
+
def initialize(project_root:, readable: [], writable: [], temp: false, alias_tmp_to_temp: false, env: nil)
  # The project root is canonicalized and vetted before anything else
  # happens, so a denied root never causes the umbrella to be minted.
  @project_root = Pathname.new(project_root).realpath
  validate_project_root!(@project_root)

  @env_override = env
  @internal_temp = nil
  @temp = (mint_playground if temp)
  # The /tmp alias is only meaningful when there is a playground to point at.
  @alias_tmp_to_temp = alias_tmp_to_temp && !@temp.nil?

  # Missing entries raise Errno::ENOENT from realpath; writable entries
  # are canonicalized before readable ones.
  extra_writable = writable.map { |entry| Pathname.new(entry).realpath }
  @writable = [@project_root, *extra_writable, @temp].compact.uniq

  # Everything writable is readable too; Array#| keeps order and drops duplicates.
  extra_readable = readable.map { |entry| Pathname.new(entry).realpath }
  @readable = @writable | extra_readable
end
|
|
260
|
+
|
|
261
|
+
# @return [Boolean] whether {#resolve_for_read} / {#resolve_for_write}
|
|
262
|
+
# rewrite +/tmp/*+ inputs to {#temp}.
|
|
263
|
+
attr_reader :alias_tmp_to_temp
|
|
264
|
+
|
|
265
|
+
# Environment variables for subprocesses spawned in this workspace.
# Lazy, memoized, frozen.
#
# When the constructor received +env: nil+ (the default), the first
# call derives +GIT_AUTHOR_*+ / +GIT_COMMITTER_*+ from the host's git
# identity via +compute_git_identity_env+ (which may shell out to
# +git+) and caches the result; that lookup yields +{}+ when no
# identity can be determined. When the constructor received an explicit
# hash, a frozen *copy* of it is returned and nothing is shelled out.
# Copying keeps the caller's own hash mutable: freezing the argument in
# place would make later writes by the host raise +FrozenError+.
#
# @return [Hash{String=>String}] frozen; the same object on every call
def env
  @env ||= (@env_override ? @env_override.dup : compute_git_identity_env).freeze
end
|
|
280
|
+
|
|
281
|
+
# The workspace's ephemeral umbrella directory. Nothing is created
# until the first call; that call delegates to
# {Filesystem.mint_internal_temp} (which creates the directory under
# {CACHE_BASE} and registers its +at_exit+ removal) and caches the
# result, so later calls return the same Pathname. Anything ephemeral
# that belongs to this workspace (the playground, sandbox overlay
# state) should live under this directory rather than in a sibling.
#
# @return [Pathname]
def internal_temp
  return @internal_temp if @internal_temp

  @internal_temp = Filesystem.mint_internal_temp
end
|
|
293
|
+
|
|
294
|
+
# @api private — minting helper shared with {AllowAll}.
#
# Creates {CACHE_BASE} if needed, mints a fresh +workspace-*+ directory
# inside it, and registers an +at_exit+ handler that removes it. The
# handler tolerates the directory already being gone (test cleanup,
# manual rm) and only fires in the process that minted the directory:
# +at_exit+ handlers are inherited across +fork+, so without the pid
# check a forked child that exits normally would delete the umbrella
# out from under its still-running parent.
#
# @return [Pathname] canonical (realpath'd) path of the new umbrella
def self.mint_internal_temp
  FileUtils.mkdir_p(CACHE_BASE)
  path = Pathname.new(Dir.mktmpdir('workspace-', CACHE_BASE)).realpath
  owner_pid = Process.pid
  at_exit do
    FileUtils.remove_entry(path.to_s) if Process.pid == owner_pid && path.exist?
  end
  path
end
|
|
303
|
+
|
|
304
|
+
# Best-effort garbage collection of abandoned umbrellas: removes every
# +workspace-*+ directory directly under {CACHE_BASE} whose mtime is at
# least {INTERNAL_TEMP_STALE_SECONDS} in the past. Entries with other
# names, non-directories, and recently touched umbrellas are left
# alone. Does nothing when {CACHE_BASE} is not a directory.
#
# A failure while examining or removing one entry (permission denied, a
# concurrent sweeper winning the race) is swallowed and the sweep moves
# on to the next entry; the per-process +at_exit+ removal is the
# primary cleanup path, this is only the safety net.
#
# @return [void]
def self.sweep_stale_internal_temps!
  return unless File.directory?(CACHE_BASE)

  oldest_kept = Time.now - INTERNAL_TEMP_STALE_SECONDS
  Dir.children(CACHE_BASE).each do |name|
    begin
      candidate = File.join(CACHE_BASE, name)
      reapable = name.start_with?('workspace-') &&
                 File.directory?(candidate) &&
                 File.mtime(candidate) <= oldest_kept
      FileUtils.remove_entry(candidate) if reapable
    rescue StandardError
      # best-effort sweep; never block the host on dead state
    end
  end
end
|
|
327
|
+
|
|
328
|
+
# Resolve a user-supplied path against the read-set (readable ∪
|
|
329
|
+
# writable). Returned Pathname is absolute and may not exist on
|
|
330
|
+
# disk; the caller validates existence separately.
|
|
331
|
+
#
|
|
332
|
+
# @param path [String]
|
|
333
|
+
# @return [Pathname]
|
|
334
|
+
# @raise [Error] if the resolved path falls outside every root
|
|
335
|
+
def resolve_for_read(path)
|
|
336
|
+
resolve(path, @readable)
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
# Resolve a user-supplied path against the write-set.
|
|
340
|
+
#
|
|
341
|
+
# @param path [String]
|
|
342
|
+
# @return [Pathname]
|
|
343
|
+
# @raise [Error] if the resolved path falls outside every writable root
|
|
344
|
+
def resolve_for_write(path)
|
|
345
|
+
resolve(path, @writable)
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
private
|
|
349
|
+
|
|
350
|
+
# Creates the +playground+ directory inside {#internal_temp} and
# returns its canonical path. Calling {#internal_temp} here is what
# forces the umbrella into existence, so the playground has a parent to
# live under and is ready to be listed in {#writable} by the time
# +initialize+ finishes. Only used when the workspace is built with
# +temp: true+.
#
# @return [Pathname] realpath of the playground directory
def mint_playground
  playground = internal_temp.join('playground')
  FileUtils.mkdir_p(playground)
  playground.realpath
end
|
|
359
|
+
|
|
360
|
+
# Sanity guard for the constructor: refuses a project root that is one
# of {DENIED_PROJECT_ROOTS} (exact match) or a direct child of +/home+.
#
# @param canonical [Pathname] the already-realpath'd project root
# @return [nil] when the root is acceptable
# @raise [Error] when the root is a system or home root
def validate_project_root!(canonical)
  home_dir = canonical.parent.to_s == '/home'
  return unless home_dir || DENIED_PROJECT_ROOTS.include?(canonical)

  message = "project_root '#{canonical}' is a system or home root; the workspace " \
            'project_root must be a project subdirectory (a system or home root would expose ' \
            'the entire tree as writable).'
  raise Error, message
end
|
|
368
|
+
|
|
369
|
+
# Containment check behind {#resolve_for_read} / {#resolve_for_write}.
# See the class header for the algorithm rationale.
#
# The input (after the optional +/tmp+ alias rewrite) is anchored at
# +@project_root+ when relative and lexically cleaned. The walk then
# climbs to the deepest ancestor that is present on disk, canonicalizes
# that ancestor, and accepts the path only when the canonical ancestor
# equals one of +roots+ or lies beneath one.
#
# A symlink counts as "present" even when its target is missing or it
# is part of a loop. +exist?+ follows links, so without the extra
# +symlink?+ test a dangling +link → /outside/new_file+ would be
# skipped, the walk would settle on the link's in-root parent, and a
# later write through the link would create a file outside every root.
#
# @param path [String] user-supplied path, absolute or relative to the
#   project root
# @param roots [Array<Pathname>] canonical candidate roots
# @return [Pathname] absolute path with the existing portion
#   symlink-resolved; the path itself may not exist
# @raise [Error] if the path resolves outside every root, or passes
#   through a symlink that cannot be resolved
def resolve(path, roots)
  path = apply_tmp_alias(path)
  pn = Pathname.new(path)
  pn = @project_root + pn unless pn.absolute?
  cleaned = pn.cleanpath

  existing = cleaned
  existing = existing.parent until existing.exist? || existing.symlink? || existing.root?

  base = begin
    # A dangling link has no realpath; realdirpath follows it and
    # tolerates a missing final component, yielding the location a
    # write through the link would actually hit.
    existing.exist? ? existing.realpath : existing.realdirpath
  rescue Errno::ENOENT, Errno::ELOOP
    raise Error, "path '#{path}' passes through a symlink that cannot be resolved ('#{existing}')"
  end

  contained = roots.any? do |root|
    # "/" + separator would be "//", which no canonical path starts
    # with, so the filesystem root needs its own test.
    base == root || root.root? || base.to_s.start_with?(root.to_s + File::SEPARATOR)
  end
  unless contained
    raise Error, "path '#{path}' is outside the workspace " \
                 "(roots: #{roots.map(&:to_s).join(', ')})"
  end

  base + cleaned.relative_path_from(existing)
end
|
|
388
|
+
|
|
389
|
+
# Builds the git identity environment for {#project_root} from two
# +git_config+ lookups (+user.name+ and +user.email+). Both lookups
# always run. If either comes back +nil+ or empty, no identity is
# exported at all, so the result is either the full four-entry hash
# (author and committer share the same name and email) or +{}+.
#
# @return [Hash{String=>String}]
def compute_git_identity_env
  name, email = %w[user.name user.email].map { |key| git_config(key) }
  return {} if [name, email].any? { |field| field.nil? || field.empty? }

  author = { 'GIT_AUTHOR_NAME' => name, 'GIT_AUTHOR_EMAIL' => email }
  committer = { 'GIT_COMMITTER_NAME' => name, 'GIT_COMMITTER_EMAIL' => email }
  author.merge(committer)
end
|
|
409
|
+
|
|
410
|
+
# Reads one git config value as seen from the project root by running
# +git -C <project_root> config <key>+ through
# {Pikuri::Subprocess.spawn} (with the project root as +chdir+ too).
#
# @param key [String] config key, e.g. +user.name+
# @return [String, nil] the whitespace-stripped output on a successful
#   exit; +nil+ on a non-zero exit or when spawning raises
#   +Errno::ENOENT+
def git_config(key)
  root = @project_root.to_s
  outcome = Pikuri::Subprocess.spawn('git', '-C', root, 'config', key, chdir: root).wait
  return nil unless outcome.status.success?

  outcome.output.strip
rescue Errno::ENOENT
  nil
end
|
|
419
|
+
|
|
420
|
+
# Maps +/tmp+ and +/tmp/...+ inputs onto the workspace playground when
# the +/tmp+ alias is active; every other input, and every input while
# the alias is off, is returned untouched. The match is purely textual
# on the raw input (exactly +/tmp+, or a +/tmp/+ prefix). Defined on
# the base class so {AllowAll} gets the same rewriting.
#
# @param path [String]
# @return [String]
def apply_tmp_alias(path)
  return path unless @alias_tmp_to_temp

  playground = @temp.to_s
  if path == '/tmp'
    playground
  elsif path.start_with?('/tmp/')
    # Drop the five-character "/tmp/" prefix and re-anchor the rest.
    File.join(playground, path[5..])
  else
    path
  end
end
|
|
430
|
+
|
|
431
|
+
# Unrestricted variant of {Filesystem}: every path resolves, with one
|
|
432
|
+
# carve-out — paths under {CREDENTIAL_DENYLIST} (the user's +~/.ssh+,
|
|
433
|
+
# +~/.aws+, +~/.gnupg+, +~/.docker+, +~/.kube+, +~/.netrc+, and
|
|
434
|
+
# +/etc/shadow+) still raise. Intended for dev-container / Docker
|
|
435
|
+
# mode where the container is the security boundary; the denylist
|
|
436
|
+
# remains as defense-in-depth against prompt-injection exfiltration
|
|
437
|
+
# of credentials that are commonly bind-mounted into a container
|
|
438
|
+
# from the host. Pairs naturally with +--yolo+ (no Confirmer
|
|
439
|
+
# prompt) — but note that combining with
|
|
440
|
+
# +Code::Bash::Sandbox::Bubblewrap+ defeats the sandbox (the
|
|
441
|
+
# whole filesystem ends up bind-mounted in), and bash inside
|
|
442
|
+
# the sandbox bypasses the denylist anyway, so the intended
|
|
443
|
+
# combo is ALLOW_ALL + +Sandbox::NONE+ inside a container.
|
|
444
|
+
#
|
|
445
|
+
# == Project root, temp
|
|
446
|
+
#
|
|
447
|
+
# +project_root:+ is still accepted for parity with {Filesystem},
|
|
448
|
+
# even though it's not a real containment ceiling under AllowAll.
|
|
449
|
+
# It is NOT validated against {DENIED_PROJECT_ROOTS} (passing
|
|
450
|
+
# +'/'+ is legitimate inside a container). It still serves as the
|
|
451
|
+
# base for relative-path resolution and the chdir target tools
|
|
452
|
+
# like Bash use. +temp:+ behaves the same way as {Filesystem}.
|
|
453
|
+
#
|
|
454
|
+
# == Denylist semantics
|
|
455
|
+
#
|
|
456
|
+
# Each entry is realpath'd lazily at check time when it exists
|
|
457
|
+
# (so a symlink at e.g. +/tmp/decoy → ~/.ssh/id_rsa+ still gets
|
|
458
|
+
# caught — the resolved path lands under +~/.ssh+). Non-existent
|
|
459
|
+
# entries fall back to a literal-prefix match — blocks the agent
|
|
460
|
+
# from *creating* a credential dir the user doesn't have yet (e.g.
|
|
461
|
+
# planting +~/.gnupg+ for a later GPG operation to pick up). The
|
|
462
|
+
# list is hardcoded; subclass for a host with a different policy.
|
|
463
|
+
class AllowAll < Filesystem
|
|
464
|
+
# Credential locations that stay off-limits even under ALLOW_ALL:
# +.ssh+, +.aws+, +.gnupg+, +.docker+, +.kube+ and +.netrc+ under both
# +ENV['HOME']+ (read once, when this constant is evaluated) and
# +/root+, plus +/etc/shadow+. Stored as a frozen, de-duplicated array
# of Pathnames.
CREDENTIAL_DENYLIST = [ENV['HOME'], '/root']
                      .compact
                      .uniq
                      .product(%w[.ssh .aws .gnupg .docker .kube .netrc])
                      .map { |home, entry| Pathname.new(File.join(home, entry)) }
                      .push(Pathname.new('/etc/shadow'))
                      .uniq
                      .freeze
|
|
473
|
+
|
|
474
|
+
# @param project_root [String, Pathname] surface-level project
|
|
475
|
+
# root. Not validated against {DENIED_PROJECT_ROOTS}.
|
|
476
|
+
# @param temp [Boolean] same semantics as {Filesystem}.
|
|
477
|
+
# @param alias_tmp_to_temp [Boolean] same semantics as
|
|
478
|
+
# {Filesystem} — when +true+ AND +temp:+ is set, +/tmp/*+
|
|
479
|
+
# inputs rewrite to land under {#temp}.
|
|
480
|
+
# @param env [Hash{String=>String}, nil] same semantics as
|
|
481
|
+
# {Filesystem}; see {Filesystem#env}.
|
|
482
|
+
def initialize(project_root: Dir.pwd, temp: false, alias_tmp_to_temp: false, env: nil)
  # Canonicalized, but not checked against the project-root denylist;
  # the parent constructor is not invoked.
  @project_root = Pathname.new(project_root).realpath

  @env_override = env
  @internal_temp = nil
  @temp = (mint_playground if temp)
  @alias_tmp_to_temp = alias_tmp_to_temp && !@temp.nil?

  # The accessor surface advertises "everything is in scope" so callers
  # that inspect +readable+/+writable+ (system-prompt rendering,
  # Bubblewrap bind-mount construction) see the intended semantics.
  # The actual restriction is the credential denylist applied in
  # {#resolve_for_read}/{#resolve_for_write}.
  filesystem_root = Pathname.new('/').realpath
  @writable = [filesystem_root, @temp].compact.uniq
  @readable = @writable.dup
end
|
|
498
|
+
|
|
499
|
+
# @param path [String]
|
|
500
|
+
# @return [Pathname]
|
|
501
|
+
# @raise [Error] if the resolved path lands under {CREDENTIAL_DENYLIST}.
|
|
502
|
+
def resolve_for_read(path)
|
|
503
|
+
resolve_unrestricted(path)
|
|
504
|
+
end
|
|
505
|
+
|
|
506
|
+
# @param path [String]
|
|
507
|
+
# @return [Pathname]
|
|
508
|
+
# @raise [Error] if the resolved path lands under {CREDENTIAL_DENYLIST}.
|
|
509
|
+
def resolve_for_write(path)
|
|
510
|
+
resolve_unrestricted(path)
|
|
511
|
+
end
|
|
512
|
+
|
|
513
|
+
private
|
|
514
|
+
|
|
515
|
+
# Resolves +path+ with no root containment, then refuses it when the
# resolved location is, or lies beneath, an entry of
# {CREDENTIAL_DENYLIST}.
#
# The input (after the optional +/tmp+ alias rewrite) is anchored at
# +@project_root+ when relative and lexically cleaned; the deepest
# ancestor present on disk is canonicalized and the remaining
# components are appended to it. A symlink counts as "present" even
# when dangling: +exist?+ follows links, so without the +symlink?+ test
# a dangling +decoy → ~/.ssh/authorized_keys+ would resolve to the
# harmless-looking +decoy+ path and a later write through it would land
# inside the denied directory.
#
# Denylist entries that exist are compared by realpath; entries that do
# not exist are compared literally.
#
# @param path [String] user-supplied path, absolute or relative to the
#   project root
# @return [Pathname] absolute path with the existing portion
#   symlink-resolved; the path itself may not exist
# @raise [Error] if the path lands under a denied credential location
#   or passes through a symlink that cannot be resolved
def resolve_unrestricted(path)
  path = apply_tmp_alias(path)
  pn = Pathname.new(path)
  pn = @project_root + pn unless pn.absolute?
  cleaned = pn.cleanpath

  existing = cleaned
  existing = existing.parent until existing.exist? || existing.symlink? || existing.root?

  base = begin
    # A dangling link has no realpath; realdirpath follows it and
    # tolerates a missing final component, yielding the location a
    # write through the link would actually hit.
    existing.exist? ? existing.realpath : existing.realdirpath
  rescue Errno::ENOENT, Errno::ELOOP
    raise Error, "path '#{path}' passes through a symlink that cannot be resolved ('#{existing}')"
  end
  resolved = base + cleaned.relative_path_from(existing)

  CREDENTIAL_DENYLIST.each do |denied|
    candidate = denied.exist? ? denied.realpath : denied
    next unless resolved == candidate ||
                resolved.to_s.start_with?(candidate.to_s + File::SEPARATOR)

    raise Error,
          "path '#{path}' resolves under a denied credential location " \
          "('#{denied}') — even in ALLOW_ALL mode, ~/.ssh / ~/.aws / " \
          '~/.gnupg / ~/.docker / ~/.kube / ~/.netrc / /etc/shadow ' \
          'are off-limits.'
  end

  resolved
end
|
|
540
|
+
end
|
|
541
|
+
end
|
|
542
|
+
end
|
|
543
|
+
end
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Pikuri
|
|
4
|
-
|
|
4
|
+
module Workspace
|
|
5
5
|
# The +glob+ tool — list files matching a glob pattern via
|
|
6
6
|
# +rg --files+, sorted by modification time (newest first).
|
|
7
|
-
# Instantiating +
|
|
8
|
-
# whose {Tool#to_ruby_llm_tool} wiring is identical to any
|
|
9
|
-
# tool's. Same shape as {
|
|
7
|
+
# Instantiating +Glob.new(workspace: ws)+ produces a tool
|
|
8
|
+
# whose {Pikuri::Tool#to_ruby_llm_tool} wiring is identical to any
|
|
9
|
+
# bundled tool's. Same shape as {Grep} (workspace captured by the
|
|
10
10
|
# +execute+ closure, no confirmer — read-only).
|
|
11
11
|
#
|
|
12
12
|
# == Why a separate tool from Grep
|
|
@@ -63,7 +63,7 @@ module Pikuri
|
|
|
63
63
|
# == Truncation
|
|
64
64
|
#
|
|
65
65
|
# Total output head-truncated to {MAX_BYTES} *after* mtime sort, so
|
|
66
|
-
# the kept rows are the newest. Matches {
|
|
66
|
+
# the kept rows are the newest. Matches {Grep}'s budget and
|
|
67
67
|
# head-bias.
|
|
68
68
|
#
|
|
69
69
|
# == Exit codes
|
|
@@ -82,10 +82,10 @@ module Pikuri
|
|
|
82
82
|
# tool.
|
|
83
83
|
# * +path+ not found → +"Error: path not found: <path>"+.
|
|
84
84
|
# * +path+ outside the workspace → caught from
|
|
85
|
-
# {
|
|
86
|
-
class Glob < Tool
|
|
85
|
+
# {Filesystem::Error}.
|
|
86
|
+
class Glob < Pikuri::Tool
|
|
87
87
|
# @return [Integer] hard byte cap on combined rg output. Same
|
|
88
|
-
# value as {
|
|
88
|
+
# value as {Grep::MAX_BYTES} so the two file-touching
|
|
89
89
|
# tools share a budget shape. Re-declared here rather than
|
|
90
90
|
# referenced cross-file because Zeitwerk's eager-load order
|
|
91
91
|
# isn't guaranteed between siblings.
|
|
@@ -112,7 +112,7 @@ module Pikuri
|
|
|
112
112
|
- Output is truncated to #{MAX_BYTES_LABEL}; refine the pattern or narrow `path` if the response ends in a truncation marker.
|
|
113
113
|
DESC
|
|
114
114
|
|
|
115
|
-
# @param workspace [
|
|
115
|
+
# @param workspace [Filesystem] captured for path resolution
|
|
116
116
|
# and as +chdir+ for rg. All path arguments route through
|
|
117
117
|
# +workspace.resolve_for_read+.
|
|
118
118
|
# @raise [RuntimeError] if +rg+ isn't on +PATH+; fail-loud at
|
|
@@ -139,12 +139,24 @@ module Pikuri
|
|
|
139
139
|
)
|
|
140
140
|
end
|
|
141
141
|
|
|
142
|
+
# Produce a new {Glob} bound to +workspace+. Used by
|
|
143
|
+
# {Pikuri::SubAgent::SubAgentTool} when a persona supplies a
|
|
144
|
+
# +workspace_factory:+ — the parent's instance is rebuilt for
|
|
145
|
+
# the sub-agent's fresh workspace so paths resolve against the
|
|
146
|
+
# right root.
|
|
147
|
+
#
|
|
148
|
+
# @param workspace [Filesystem]
|
|
149
|
+
# @return [Glob]
|
|
150
|
+
def with_workspace(workspace)
|
|
151
|
+
self.class.new(workspace: workspace)
|
|
152
|
+
end
|
|
153
|
+
|
|
142
154
|
# Validate inputs, resolve the path against the workspace, spawn
|
|
143
155
|
# rg, mtime-sort, head-truncate, render. Returns either the
|
|
144
156
|
# formatted listing, a "no files match" message, or
|
|
145
157
|
# +"Error: ..."+.
|
|
146
158
|
#
|
|
147
|
-
# @param workspace [
|
|
159
|
+
# @param workspace [Filesystem]
|
|
148
160
|
# @param pattern [String]
|
|
149
161
|
# @param path [String, nil]
|
|
150
162
|
# @return [String]
|
|
@@ -159,12 +171,12 @@ module Pikuri
|
|
|
159
171
|
return "Error: #{path} is a file, not a directory; use the read tool to view it."
|
|
160
172
|
end
|
|
161
173
|
|
|
162
|
-
rel = resolved.relative_path_from(workspace.
|
|
174
|
+
rel = resolved.relative_path_from(workspace.project_root).to_s
|
|
163
175
|
search_target = rel
|
|
164
176
|
end
|
|
165
177
|
|
|
166
178
|
argv = build_argv(path: search_target)
|
|
167
|
-
result = Pikuri::Subprocess.spawn(*argv, chdir: workspace.
|
|
179
|
+
result = Pikuri::Subprocess.spawn(*argv, chdir: workspace.project_root.to_s).wait
|
|
168
180
|
exit_code = result.status.exitstatus
|
|
169
181
|
|
|
170
182
|
case exit_code
|
|
@@ -178,7 +190,7 @@ module Pikuri
|
|
|
178
190
|
stderr = "exited #{exit_code}" if stderr.empty?
|
|
179
191
|
"Error: ripgrep: #{stderr}"
|
|
180
192
|
end
|
|
181
|
-
rescue
|
|
193
|
+
rescue Filesystem::Error => e
|
|
182
194
|
"Error: #{e.message}"
|
|
183
195
|
end
|
|
184
196
|
|
|
@@ -217,7 +229,7 @@ module Pikuri
|
|
|
217
229
|
paths = all_paths.select { |p| File.fnmatch?(pattern, p, FNMATCH_FLAGS) }
|
|
218
230
|
return no_match_message(pattern: pattern, path: path) if paths.empty?
|
|
219
231
|
|
|
220
|
-
sorted = mtime_sort(paths, workspace.
|
|
232
|
+
sorted = mtime_sort(paths, workspace.project_root)
|
|
221
233
|
joined = sorted.join("\n") + "\n"
|
|
222
234
|
content, truncation_marker = head_truncate(joined)
|
|
223
235
|
stripped = content.chomp
|
|
@@ -231,9 +243,9 @@ module Pikuri
|
|
|
231
243
|
# mtime descending; path ascending for stable order on ties.
|
|
232
244
|
#
|
|
233
245
|
# @return [Array<String>]
|
|
234
|
-
def self.mtime_sort(paths,
|
|
246
|
+
def self.mtime_sort(paths, base)
|
|
235
247
|
paths
|
|
236
|
-
.map { |p| [p, mtime_of(
|
|
248
|
+
.map { |p| [p, mtime_of(base + p)] }
|
|
237
249
|
.sort_by { |(p, m)| [-m, p] }
|
|
238
250
|
.map(&:first)
|
|
239
251
|
end
|
|
@@ -301,7 +313,7 @@ module Pikuri
|
|
|
301
313
|
|
|
302
314
|
# @return [String]
|
|
303
315
|
def self.install_hint
|
|
304
|
-
"
|
|
316
|
+
"Glob requires 'rg' (ripgrep) on PATH; install via your " \
|
|
305
317
|
"distro's package manager (e.g. 'apt install ripgrep')."
|
|
306
318
|
end
|
|
307
319
|
private_class_method :install_hint
|