pikuri-workspace 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,543 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+ require 'tmpdir'
5
+ require 'fileutils'
6
+
7
+ module Pikuri
8
+ module Workspace
9
+ # Defines which paths the agent can see and write to. Constructed with
10
+ # explicit +readable+ / +writable+ prefix lists; every Read/Write/Edit/
11
+ # Grep/Glob/Bash path the agent supplies is checked against those lists
12
+ # before touching the filesystem. Returned Pathnames are absolute,
13
+ # post-symlink-resolution.
14
+ #
15
+ # == Project root, readable, writable
16
+ #
17
+ # +project_root+ is the writable containment ceiling — automatically
18
+ # folded into both +readable+ and +writable+; you can read and write
19
+ # anywhere under the project unconditionally. It is also the base
20
+ # for resolving relative paths supplied by the LLM, and the chdir
21
+ # target tools like {Bash} / {Grep} / {Glob} pass to the subprocess.
22
+ # There is deliberately no separate "cwd" concept: hosts that want
23
+ # the agent to operate inside a specific subtree pass that subtree
24
+ # as +project_root+. +bin/pikuri-code+ enforces this by
25
+ # +Dir.chdir+'ing the Ruby process to the discovered project root
26
+ # at startup if it differs from the launch +Dir.pwd+, so the
27
+ # workspace and the surrounding process agree on one anchor.
28
+ #
29
+ # The extra +readable+ list grants read-only access to additional
30
+ # roots (system toolchains, dependency caches, skill catalogs);
31
+ # the extra +writable+ list grants read+write to additional roots
32
+ # (other project directories the agent should be able to touch).
33
+ #
34
+ # == Session umbrella ({#internal_temp})
35
+ #
36
+ # Every workspace owns a per-process umbrella dir at
37
+ # +~/.cache/pikuri/workspace-XXX/+ ({#internal_temp}). It is minted
38
+ # lazily on first access — workspaces that never touch it (most
39
+ # specs, hosts that don't want a playground and don't use the
40
+ # bubblewrap overlay) pay nothing — and removed by a single
41
+ # +at_exit+ handler when the process exits. Everything ephemeral
42
+ # this workspace produces lives inside the umbrella, so one
43
+ # +remove_entry+ at process exit cleans the lot:
44
+ #
45
+ # * {#temp} — the LLM-visible playground subdir, present only when
46
+ # +temp: true+ (see below).
47
+ # * The bubblewrap sandbox's per-toolchain overlay state
48
+ # (+overlay-<slug>/{upper,work}+ for +~/.gradle/caches+, +~/.m2/repository+,
49
+ # …), used to keep cross-project toolchain caches isolated to one
50
+ # pikuri-code session. See
51
+ # {Pikuri::Code::Bash::Sandbox::Bubblewrap}.
52
+ #
53
+ # The umbrella deliberately lives in +~/.cache/pikuri+ rather than
54
+ # +/tmp+: the {Pikuri::Code::Bash::Sandbox::Bubblewrap} sandbox
55
+ # binds {#temp} at +/tmp+ inside the sandbox (so the LLM's
56
+ # reflexive +/tmp+ writes persist across bash calls). With the
57
+ # umbrella already under +/tmp+, that bind would land on top of
58
+ # itself and per-call mountpoint creation would pollute the dir
59
+ # recursively. +~/.cache/pikuri+ avoids the collision.
60
+ #
61
+ # At gem load, {.sweep_stale_internal_temps!} prunes umbrella dirs
62
+ # older than seven days — a safety net for sessions that died
63
+ # before +at_exit+ could run (SIGKILL, OOM). Recent umbrellas are
64
+ # left alone so a concurrent pikuri-code in another shell isn't
65
+ # disturbed.
66
+ #
67
+ # == Optional temp playground
68
+ #
69
+ # When constructed with +temp: true+, the workspace adds
70
+ # +<internal_temp>/playground+ to +writable+ and exposes it via
71
+ # {#temp}. The binary advertises this path to the LLM (e.g. in the
72
+ # system prompt) as scratch space. Default is +false+: specs and
73
+ # tests that build many workspaces don't pay the mkdir cost, and
74
+ # hosts that don't need a playground don't get one. Either way,
75
+ # the umbrella is shared with everything else that wants ephemeral
76
+ # state — no second tempdir is minted.
77
+ #
78
+ # == Optional /tmp alias
79
+ #
80
+ # When +alias_tmp_to_temp: true+ AND +temp:+ is set, file paths
81
+ # supplied to {#resolve_for_read} / {#resolve_for_write} that
82
+ # start with +/tmp/+ (or are exactly +/tmp+) are rewritten to the
83
+ # host {#temp} path before containment is checked. This is the
84
+ # host-side counterpart to the sandbox's +--bind <temp> /tmp+: bash
85
+ # inside the sandbox writes to +/tmp/foo+, then the file tools
86
+ # (which run on the host, not in the sandbox) accept the same
87
+ # +/tmp/foo+ path and resolve it to the workspace temp's host
88
+ # path. Without this, the LLM would have to remember two paths
89
+ # for the same dir. Off by default; +bin/pikuri-code+ flips it on
90
+ # when the bubblewrap sandbox is enabled.
91
+ #
92
+ # == Read-set vs. write-set
93
+ #
94
+ # {#resolve_for_read} checks against +readable + writable+ (you can
95
+ # read anything writable). {#resolve_for_write} checks against
96
+ # +writable+ only. Tools that mutate state route through the second
97
+ # method; tools that only inspect route through the first.
98
+ #
99
+ # == Existence is not the workspace's concern
100
+ #
101
+ # +resolve_for_read('foo.rb')+ succeeds (returns a +Pathname+) even if
102
+ # +foo.rb+ doesn't exist; the caller ({Read}) errors with
103
+ # file-not-found when it tries to open it. +resolve_for_write+
104
+ # tolerates entirely non-existent paths (Write can create
105
+ # +lib/new/dir/foo.rb+ even when +lib/new/+ doesn't exist) — the
106
+ # caller is responsible for any +mkdir_p+ before writing. This split
107
+ # keeps the workspace narrowly responsible for *containment*, not for
108
+ # filesystem-state checks.
109
+ #
110
+ # == Subprocess environment ({#env})
111
+ #
112
+ # Workspaces own a {#env} +Hash<String,String>+ that subprocess-
113
+ # spawning tools (currently {Pikuri::Code::Bash}) thread into
114
+ # {Pikuri::Subprocess.spawn}. The motivating case: the bubblewrap
115
+ # sandbox doesn't bind-mount +~/.gitconfig+, so +git commit+ inside
116
+ # fails (modern git refuses rather than synthesizing a default
117
+ # from +/etc/passwd+ — which isn't bind-mounted either). The
118
+ # workspace resolves the host's effective git identity at
119
+ # {#project_root} (so +includeIf+ rules apply: a repo under
120
+ # +~/work/my/+ that's covered by a +gitdir:~/work/my/+ include
121
+ # gets that identity, not the global default) and exposes it as
122
+ # +GIT_AUTHOR_*+ / +GIT_COMMITTER_*+ env vars that override config
123
+ # entirely and need no file in the sandbox.
124
+ #
125
+ # The lookup is *lazy* and *memoized*: the constructor doesn't
126
+ # shell out, so building a workspace stays cheap (specs that
127
+ # never read +#env+ pay nothing). First access runs +git -C
128
+ # project_root config user.{name,email}+ once. Falls back to +{}+
129
+ # if +git+ isn't on +PATH+ or no identity is configured — the
130
+ # subprocess then runs unmediated and git's own "please tell me
131
+ # who you are" surfaces inside the sandbox.
132
+ #
133
+ # Hosts that want a different env (extra vars, no git resolution,
134
+ # explicit identity) pass +env:+ to the constructor; that value
135
+ # is used verbatim and the git lookup is skipped. Always frozen
136
+ # by the time it leaves the workspace.
137
+ #
138
+ # == Containment algorithm
139
+ #
140
+ # {#resolve} walks up the input path to its deepest existing ancestor,
141
+ # +realpath+'s that ancestor (resolving any symlinks in the existing
142
+ # portion), then verifies the resolved base matches one of the
143
+ # candidate roots. Four cases:
144
+ #
145
+ # 1. +lib/foo.rb+ (exists) → +existing+ = full path, +base+ matches a
146
+ # root → returns the realpath'd file.
147
+ # 2. +lib/new/dir/foo.rb+ (intermediates missing) → walks up to the
148
+ # deepest existing parent inside a root → returns the intended new
149
+ # path (caller +mkdir_p+s the parent before writing).
150
+ # 3. +lib/../../etc/passwd+ (+..+ escape) → +cleanpath+ collapses +..+
151
+ # syntactically, and the walk-up then lands outside every root → {Error}.
152
+ # 4. +link/foo.rb+ where +link → /etc+ (symlink escape) → walks to
153
+ # +link+ (which exists), +realpath+ resolves through the symlink to
154
+ # +/etc+, outside every root → {Error}.
155
+ #
156
+ # Pure lexical normalization (+cleanpath+ + prefix check) catches cases
157
+ # 1–3 but misses case 4. The walk-up +realpath+ pass closes that gap.
158
+ #
159
+ # == Project-root denylist
160
+ #
161
+ # Setting +project_root+ to a system root or a user's home directory
162
+ # is almost always a misconfiguration: it makes the entire system or
163
+ # home tree writable. The constructor rejects {DENIED_PROJECT_ROOTS}
164
+ # (system tops: +/+, +/etc+, +/var+, …) and any directory whose
165
+ # parent is +/home+ (catches +/home/$USER+ and +/home/$OTHER_USER+).
166
+ # This is a sanity guard against fat-fingering, not a security
167
+ # perimeter — the real security is the +readable+/+writable+ lists.
168
+ # Other-OS home roots (+/Users/$USER+ on macOS) are not denied;
169
+ # Linux-first per +CLAUDE.md+.
170
class Filesystem
  # Raised for any path that resolves outside the workspace, or whose
  # existing portion passes through a broken/looping symlink. Recoverable
  # at the tool layer — tools rescue this and emit +"Error: ..."+
  # observations so the LLM can self-correct on the next turn. Also
  # raised at construction time for a denied project root.
  class Error < StandardError; end

  # Parent directory under which every workspace mints its umbrella
  # ({#internal_temp}). Honors +XDG_CACHE_HOME+ only when it is set,
  # non-empty and absolute (per the XDG Base Directory spec an empty or
  # relative value must be ignored); otherwise +~/.cache+. The +pikuri+
  # subdir is owned by us and +mkdir_p+'d lazily on first umbrella access.
  CACHE_BASE = begin
    xdg_cache_home = ENV.fetch('XDG_CACHE_HOME', '')
    cache_home = Pathname.new(xdg_cache_home).absolute? ? xdg_cache_home : File.join(Dir.home, '.cache')
    File.join(cache_home, 'pikuri').freeze
  end

  # Umbrella dirs older than this are reaped by
  # {.sweep_stale_internal_temps!}. Generous enough that a long-lived
  # session in another shell isn't disturbed; tight enough that a
  # process killed last week doesn't leak forever.
  INTERNAL_TEMP_STALE_SECONDS = 7 * 24 * 60 * 60

  # System-root project_roots the constructor refuses. Exact-match (not
  # prefix) — +/home/user/project+ passes, +/home/user+ is rejected by
  # the parent-is-/home check in {#validate_project_root!}.
  DENIED_PROJECT_ROOTS = %w[
    / /etc /var /proc /sys /dev /boot /root
    /usr /opt /lib /lib64 /bin /sbin /tmp
  ].map { |p| Pathname.new(p) }.freeze

  # @return [Pathname] project root, post-realpath. Base for
  #   relative-path resolution; always in {#readable} and {#writable}.
  attr_reader :project_root

  # @return [Array<Pathname>] every root that may be read (the writable
  #   roots plus the extra read-only ones). Post-realpath, deduped.
  attr_reader :readable

  # @return [Array<Pathname>] writable roots. Includes {#project_root}
  #   and, if +temp: true+, {#temp}. Post-realpath, deduped.
  attr_reader :writable

  # @return [Pathname, nil] the scratch playground
  #   (+<internal_temp>/playground+) when constructed with +temp: true+,
  #   else +nil+.
  attr_reader :temp

  # @return [Boolean] whether {#resolve_for_read} / {#resolve_for_write}
  #   rewrite +/tmp/*+ inputs to {#temp}.
  attr_reader :alias_tmp_to_temp

  # @param project_root [String, Pathname] path to the project root.
  #   +realpath+'d once; must exist; must not match
  #   {DENIED_PROJECT_ROOTS} or be a direct child of +/home+.
  # @param readable [Array<String, Pathname>] additional read-only roots.
  # @param writable [Array<String, Pathname>] additional read+write roots.
  # @param temp [Boolean] when +true+, eagerly creates the playground and
  #   adds it to {#writable}.
  # @param alias_tmp_to_temp [Boolean] when +true+ AND +temp:+ is set,
  #   +/tmp/*+ inputs are rewritten to point at {#temp}.
  # @param env [Hash{String=>String}, nil] subprocess environment exposed
  #   via {#env}; +nil+ derives the git identity lazily on first access.
  # @raise [Errno::ENOENT] if +project_root+ or any extra root is missing.
  # @raise [Error] if +project_root+ is denied.
  def initialize(project_root:, readable: [], writable: [], temp: false, alias_tmp_to_temp: false, env: nil)
    @project_root = Pathname.new(project_root).realpath
    validate_project_root!(@project_root)

    # Canonicalize the extra roots before anything is minted on disk, so
    # a missing entry raises without creating an umbrella dir first.
    extra_writable = writable.map { |p| Pathname.new(p).realpath }
    extra_readable = readable.map { |p| Pathname.new(p).realpath }

    @internal_temp = nil
    @temp = temp ? mint_playground : nil
    @alias_tmp_to_temp = alias_tmp_to_temp && !@temp.nil?
    @env_override = env

    @writable = ([@project_root] + extra_writable + [@temp].compact).uniq
    @readable = (@writable + extra_readable).uniq
  end

  # Environment variables for subprocesses spawned in this workspace.
  # Lazy, memoized, frozen. With an explicit +env:+ hash this returns a
  # frozen *copy* (the caller's hash is left unfrozen) and never shells
  # out; otherwise the first call resolves the git identity via
  # {#compute_git_identity_env}.
  #
  # @return [Hash{String=>String}]
  def env
    @env ||= (@env_override&.dup || compute_git_identity_env).freeze
  end

  # Per-workspace ephemeral umbrella, minted lazily on first call under
  # {CACHE_BASE} and registered for +at_exit+ removal when minted.
  #
  # @return [Pathname]
  def internal_temp
    @internal_temp ||= Filesystem.mint_internal_temp
  end

  # @api private — minting helper shared with {AllowAll}.
  #
  # The +at_exit+ hook only fires in the process that minted the dir: a
  # forked child inherits the handler, and without the pid guard its
  # exit would delete the parent's live umbrella. The +exist?+ check
  # guards against the dir being already gone (test cleanup, manual rm).
  #
  # @return [Pathname] realpath of the freshly created umbrella dir
  def self.mint_internal_temp
    FileUtils.mkdir_p(CACHE_BASE)
    path = Pathname.new(Dir.mktmpdir('workspace-', CACHE_BASE)).realpath
    owner_pid = Process.pid
    at_exit do
      FileUtils.remove_entry(path.to_s) if Process.pid == owner_pid && path.exist?
    end
    path
  end

  # Reap +workspace-*+ umbrella dirs under {CACHE_BASE} whose mtime is
  # older than {INTERNAL_TEMP_STALE_SECONDS}. Best-effort: per-entry
  # failures (permission denied, racing concurrent sweeper) and a
  # failure to list {CACHE_BASE} itself are both swallowed so the caller
  # is never blocked on dead state.
  #
  # @return [void]
  def self.sweep_stale_internal_temps!
    return unless File.directory?(CACHE_BASE)

    cutoff = Time.now - INTERNAL_TEMP_STALE_SECONDS
    Dir.children(CACHE_BASE).each do |entry|
      next unless entry.start_with?('workspace-')

      path = File.join(CACHE_BASE, entry)
      next unless File.directory?(path)
      next if File.mtime(path) > cutoff

      FileUtils.remove_entry(path)
    rescue StandardError
      # best-effort sweep; never block the host on dead state
    end
    nil
  rescue SystemCallError
    # CACHE_BASE vanished or became unreadable mid-sweep; same policy.
    nil
  end

  # Resolve a user-supplied path against the read-set (readable ∪
  # writable). The returned Pathname is absolute and may not exist on
  # disk; the caller validates existence separately.
  #
  # @param path [String]
  # @return [Pathname]
  # @raise [Error] if the resolved path falls outside every root
  def resolve_for_read(path)
    resolve(path, @readable)
  end

  # Resolve a user-supplied path against the write-set.
  #
  # @param path [String]
  # @return [Pathname]
  # @raise [Error] if the resolved path falls outside every writable root
  def resolve_for_write(path)
    resolve(path, @writable)
  end

  private

  # Eager-create +<internal_temp>/playground+ (minting the umbrella as a
  # side effect) and return its realpath.
  #
  # @return [Pathname]
  def mint_playground
    path = internal_temp + 'playground'
    FileUtils.mkdir_p(path)
    path.realpath
  end

  # @param canonical [Pathname] realpath'd project root candidate
  # @raise [Error] if it is a denied system root or a direct child of +/home+
  # @return [void]
  def validate_project_root!(canonical)
    return unless DENIED_PROJECT_ROOTS.include?(canonical) || canonical.parent.to_s == '/home'

    raise Error,
          "project_root '#{canonical}' is a system or home root; the workspace " \
          'project_root must be a project subdirectory (a system or home root would expose ' \
          'the entire tree as writable).'
  end

  # Containment check: canonicalize +path+ and require its deepest
  # existing ancestor (post-realpath) to sit inside one of +roots+.
  #
  # @param path [String]
  # @param roots [Array<Pathname>]
  # @return [Pathname]
  # @raise [Error] if no root contains the path
  def resolve(path, roots)
    aliased = apply_tmp_alias(path)
    resolved, base = canonicalize(aliased)
    return resolved if roots.any? { |root| under_root?(base, root) }

    raise Error, "path '#{aliased}' is outside the workspace " \
                 "(roots: #{roots.map(&:to_s).join(', ')})"
  end

  # Normalize +path+: anchor relative input at {#project_root}, collapse
  # +..+ lexically, walk up to the deepest existing ancestor and
  # +realpath+ it, then re-append the not-yet-existing tail.
  #
  # The walk-up also stops at a *symlink* even when its target does not
  # exist: +exist?+ is false for a broken link, so without the
  # +symlink?+ test the walk would step past it and approve the path
  # based on the link's parent, while a later write would follow the
  # link to wherever it points. Broken or looping links are refused.
  #
  # @param path [String, Pathname]
  # @return [Array(Pathname, Pathname)] +[resolved, base]+ where +base+ is
  #   the realpath of the deepest existing ancestor
  # @raise [Error] if the existing portion ends in a broken/looping symlink
  def canonicalize(path)
    candidate = Pathname.new(path)
    candidate = @project_root + candidate unless candidate.absolute?
    cleaned = candidate.cleanpath

    existing = cleaned
    existing = existing.parent until existing.exist? || existing.symlink? || existing.root?

    base = begin
      existing.realpath
    rescue Errno::ENOENT, Errno::ELOOP
      raise Error, "path '#{path}' passes through a broken symlink ('#{existing}'); " \
                   'refusing to resolve through it'
    end

    [base + cleaned.relative_path_from(existing), base]
  end

  # @param path [Pathname] absolute, canonical path
  # @param root [Pathname] absolute, canonical root
  # @return [Boolean] whether +path+ is +root+ or lies beneath it. The
  #   filesystem root is special-cased because appending a separator to
  #   +/+ yields +//+, which no path starts with.
  def under_root?(path, root)
    path == root || root.root? || path.to_s.start_with?(root.to_s + File::SEPARATOR)
  end

  # Resolve the git identity for {#project_root} via two +git config+
  # lookups. A missing or empty name or email yields +{}+; otherwise the
  # four +GIT_AUTHOR_*+ / +GIT_COMMITTER_*+ entries.
  #
  # @return [Hash{String=>String}]
  def compute_git_identity_env
    name = git_config('user.name')
    email = git_config('user.email')
    return {} if [name, email].any? { |value| value.nil? || value.empty? }

    {
      'GIT_AUTHOR_NAME' => name,
      'GIT_AUTHOR_EMAIL' => email,
      'GIT_COMMITTER_NAME' => name,
      'GIT_COMMITTER_EMAIL' => email
    }
  end

  # @param key [String] git config key
  # @return [String, nil] stripped value, or +nil+ on non-zero exit or
  #   when +git+ cannot be spawned (+Errno::ENOENT+)
  def git_config(key)
    result = Pikuri::Subprocess.spawn(
      'git', '-C', @project_root.to_s, 'config', key,
      chdir: @project_root.to_s
    ).wait
    result.status.success? ? result.output.strip : nil
  rescue Errno::ENOENT
    nil
  end

  # Rewrite +/tmp+ / +/tmp/foo+ inputs to land under {#temp} when
  # {#alias_tmp_to_temp} is set. Other inputs pass through untouched.
  # Lives in the base class so {AllowAll} inherits identical behavior.
  #
  # @param path [String, Pathname]
  # @return [String, Pathname]
  def apply_tmp_alias(path)
    return path unless @alias_tmp_to_temp

    text = path.to_s
    return @temp.to_s if text == '/tmp'
    return File.join(@temp.to_s, text.delete_prefix('/tmp/')) if text.start_with?('/tmp/')

    path
  end

  # Unrestricted variant of {Filesystem}: every path resolves, except
  # paths under {CREDENTIAL_DENYLIST} and paths that pass through a
  # broken symlink. +project_root:+ is not validated against
  # {DENIED_PROJECT_ROOTS}; it still anchors relative paths.
  class AllowAll < Filesystem
    # Credential locations that even AllowAll refuses. Built at
    # class-load time from +ENV['HOME']+ (when set and non-empty) and
    # +/root+, plus +/etc/shadow+. Frozen.
    CREDENTIAL_DENYLIST = begin
      homes = [ENV.fetch('HOME', nil), '/root'].compact.reject(&:empty?).uniq
      per_home = %w[.ssh .aws .gnupg .docker .kube .netrc]
      user_paths = homes.flat_map { |h| per_home.map { |p| File.join(h, p) } }
      (user_paths + %w[/etc/shadow]).map { |p| Pathname.new(p) }.uniq.freeze
    end

    # @param project_root [String, Pathname] base for relative paths.
    #   Not validated against {DENIED_PROJECT_ROOTS}.
    # @param temp [Boolean] same semantics as {Filesystem}.
    # @param alias_tmp_to_temp [Boolean] same semantics as {Filesystem}.
    # @param env [Hash{String=>String}, nil] same semantics as {Filesystem}.
    def initialize(project_root: Dir.pwd, temp: false, alias_tmp_to_temp: false, env: nil)
      @project_root = Pathname.new(project_root).realpath

      @internal_temp = nil
      @temp = temp ? mint_playground : nil
      @alias_tmp_to_temp = alias_tmp_to_temp && !@temp.nil?
      @env_override = env

      # Advertise "everything is in scope" through the accessors; the
      # real gate is the denylist in {#resolve_unrestricted}.
      @writable = [Pathname.new('/').realpath, @temp].compact.uniq
      @readable = @writable.dup
    end

    # @param path [String]
    # @return [Pathname]
    # @raise [Error] if the resolved path lands under {CREDENTIAL_DENYLIST}.
    def resolve_for_read(path)
      resolve_unrestricted(path)
    end

    # @param path [String]
    # @return [Pathname]
    # @raise [Error] if the resolved path lands under {CREDENTIAL_DENYLIST}.
    def resolve_for_write(path)
      resolve_unrestricted(path)
    end

    private

    # Canonicalize +path+ and reject it only when it equals or sits
    # beneath a denylisted credential location.
    #
    # @param path [String]
    # @return [Pathname]
    # @raise [Error]
    def resolve_unrestricted(path)
      aliased = apply_tmp_alias(path)
      resolved, = canonicalize(aliased)

      denied = CREDENTIAL_DENYLIST.find { |entry| under_root?(resolved, denylist_target(entry)) }
      return resolved unless denied

      raise Error,
            "path '#{aliased}' resolves under a denied credential location " \
            "('#{denied}') — even in ALLOW_ALL mode, ~/.ssh / ~/.aws / " \
            '~/.gnupg / ~/.docker / ~/.kube / ~/.netrc / /etc/shadow ' \
            'are off-limits.'
    end

    # Canonical form of a denylist entry, computed the same way as the
    # candidate path so the two are comparable even when the entry does
    # not exist yet but one of its ancestors is a symlink. Falls back to
    # the literal entry when it cannot be canonicalized.
    #
    # @param entry [Pathname]
    # @return [Pathname]
    def denylist_target(entry)
      canonicalize(entry.to_s).first
    rescue Error
      entry
    end
  end
end
542
+ end
543
+ end
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pikuri
4
- class Tool
4
+ module Workspace
5
5
  # The +glob+ tool — list files matching a glob pattern via
6
6
  # +rg --files+, sorted by modification time (newest first).
7
- # Instantiating +Tool::Glob.new(workspace: ws)+ produces a tool
8
- # whose {Tool#to_ruby_llm_tool} wiring is identical to any bundled
9
- # tool's. Same shape as {Tool::Grep} (workspace captured by the
7
+ # Instantiating +Glob.new(workspace: ws)+ produces a tool
8
+ # whose {Pikuri::Tool#to_ruby_llm_tool} wiring is identical to any
9
+ # bundled tool's. Same shape as {Grep} (workspace captured by the
10
10
  # +execute+ closure, no confirmer — read-only).
11
11
  #
12
12
  # == Why a separate tool from Grep
@@ -63,7 +63,7 @@ module Pikuri
63
63
  # == Truncation
64
64
  #
65
65
  # Total output head-truncated to {MAX_BYTES} *after* mtime sort, so
66
- # the kept rows are the newest. Matches {Tool::Grep}'s budget and
66
+ # the kept rows are the newest. Matches {Grep}'s budget and
67
67
  # head-bias.
68
68
  #
69
69
  # == Exit codes
@@ -82,10 +82,10 @@ module Pikuri
82
82
  # tool.
83
83
  # * +path+ not found → +"Error: path not found: <path>"+.
84
84
  # * +path+ outside the workspace → caught from
85
- # {Tool::Workspace::Error}.
86
- class Glob < Tool
85
+ # {Filesystem::Error}.
86
+ class Glob < Pikuri::Tool
87
87
  # @return [Integer] hard byte cap on combined rg output. Same
88
- # value as {Tool::Grep::MAX_BYTES} so the two file-touching
88
+ # value as {Grep::MAX_BYTES} so the two file-touching
89
89
  # tools share a budget shape. Re-declared here rather than
90
90
  # referenced cross-file because Zeitwerk's eager-load order
91
91
  # isn't guaranteed between siblings.
@@ -112,7 +112,7 @@ module Pikuri
112
112
  - Output is truncated to #{MAX_BYTES_LABEL}; refine the pattern or narrow `path` if the response ends in a truncation marker.
113
113
  DESC
114
114
 
115
- # @param workspace [Tool::Workspace] captured for path resolution
115
+ # @param workspace [Filesystem] captured for path resolution
116
116
  # and as +chdir+ for rg. All path arguments route through
117
117
  # +workspace.resolve_for_read+.
118
118
  # @raise [RuntimeError] if +rg+ isn't on +PATH+; fail-loud at
@@ -139,12 +139,24 @@ module Pikuri
139
139
  )
140
140
  end
141
141
 
142
+ # Produce a new {Glob} bound to +workspace+. Used by
143
+ # {Pikuri::SubAgent::SubAgentTool} when a persona supplies a
144
+ # +workspace_factory:+ — the parent's instance is rebuilt for
145
+ # the sub-agent's fresh workspace so paths resolve against the
146
+ # right root.
147
+ #
148
+ # @param workspace [Filesystem]
149
+ # @return [Glob]
150
+ def with_workspace(workspace)
151
+ self.class.new(workspace: workspace)
152
+ end
153
+
142
154
  # Validate inputs, resolve the path against the workspace, spawn
143
155
  # rg, mtime-sort, head-truncate, render. Returns either the
144
156
  # formatted listing, a "no files match" message, or
145
157
  # +"Error: ..."+.
146
158
  #
147
- # @param workspace [Tool::Workspace]
159
+ # @param workspace [Filesystem]
148
160
  # @param pattern [String]
149
161
  # @param path [String, nil]
150
162
  # @return [String]
@@ -159,12 +171,12 @@ module Pikuri
159
171
  return "Error: #{path} is a file, not a directory; use the read tool to view it."
160
172
  end
161
173
 
162
- rel = resolved.relative_path_from(workspace.cwd).to_s
174
+ rel = resolved.relative_path_from(workspace.project_root).to_s
163
175
  search_target = rel
164
176
  end
165
177
 
166
178
  argv = build_argv(path: search_target)
167
- result = Pikuri::Subprocess.spawn(*argv, chdir: workspace.cwd.to_s).wait
179
+ result = Pikuri::Subprocess.spawn(*argv, chdir: workspace.project_root.to_s).wait
168
180
  exit_code = result.status.exitstatus
169
181
 
170
182
  case exit_code
@@ -178,7 +190,7 @@ module Pikuri
178
190
  stderr = "exited #{exit_code}" if stderr.empty?
179
191
  "Error: ripgrep: #{stderr}"
180
192
  end
181
- rescue Tool::Workspace::Error => e
193
+ rescue Filesystem::Error => e
182
194
  "Error: #{e.message}"
183
195
  end
184
196
 
@@ -217,7 +229,7 @@ module Pikuri
217
229
  paths = all_paths.select { |p| File.fnmatch?(pattern, p, FNMATCH_FLAGS) }
218
230
  return no_match_message(pattern: pattern, path: path) if paths.empty?
219
231
 
220
- sorted = mtime_sort(paths, workspace.cwd)
232
+ sorted = mtime_sort(paths, workspace.project_root)
221
233
  joined = sorted.join("\n") + "\n"
222
234
  content, truncation_marker = head_truncate(joined)
223
235
  stripped = content.chomp
@@ -231,9 +243,9 @@ module Pikuri
231
243
  # mtime descending; path ascending for stable order on ties.
232
244
  #
233
245
  # @return [Array<String>]
234
- def self.mtime_sort(paths, cwd)
246
+ def self.mtime_sort(paths, base)
235
247
  paths
236
- .map { |p| [p, mtime_of(cwd + p)] }
248
+ .map { |p| [p, mtime_of(base + p)] }
237
249
  .sort_by { |(p, m)| [-m, p] }
238
250
  .map(&:first)
239
251
  end
@@ -301,7 +313,7 @@ module Pikuri
301
313
 
302
314
  # @return [String]
303
315
  def self.install_hint
304
- "Tool::Glob requires 'rg' (ripgrep) on PATH; install via your " \
316
+ "Glob requires 'rg' (ripgrep) on PATH; install via your " \
305
317
  "distro's package manager (e.g. 'apt install ripgrep')."
306
318
  end
307
319
  private_class_method :install_hint