pikuri-workspace 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4be40e803f487eb130b8ee758b86efb7a744c92691a1dd103aae68d91e59a2f9
4
- data.tar.gz: 981478770e9e7f8acef1fb1ca2b1896cf6a25c7eb73c01c1d57592f81583cf0f
3
+ metadata.gz: afd83aa622997eea8b09c700282cf401753d1925ace04a0c4ade3856c07ee8b4
4
+ data.tar.gz: 4cd46c7de8d42112e1119e68bab0ce7937ed9e5c270a29ceb34e5089178abd4c
5
5
  SHA512:
6
- metadata.gz: a211df218e4270acbe737f624cc8a77cc145d8a051c3500131036692168996c800a0110a27bd714a7cad3fb5cb9dd3e25fdfdeefb917fb73ccead021cd807c94
7
- data.tar.gz: fe2ff7510012d619caa166252ccef80148750e35fa58a68cd1722cec372758dc17e5605cbe380c4f106538b301134402da32e0fb13c6f23d6acec7c4aceb301e
6
+ metadata.gz: c818b87a2aca2f0f615d084cc7c6e0457d0a84e5cd5be8b061c36aba50a071ad2565c27170d33ae25ca810dee8a25b16b1a13b25477b21e1c00eb79d067f2bb0
7
+ data.tar.gz: 7277b1ba878546589d4107136f4d49e3dc68b0ee5aff80d3eeff8e06f9d97b4fa569eb8caf186337e39c72e9f33812e3963fd433eeb35f926f82bb6b681bb579
@@ -37,9 +37,9 @@ module Pikuri
37
37
  # +~/.cache/pikuri/workspace-XXX/+ ({#internal_temp}). It is minted
38
38
  # lazily on first access — workspaces that never touch it (most
39
39
  # specs, hosts that don't want a playground and don't use the
40
- # bubblewrap overlay) pay nothing — and removed by a single
41
- # +at_exit+ handler when the process exits. Everything ephemeral
42
- # this workspace produces lives inside the umbrella, so one
40
+ # bubblewrap overlay) pay nothing — and removed at process exit via
41
+ # a {Pikuri::Finalizers} registration. Everything ephemeral this
42
+ # workspace produces lives inside the umbrella, so one
43
43
  # +remove_entry+ at process exit cleans the lot:
44
44
  #
45
45
  # * {#temp} — the LLM-visible playground subdir, present only when
@@ -60,9 +60,9 @@ module Pikuri
60
60
  #
61
61
  # At gem load, {.sweep_stale_internal_temps!} prunes umbrella dirs
62
62
  # older than seven days — a safety net for sessions that died
63
- # before +at_exit+ could run (SIGKILL, OOM). Recent umbrellas are
64
- # left alone so a concurrent pikuri-code in another shell isn't
65
- # disturbed.
63
+ # before the {Pikuri::Finalizers} sweep could run (SIGKILL, OOM).
64
+ # Recent umbrellas are left alone so a concurrent pikuri-code in
65
+ # another shell isn't disturbed.
66
66
  #
67
67
  # == Optional temp playground
68
68
  #
@@ -279,8 +279,8 @@ module Pikuri
279
279
  end
280
280
 
281
281
  # Per-workspace ephemeral umbrella. Minted lazily on first call
282
- # under {CACHE_BASE}. Registered for +at_exit+ removal the
283
- # moment it's minted, so anything subsequently placed inside
282
+ # under {CACHE_BASE}. Registered with {Pikuri::Finalizers} for
283
+ # removal the moment it's minted, so anything subsequently placed inside
284
284
  # (the playground, {Pikuri::Code::Bash::Sandbox::Bubblewrap}'s
285
285
  # overlay state) gets wiped together. Callers that want
286
286
  # ephemeral state owned by the workspace should put it under
@@ -291,13 +291,16 @@ module Pikuri
291
291
  @internal_temp ||= Filesystem.mint_internal_temp
292
292
  end
293
293
 
294
- # @api private — minting helper shared with {AllowAll}. The
295
- # +FileUtils.remove_entry+ +at_exit+ guards against the dir
296
- # being already gone (test cleanup, manual rm).
294
+ # @api private — minting helper shared with {AllowAll}.
295
+ # Registers umbrella removal with {Pikuri::Finalizers} (block
296
+ # form — the umbrella is a dir to wipe, not a closeable object)
297
+ # rather than its own +at_exit+, so process teardown stays on the
298
+ # one registry. The +path.exist?+ guard makes the removal a no-op
299
+ # when the dir is already gone (test cleanup, manual rm).
297
300
  def self.mint_internal_temp
298
301
  FileUtils.mkdir_p(CACHE_BASE)
299
302
  path = Pathname.new(Dir.mktmpdir('workspace-', CACHE_BASE)).realpath
300
- at_exit { FileUtils.remove_entry(path.to_s) if path.exist? }
303
+ Pikuri::Finalizers.register { FileUtils.remove_entry(path.to_s) if path.exist? }
301
304
  path
302
305
  end
303
306
 
@@ -305,8 +308,8 @@ module Pikuri
305
308
  # {INTERNAL_TEMP_STALE_SECONDS}. Called once at gem load via
306
309
  # {Pikuri::Workspace} so each process boot inherits a tidy
307
310
  # {CACHE_BASE}. Failures (permission denied, racing concurrent
308
- # sweeper) are swallowed — best-effort cleanup, the real
309
- # +at_exit+ path is the load-bearing one.
311
+ # sweeper) are swallowed — best-effort cleanup; the
312
+ # {Pikuri::Finalizers} removal is the load-bearing path.
310
313
  #
311
314
  # @return [void]
312
315
  def self.sweep_stale_internal_temps!
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'pdf-reader'
4
3
  require 'ruby_llm'
5
4
 
6
5
  module Pikuri
@@ -26,7 +25,11 @@ module Pikuri
26
25
  #
27
26
  # == Truncation rules
28
27
  #
29
- # Two independent limits, whichever fires first wins:
28
+ # The line/byte windowing is delegated to
29
+ # {Pikuri::FileType.read_as_text_paged}, which returns a
30
+ # {Pikuri::FileType::Page} this tool renders; the same windower
31
+ # backs +VectorDb::Tools::Read+. Two independent limits, whichever fires
32
+ # first wins:
30
33
  #
31
34
  # * *Line limit* — {DEFAULT_LIMIT} lines (overridable via +limit+).
32
35
  # * *Byte cap* — {MAX_BYTES} bytes of input content; not exposed as a
@@ -34,19 +37,22 @@ module Pikuri
34
37
  #
35
38
  # Additionally, individual lines longer than {MAX_LINE_LENGTH} chars
36
39
  # are truncated with {LINE_TRUNCATION_MARKER} appended; the model is
37
- # told to reach for +grep+ to find content inside such files.
40
+ # told to reach for +grep+ to find content inside such files. (These
41
+ # constants alias the +PAGE_*+ ones on {Pikuri::FileType} — one
42
+ # source of truth, shared with +VectorDb::Tools::Read+.)
38
43
  #
39
44
  # == PDF extraction
40
45
  #
41
46
  # PDFs are detected by their +%PDF-+ magic prefix in the sample bytes
42
- # and routed to {.format_pdf} instead of the binary-refusal path. The
43
- # extractor walks pages lazily via +pdf-reader+, emitting one synthetic
44
- # +"--- Page N ---"+ header line per page followed by that page's text.
45
- # The offset / limit / MAX_BYTES contract is identical to the text
46
- # path — extraction stops as soon as the line or byte cap is hit, so
47
- # reading the first window of a 500-page PDF only parses the few pages
48
- # needed. Line numbers in PDF output are for citation back to the user
49
- # only; PDFs are not editable through {Edit}.
47
+ # and routed through {Pikuri::FileType.read_as_text_paged} instead of
48
+ # the binary-refusal path. The extractor walks pages lazily via
49
+ # +pdf-reader+, emitting one synthetic +"--- Page N ---"+ header line
50
+ # per page followed by that page's text. The offset / limit /
51
+ # MAX_BYTES contract is identical to the text path — extraction stops
52
+ # as soon as the line or byte cap is hit, so reading the first window
53
+ # of a 500-page PDF only parses the few pages needed. Line numbers in
54
+ # PDF output are for citation back to the user only; PDFs are not
55
+ # editable through {Edit}.
50
56
  #
51
57
  # PDFs with no extractable text (scanned images, empty documents) come
52
58
  # back with an LLM-actionable hint string rather than an empty
@@ -99,23 +105,27 @@ module Pikuri
99
105
  # above.
100
106
  # * Offset past EOF → +"Error: offset N is beyond end of file (M lines total)"+.
101
107
  class Read < Pikuri::Tool
108
+ # The windowing constants live on {Pikuri::FileType} now (shared
109
+ # with +VectorDb::Tools::Read+); these aliases keep the names this tool's
110
+ # description and specs reference pointing at the single source.
111
+
102
112
  # @return [Integer] default value of the +limit+ parameter (number
103
113
  # of lines to read per call).
104
- DEFAULT_LIMIT = 2000
114
+ DEFAULT_LIMIT = Pikuri::FileType::PAGE_DEFAULT_LIMIT
105
115
 
106
116
  # @return [Integer] per-line character cap; longer lines are
107
117
  # truncated with {LINE_TRUNCATION_MARKER}.
108
- MAX_LINE_LENGTH = 2000
118
+ MAX_LINE_LENGTH = Pikuri::FileType::PAGE_MAX_LINE_LENGTH
109
119
 
110
120
  # @return [String] suffix appended to lines truncated by
111
121
  # {MAX_LINE_LENGTH}.
112
- LINE_TRUNCATION_MARKER = "... (line truncated to #{MAX_LINE_LENGTH} chars)"
122
+ LINE_TRUNCATION_MARKER = Pikuri::FileType::PAGE_LINE_TRUNCATION_MARKER
113
123
 
114
124
  # @return [Integer] hard byte cap on input content collected per
115
125
  # call. Counted on the line bytes (plus one for the joining
116
126
  # newline); the rendered output is slightly larger due to the
117
127
  # per-line +"%6d\t"+ prefix.
118
- MAX_BYTES = 50 * 1024
128
+ MAX_BYTES = Pikuri::FileType::PAGE_MAX_BYTES
119
129
 
120
130
  # @return [String] human-readable form of {MAX_BYTES} for the
121
131
  # continuation marker.
@@ -210,168 +220,77 @@ module Pikuri
210
220
  return "Error: #{path} is a directory; use the glob tool to list files." if resolved.directory?
211
221
 
212
222
  mime = Pikuri::FileType.detect_mime(resolved)
213
-
214
- return format_pdf(path: path, resolved: resolved, offset: offset, limit: limit) if mime == 'application/pdf'
215
223
  return format_image(path: path, resolved: resolved, mime: mime) if mime&.start_with?('image/')
216
- return "Error: cannot read binary file: #{path}" if Pikuri::FileType.binary?(resolved)
224
+ # PDFs are binary by the heuristic, so the PDF route (handled
225
+ # inside read_as_text_paged) must win over the binary refusal.
226
+ if mime != 'application/pdf' && Pikuri::FileType.binary?(resolved)
227
+ return "Error: cannot read binary file: #{path}"
228
+ end
217
229
 
218
- format_slice(path: path, resolved: resolved, offset: offset, limit: limit)
230
+ page = Pikuri::FileType.read_as_text_paged(
231
+ resolved, offset: offset, limit: limit,
232
+ max_bytes: MAX_BYTES, max_line_length: MAX_LINE_LENGTH
233
+ )
234
+ render_page(page)
219
235
  rescue Filesystem::Error => e
220
236
  "Error: #{e.message}"
221
237
  rescue Errno::EACCES => e
222
238
  "Error: cannot read #{path}: #{e.message}"
239
+ rescue RuntimeError => e
240
+ # Malformed / unsupported PDF surfaced by read_as_text_paged.
241
+ "Error: #{e.message}"
223
242
  end
224
243
 
225
- # Stream the file line-by-line, collect at most +limit+ lines
226
- # starting at +offset+, and stop early if {MAX_BYTES} is reached.
227
- # We keep counting lines past the collection window so the trailer
228
- # can report total line count when the line limit (not the byte
229
- # cap) was the stopping criterion — same trick opencode uses.
244
+ # Render a {Pikuri::FileType::Page} as the cat-n observation: a
245
+ # six-column line number, a tab, then the (already-truncated)
246
+ # content, followed by a trailer that tells the model whether to
247
+ # page on. PDF pages carry +"--- Page N ---"+ marker lines from
248
+ # the extractor; the +kind+ only changes trailer wording here.
230
249
  #
250
+ # @param page [Pikuri::FileType::Page]
231
251
  # @return [String]
232
- def self.format_slice(path:, resolved:, offset:, limit:)
233
- start_index = offset - 1
234
- collected = []
235
- total_lines = 0
236
- bytes = 0
237
- byte_cap_hit = false
238
- has_more = false
252
+ def self.render_page(page)
253
+ if page.lines.empty?
254
+ return empty_message(page) if page.total_lines.zero?
239
255
 
240
- resolved.each_line do |raw|
241
- total_lines += 1
242
- next if total_lines <= start_index
243
-
244
- if collected.length >= limit
245
- has_more = true
246
- next
247
- end
248
-
249
- line = raw.chomp
250
- if line.length > MAX_LINE_LENGTH
251
- line = line[0, MAX_LINE_LENGTH] + LINE_TRUNCATION_MARKER
252
- end
253
-
254
- size = line.bytesize + 1 # +1 for the joining newline
255
- if bytes + size > MAX_BYTES
256
- byte_cap_hit = true
257
- has_more = true
258
- break
259
- end
260
-
261
- collected << line
262
- bytes += size
263
- end
264
-
265
- return '(Empty file)' if total_lines.zero?
266
-
267
- if start_index >= total_lines
268
- return "Error: offset #{offset} is beyond end of file (#{total_lines} lines total)"
256
+ return "Error: offset #{page.start_line} is beyond end of file " \
257
+ "(#{page.total_lines} lines total)"
269
258
  end
270
259
 
271
- last_line = offset + collected.length - 1
272
- body = collected.each_with_index.map { |line, i| format("%6d\t%s", i + offset, line) }.join("\n")
260
+ noun = page.kind == :pdf ? 'PDF ' : ''
261
+ last = page.start_line + page.lines.length - 1
262
+ body = page.lines.each_with_index.map { |line, i| format("%6d\t%s", i + page.start_line, line) }.join("\n")
273
263
 
274
264
  trailer =
275
- if byte_cap_hit
276
- "(Output capped at #{MAX_BYTES_LABEL}. Showing lines #{offset}-#{last_line}. " \
277
- "Use offset=#{last_line + 1} to continue.)"
278
- elsif has_more
279
- "(Showing lines #{offset}-#{last_line} of #{total_lines}. " \
280
- "Use offset=#{last_line + 1} to continue.)"
265
+ if page.byte_capped
266
+ "(Output capped at #{MAX_BYTES_LABEL}. Showing #{noun}lines #{page.start_line}-#{last}. " \
267
+ "Use offset=#{last + 1} to continue.)"
268
+ elsif page.more
269
+ total = page.total_lines ? " of #{page.total_lines}" : ''
270
+ "(Showing #{noun}lines #{page.start_line}-#{last}#{total}. " \
271
+ "Use offset=#{last + 1} to continue.)"
281
272
  else
282
- "(End of file - total #{total_lines} lines)"
273
+ "(End of #{page.kind == :pdf ? 'PDF' : 'file'} - total #{page.total_lines} lines)"
283
274
  end
284
275
 
285
276
  "#{body}\n\n#{trailer}"
286
277
  end
287
- private_class_method :format_slice
278
+ private_class_method :render_page
288
279
 
289
- # PDF counterpart to {.format_slice}: walk +pdf-reader+'s lazy page
290
- # iterator, emit a +"--- Page N ---"+ header followed by each line
291
- # of the page's extracted text, and apply the same offset / limit /
292
- # MAX_BYTES contract. Stops parsing as soon as the cap is hit so a
293
- # 500-page PDF only touches the few pages needed for the requested
294
- # window — the file handle stays open inside the +throw :done+
295
- # block, which short-circuits both the inner line loop and the
296
- # outer page loop in one move.
297
- #
298
- # The +has_more+ trailer here cannot quote a total line count the
299
- # way text files do — we'd have to parse every page just to count.
300
- # Instead we drop the "of N lines" claim and stick to the
301
- # next-offset hint, which is all the model needs to page.
280
+ # The empty-document message, worded by content kind: a scanned /
281
+ # text-free PDF gets an LLM-actionable hint rather than the
282
+ # plain-file "(Empty file)".
302
283
  #
284
+ # @param page [Pikuri::FileType::Page]
303
285
  # @return [String]
304
- def self.format_pdf(path:, resolved:, offset:, limit:)
305
- start_index = offset - 1
306
- collected = []
307
- total_lines = 0
308
- bytes = 0
309
- byte_cap_hit = false
310
- has_more = false
311
-
312
- catch(:done) do
313
- resolved.open('rb') do |io|
314
- reader = ::PDF::Reader.new(io)
315
- reader.pages.each_with_index do |page, idx|
316
- text = page.text.strip
317
- next if text.empty?
318
-
319
- page_lines = ["--- Page #{idx + 1} ---", *text.split("\n")]
320
- page_lines.each do |raw|
321
- total_lines += 1
322
- next if total_lines <= start_index
323
-
324
- if collected.length >= limit
325
- has_more = true
326
- throw :done
327
- end
328
-
329
- line = raw
330
- if line.length > MAX_LINE_LENGTH
331
- line = line[0, MAX_LINE_LENGTH] + LINE_TRUNCATION_MARKER
332
- end
333
-
334
- size = line.bytesize + 1
335
- if bytes + size > MAX_BYTES
336
- byte_cap_hit = true
337
- has_more = true
338
- throw :done
339
- end
340
-
341
- collected << line
342
- bytes += size
343
- end
344
- end
345
- end
346
- end
347
-
348
- return '(PDF has no extractable text; likely scanned image content)' if total_lines.zero?
349
-
350
- if start_index >= total_lines
351
- return "Error: offset #{offset} is beyond end of file (#{total_lines} lines total)"
286
+ def self.empty_message(page)
287
+ if page.kind == :pdf
288
+ '(PDF has no extractable text; likely scanned image content)'
289
+ else
290
+ '(Empty file)'
352
291
  end
353
-
354
- last_line = offset + collected.length - 1
355
- body = collected.each_with_index.map { |line, i| format("%6d\t%s", i + offset, line) }.join("\n")
356
-
357
- trailer =
358
- if byte_cap_hit
359
- "(Output capped at #{MAX_BYTES_LABEL}. Showing PDF lines #{offset}-#{last_line}. " \
360
- "Use offset=#{last_line + 1} to continue.)"
361
- elsif has_more
362
- "(Showing PDF lines #{offset}-#{last_line}. " \
363
- "Use offset=#{last_line + 1} to continue.)"
364
- else
365
- "(End of PDF - total #{total_lines} lines)"
366
- end
367
-
368
- "#{body}\n\n#{trailer}"
369
- rescue ::PDF::Reader::MalformedPDFError,
370
- ::PDF::Reader::InvalidPageError,
371
- ::PDF::Reader::UnsupportedFeatureError => e
372
- "Error: cannot extract PDF text from #{path}: #{e.class.name.split('::').last}: #{e.message}"
373
292
  end
374
- private_class_method :format_pdf
293
+ private_class_method :empty_message
375
294
 
376
295
  # Build a multimodal observation: a short metadata note ("Read
377
296
  # image: …") plus the file itself attached as a path. The model
@@ -23,8 +23,9 @@ module Pikuri
23
23
  LOADER.eager_load
24
24
 
25
25
  # Reap +~/.cache/pikuri/workspace-*+ leftovers from sessions
26
- # killed before their +at_exit+ could fire. Best-effort; runs once
27
- # at gem load. See {Filesystem.sweep_stale_internal_temps!}.
26
+ # killed before the {Pikuri::Finalizers} removal could fire.
27
+ # Best-effort; runs once at gem load. See
28
+ # {Filesystem.sweep_stale_internal_temps!}.
28
29
  Filesystem.sweep_stale_internal_temps!
29
30
  end
30
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pikuri-workspace
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Vysny
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-05-29 00:00:00.000000000 Z
11
+ date: 2026-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pikuri-core
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.0.4
19
+ version: 0.0.5
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.0.4
26
+ version: 0.0.5
27
27
  description: |
28
28
  pikuri-workspace adds "operate on a directory tree" to pikuri-core
29
29
  agents: the +Pikuri::Workspace::Filesystem+ class that scopes