@ictechgy/context-guard 0.4.9 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.ko.md +59 -31
- package/README.md +85 -36
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +30 -6
- package/package.json +4 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +20 -14
- package/plugins/context-guard/README.md +26 -17
- package/plugins/context-guard/bin/context-guard +147 -25
- package/plugins/context-guard/bin/context-guard-artifact +884 -79
- package/plugins/context-guard/bin/context-guard-audit +33 -2
- package/plugins/context-guard/bin/context-guard-bench +1542 -31
- package/plugins/context-guard/bin/context-guard-cache-score +665 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +790 -6
- package/plugins/context-guard/bin/context-guard-experiments +463 -26
- package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +892 -49
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
- package/plugins/context-guard/bin/context-guard-trim-output +288 -41
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_commands.py +230 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -10,6 +10,8 @@ import json
|
|
|
10
10
|
import os
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
import re
|
|
13
|
+
import secrets
|
|
14
|
+
import shlex
|
|
13
15
|
import stat
|
|
14
16
|
import sys
|
|
15
17
|
import time
|
|
@@ -30,6 +32,18 @@ MAX_COMMAND_PREVIEW_BYTES = 2_048
|
|
|
30
32
|
MAX_TOP_ERROR_RECEIPTS = 12
|
|
31
33
|
MAX_DUPLICATE_GROUPS = 12
|
|
32
34
|
MAX_SUGGESTED_QUERIES = 12
|
|
35
|
+
# Schema identifiers embedded in JSON payloads. The output-sandbox identifier is
# written as "schema_version" by build_output_sandbox_envelope; the code that
# emits the search payload is outside this view.
SEARCH_SCHEMA_VERSION = "contextguard.artifact.search.v1"
OUTPUT_SANDBOX_SCHEMA_VERSION = "contextguard.artifact.output-sandbox.v1"
# DEFAULT_*/MAX_* pairs: presumably the default and the upper bound for each
# search option -- TODO confirm against the argument parser (not shown here).
DEFAULT_SEARCH_MAX_ARTIFACTS = 100
MAX_SEARCH_MAX_ARTIFACTS = 1_000
DEFAULT_SEARCH_MAX_MATCHES = 40
MAX_SEARCH_MAX_MATCHES = 1_000
DEFAULT_SEARCH_CONTEXT_LINES = 1
MAX_SEARCH_CONTEXT_LINES = 20
DEFAULT_SEARCH_SNIPPET_CHARS = 360
MAX_SEARCH_SNIPPET_CHARS = 2_000
MAX_SEARCH_PATTERN_BYTES = 512
# NOTE(review): looks like a marker meaning "the reported match count is only a
# lower bound because the search was truncated" -- confirm at the use site.
SEARCH_TRUNCATED_COUNT_UNKNOWN = "lower_bound"
|
|
33
47
|
ARTIFACT_ID_RE = re.compile(r"^[a-f0-9]{16,64}$")
|
|
34
48
|
ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
|
|
35
49
|
"tmp": Path("/private/tmp"),
|
|
@@ -183,15 +197,50 @@ def sanitize_one_line(text: str, *, show_paths: bool = False) -> str:
|
|
|
183
197
|
return cap_utf8_bytes(cap_line(" ".join(sanitized.strip().split())), MAX_COMMAND_PREVIEW_BYTES)
|
|
184
198
|
|
|
185
199
|
|
|
200
|
+
# Platform capability probes, evaluated once at import time.
# True when the os module exposes O_NOFOLLOW on this platform.
NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
# os.supports_dir_fd is the set of os functions that accept dir_fd= here; each
# flag below records whether one specific call can be made relative to a
# directory file descriptor.
DIR_FD_OPEN_SUPPORTED = bool(os.supports_dir_fd and os.open in os.supports_dir_fd)
DIR_FD_MKDIR_SUPPORTED = bool(os.supports_dir_fd and os.mkdir in os.supports_dir_fd)
DIR_FD_STAT_SUPPORTED = bool(os.supports_dir_fd and os.stat in os.supports_dir_fd)
DIR_FD_UNLINK_SUPPORTED = bool(os.supports_dir_fd and os.unlink in os.supports_dir_fd)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def dir_fd_replace_supported() -> bool:
    """Report whether ``os.replace`` accepts ``src_dir_fd``/``dst_dir_fd``.

    ``os.supports_dir_fd`` is deliberately not consulted: some Python builds
    accept the directory-fd keywords for ``os.replace`` without listing the
    function there. The check inspects the callable's signature instead.

    Returns:
        True when both keyword parameters appear in the signature, and also
        True (optimistically) when the signature cannot be introspected.
    """
    import inspect

    try:
        parameters = inspect.signature(os.replace).parameters
    except (TypeError, ValueError):
        # No introspectable signature: assume support rather than refuse.
        return True
    return all(name in parameters for name in ("src_dir_fd", "dst_dir_fd"))
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# Probed once at import time; write_private_text refuses to run when this is False.
DIR_FD_REPLACE_SUPPORTED = dir_fd_replace_supported()
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def os_error_detail(exc: OSError) -> str:
    """Render an ``OSError`` as a short message without any path information.

    The message is the first non-empty value among ``exc.strerror``,
    ``str(exc)`` and the exception class name. When ``exc.errno`` is set it is
    appended as ``" (errno N)"``.
    """
    message = exc.strerror or str(exc) or type(exc).__name__
    code = exc.errno
    return message if code is None else f"{message} (errno {code})"
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def reject_parent_traversal(path: Path, *, label: str) -> None:
    """Raise ``ValueError`` when any component of ``path`` is ``".."``.

    The check runs on the user-expanded path. ``label`` names the offending
    argument in the error message.
    """
    if ".." in path.expanduser().parts:
        raise ValueError(f"{label} must not contain parent traversal")
|
|
233
|
+
|
|
234
|
+
|
|
186
235
|
def ensure_private_dir(path: Path) -> None:
    """Create ``path`` if needed and try to restrict it to mode ``0o700``.

    The directory is opened (and created when missing) through
    ``open_private_directory_no_follow``. The chmod is applied to the opened
    descriptor and is best-effort: an ``OSError`` from ``fchmod`` is ignored.
    The descriptor is always closed.
    """
    dir_fd = open_private_directory_no_follow(path, label="artifact directory", create=True)
    try:
        os.fchmod(dir_fd, 0o700)
    except OSError:
        # Best-effort tightening only.
        pass
    finally:
        os.close(dir_fd)
|
|
195
244
|
|
|
196
245
|
|
|
197
246
|
def reject_symlink_components(path: Path) -> None:
|
|
@@ -213,22 +262,38 @@ def reject_symlink_components(path: Path) -> None:
|
|
|
213
262
|
|
|
214
263
|
def regular_private_file_size(path: Path) -> int:
    """Return the size in bytes of the regular file at ``path``.

    The parent directory is opened via ``open_private_directory_no_follow``
    and the leaf is stat'ed relative to that descriptor with
    ``follow_symlinks=False``.

    Raises:
        ValueError: the leaf name is empty, ``"."`` or ``".."``, or the leaf
            is a symlink or not a regular file.
        RuntimeError: the platform lacks ``dir_fd`` support for ``os.stat``.
    """
    path = normalize_allowed_first_absolute_symlink(path)
    directory_fd = open_private_directory_no_follow(
        path.parent, label="artifact directory", create=False
    )
    try:
        name = path.name
        if name in ("", ".", ".."):
            raise ValueError("artifact file must name a regular file")
        if not DIR_FD_STAT_SUPPORTED:
            raise RuntimeError("artifact reads require dir_fd stat support")
        info = os.stat(name, dir_fd=directory_fd, follow_symlinks=False)
        mode = info.st_mode
        if stat.S_ISLNK(mode):
            raise ValueError(f"artifact file must not be a symlink: {path.name}")
        if not stat.S_ISREG(mode):
            raise ValueError(f"artifact file must be a regular file: {path.name}")
        return int(info.st_size)
    finally:
        os.close(directory_fd)
|
|
223
280
|
|
|
224
281
|
|
|
225
282
|
def read_bounded_private_text(path: Path, max_bytes: int) -> str:
|
|
226
283
|
path = normalize_allowed_first_absolute_symlink(path)
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
284
|
+
parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=False)
|
|
285
|
+
flags = os.O_RDONLY | os.O_NOFOLLOW
|
|
286
|
+
if hasattr(os, "O_CLOEXEC"):
|
|
287
|
+
flags |= os.O_CLOEXEC
|
|
288
|
+
leaf = path.name
|
|
289
|
+
if leaf in {"", ".", ".."}:
|
|
290
|
+
os.close(parent_fd)
|
|
291
|
+
raise ValueError("artifact file must name a regular file")
|
|
292
|
+
try:
|
|
293
|
+
fd = os.open(leaf, flags, dir_fd=parent_fd)
|
|
294
|
+
except OSError:
|
|
295
|
+
os.close(parent_fd)
|
|
296
|
+
raise
|
|
232
297
|
try:
|
|
233
298
|
st = os.fstat(fd)
|
|
234
299
|
if not stat.S_ISREG(st.st_mode):
|
|
@@ -241,35 +306,161 @@ def read_bounded_private_text(path: Path, max_bytes: int) -> str:
|
|
|
241
306
|
return data.decode("utf-8", errors="replace")
|
|
242
307
|
finally:
|
|
243
308
|
os.close(fd)
|
|
309
|
+
os.close(parent_fd)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def no_follow_dir_flags() -> int:
    """Build the ``os.open`` flags used to open a directory without following symlinks.

    Always includes ``O_RDONLY | O_NOFOLLOW``; ``O_CLOEXEC`` and
    ``O_DIRECTORY`` are added when the platform defines them.

    Raises:
        RuntimeError: ``O_NOFOLLOW`` is not available on this platform.
    """
    if not NO_FOLLOW_SUPPORTED:
        raise RuntimeError("artifact writes require O_NOFOLLOW support")
    combined = os.O_RDONLY | os.O_NOFOLLOW
    for optional_flag in ("O_CLOEXEC", "O_DIRECTORY"):
        # A missing flag contributes 0, i.e. leaves the mask untouched.
        combined |= getattr(os, optional_flag, 0)
    return combined
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def temp_file_flags() -> int:
    """Build the ``os.open`` flags used to create a fresh temporary file.

    Always includes ``O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW``, so the open
    fails if the name already exists. ``O_CLOEXEC`` and ``O_NOCTTY`` are added
    when the platform defines them.

    Raises:
        RuntimeError: ``O_NOFOLLOW`` is not available on this platform.
    """
    if not NO_FOLLOW_SUPPORTED:
        raise RuntimeError("artifact writes require O_NOFOLLOW support")
    combined = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW
    for optional_flag in ("O_CLOEXEC", "O_NOCTTY"):
        # A missing flag contributes 0, i.e. leaves the mask untouched.
        combined |= getattr(os, optional_flag, 0)
    return combined
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def open_private_directory_no_follow(path: Path, *, label: str, create: bool) -> int:
    """Open ``path`` one component at a time and return a directory descriptor.

    Each component is opened relative to the descriptor of its parent using
    the flags from ``no_follow_dir_flags()``. Absolute paths start from the
    path anchor, relative paths from ``"."``.

    Args:
        path: Directory to open. It is user-expanded and passed through
            ``normalize_allowed_first_absolute_symlink`` first.
        label: Human-readable name of the argument, used in error messages.
        create: When True, missing components are created with mode ``0o700``.

    Returns:
        An open file descriptor for the final directory. The caller owns it
        and must ``os.close`` it.

    Raises:
        ValueError: ``path`` contains a ``".."`` component.
        FileNotFoundError: a component is missing and ``create`` is False.
        RuntimeError: required ``dir_fd`` support is unavailable, a component
            is not a directory, or any other ``OSError`` occurs while walking.
    """
    reject_parent_traversal(path, label=label)
    path = normalize_allowed_first_absolute_symlink(path.expanduser())
    if not DIR_FD_OPEN_SUPPORTED:
        raise RuntimeError(f"{label} requires dir_fd open support")
    if create and not DIR_FD_MKDIR_SUPPORTED:
        raise RuntimeError(f"{label} requires dir_fd mkdir support")
    flags = no_follow_dir_flags()
    if path.is_absolute():
        current_fd = os.open(path.anchor or os.sep, os.O_RDONLY | (os.O_CLOEXEC if hasattr(os, "O_CLOEXEC") else 0))
        parts = path.parts[1:]
    else:
        current_fd = os.open(".", flags)
        parts = path.parts
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                raise RuntimeError(f"{label} must not contain parent traversal")
            try:
                next_fd = os.open(part, flags, dir_fd=current_fd)
            except FileNotFoundError:
                if not create:
                    raise
                try:
                    os.mkdir(part, 0o700, dir_fd=current_fd)
                except FileExistsError:
                    # Another process created the component between our failed
                    # open and this mkdir. That is not an error: the open below
                    # still applies the no-follow flags, and the fstat check
                    # still confirms the result is a directory.
                    pass
                next_fd = os.open(part, flags, dir_fd=current_fd)
            try:
                if not stat.S_ISDIR(os.fstat(next_fd).st_mode):
                    raise RuntimeError(f"{label} must not traverse non-directory components")
            except Exception:
                os.close(next_fd)
                raise
            os.close(current_fd)
            current_fd = next_fd
        # Hand ownership to the caller; the finally block must not close it.
        owned_fd = current_fd
        current_fd = -1
        return owned_fd
    except FileNotFoundError:
        # Let "missing directory" reach callers unchanged.
        raise
    except OSError as exc:
        raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    finally:
        if current_fd >= 0:
            os.close(current_fd)
|
|
244
379
|
|
|
245
380
|
|
|
246
|
-
def
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
tmp = path.with_name(path.name + f".tmp-{os.getpid()}-{time.time_ns()}")
|
|
250
|
-
flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0)
|
|
251
|
-
fd = os.open(str(tmp), flags, 0o600)
|
|
381
|
+
def precheck_artifact_leaf(parent_fd: int, leaf: str, *, label: str) -> None:
    """Verify that ``leaf`` inside ``parent_fd`` is absent or a regular file.

    The entry is stat'ed relative to ``parent_fd`` with
    ``follow_symlinks=False``.

    Raises:
        RuntimeError: ``dir_fd`` stat is unsupported, the stat fails for a
            reason other than "not found", or the entry exists and is not a
            regular file.
    """
    if not DIR_FD_STAT_SUPPORTED:
        raise RuntimeError(f"{label} requires dir_fd stat support")
    try:
        info = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
    except FileNotFoundError:
        # Nothing there yet: acceptable.
        return
    except OSError as exc:
        raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    if stat.S_ISREG(info.st_mode):
        return
    raise RuntimeError(f"{label} must be missing or a regular file")
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def write_all_fd(fd: int, data: bytes) -> None:
    """Write every byte of ``data`` to ``fd``, retrying after partial writes.

    Raises:
        OSError: ``os.write`` reports that no bytes were written
            (``"short write"``), or the write itself fails.
    """
    remaining = memoryview(data)
    while len(remaining) > 0:
        count = os.write(fd, remaining)
        if count <= 0:
            raise OSError("short write")
        # Slicing a memoryview does not copy the underlying bytes.
        remaining = remaining[count:]
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def fsync_required(fd: int, *, label: str, committed: bool = False) -> None:
    """``fsync`` ``fd`` and convert any failure into a ``RuntimeError``.

    Args:
        fd: Descriptor to flush.
        label: Description of what is being flushed, used in the error text.
        committed: When True the failure message is prefixed with
            ``committed_but_parent_fsync_failed`` instead of naming ``label``.

    Raises:
        RuntimeError: ``os.fsync`` raised ``OSError``.
    """
    try:
        os.fsync(fd)
    except OSError as exc:
        detail = os_error_detail(exc)
        if committed:
            message = f"committed_but_parent_fsync_failed: {detail}"
        else:
            message = f"could not fsync {label}: {detail}"
        raise RuntimeError(message) from exc
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def write_private_text(path: Path, text: str) -> None:
    """Write ``text`` (UTF-8) to ``path`` via a temp file and ``os.replace``.

    All filesystem calls on the leaf are made relative to a descriptor for the
    parent directory, which is opened (and created if missing) through
    ``open_private_directory_no_follow``.

    Raises:
        ValueError: ``path`` contains a ``".."`` component.
        RuntimeError: required ``dir_fd`` support is missing, the leaf name is
            invalid, the existing leaf is not a regular file, no temp file
            could be created, an fsync fails, or any ``OSError`` occurs
            while writing.
    """
    reject_parent_traversal(path, label="artifact file")
    path = normalize_allowed_first_absolute_symlink(path.expanduser())
    if not DIR_FD_REPLACE_SUPPORTED:
        raise RuntimeError("artifact writes require dir_fd replace support")
    if not DIR_FD_UNLINK_SUPPORTED:
        raise RuntimeError("artifact writes require dir_fd unlink support")
    parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=True)
    try:
        # Best-effort: tighten the directory mode, ignore failure.
        os.fchmod(parent_fd, 0o700)
    except OSError:
        pass
    # Sentinels read by the finally block: fd >= 0 means "still open",
    # temp_leaf not None means "temp file exists and was not renamed yet".
    fd = -1
    temp_leaf: str | None = None
    try:
        leaf = path.name
        if leaf in {"", ".", ".."}:
            raise RuntimeError("artifact file must name a regular file")
        precheck_artifact_leaf(parent_fd, leaf, label="artifact file")
        # Retry with a fresh random name if the candidate already exists.
        for _attempt in range(20):
            candidate = f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.tmp"
            try:
                fd = os.open(candidate, temp_file_flags(), 0o600, dir_fd=parent_fd)
                temp_leaf = candidate
                break
            except FileExistsError:
                continue
        if fd < 0 or temp_leaf is None:
            raise RuntimeError("could not create temporary artifact file")
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RuntimeError("temporary artifact file must be a regular file")
        os.fchmod(fd, 0o600)
        write_all_fd(fd, text.encode("utf-8"))
        fsync_required(fd, label="artifact temp file")
        os.close(fd)
        fd = -1
        fsync_required(parent_fd, label="artifact directory before replace")
        os.replace(temp_leaf, leaf, src_dir_fd=parent_fd, dst_dir_fd=parent_fd)
        # The temp name no longer exists after the rename; skip the unlink.
        temp_leaf = None
        # committed=True: a failure here is reported as "committed_but_..."
        # because the rename has already taken place.
        fsync_required(parent_fd, label="artifact directory after replace", committed=True)
    except OSError as exc:
        raise RuntimeError(f"could not write artifact file: {os_error_detail(exc)}") from exc
    finally:
        if fd >= 0:
            os.close(fd)
        if temp_leaf is not None:
            try:
                # Remove the leftover temp file; failure to do so is ignored.
                os.unlink(temp_leaf, dir_fd=parent_fd)
            except OSError:
                pass
        os.close(parent_fd)
|
|
273
464
|
|
|
274
465
|
|
|
275
466
|
def read_bounded_stdin(max_bytes: int) -> tuple[str, bool, int]:
|
|
@@ -283,6 +474,7 @@ def read_bounded_stdin(max_bytes: int) -> tuple[str, bool, int]:
|
|
|
283
474
|
def artifact_paths(directory: Path, artifact_id: str) -> tuple[Path, Path]:
    """Return the ``(content, metadata)`` file paths for ``artifact_id``.

    The content file is ``<id>.txt`` and the metadata file is ``<id>.json``,
    both directly inside ``directory`` after it has been passed through
    ``normalize_allowed_first_absolute_symlink``.

    Raises:
        ValueError: ``artifact_id`` does not match ``ARTIFACT_ID_RE``, or
            ``directory`` contains a ``".."`` component.
    """
    if ARTIFACT_ID_RE.fullmatch(artifact_id) is None:
        raise ValueError("artifact id must be 16-64 lowercase hex chars")
    reject_parent_traversal(directory, label="artifact directory")
    base = normalize_allowed_first_absolute_symlink(directory)
    content_path = base / f"{artifact_id}.txt"
    metadata_path = base / f"{artifact_id}.json"
    return content_path, metadata_path
|
|
288
480
|
|
|
@@ -295,15 +487,21 @@ def artifact_read_directories(raw_dir: str) -> list[Path]:
|
|
|
295
487
|
default. Reads and listings include that legacy default so old receipts keep
|
|
296
488
|
working; stores intentionally continue to use only the new path.
|
|
297
489
|
"""
|
|
298
|
-
|
|
490
|
+
raw_path = Path(raw_dir).expanduser()
|
|
491
|
+
reject_parent_traversal(raw_path, label="artifact directory")
|
|
492
|
+
primary = normalize_allowed_first_absolute_symlink(raw_path)
|
|
299
493
|
directories = [primary]
|
|
300
|
-
if
|
|
494
|
+
if default_artifact_dir_requested(raw_dir):
|
|
301
495
|
legacy = normalize_allowed_first_absolute_symlink(Path(LEGACY_ARTIFACT_DIR).expanduser())
|
|
302
496
|
if legacy != primary:
|
|
303
497
|
directories.append(legacy)
|
|
304
498
|
return directories
|
|
305
499
|
|
|
306
500
|
|
|
501
|
+
def default_artifact_dir_requested(raw_dir: str) -> bool:
    """Return True when ``raw_dir`` names the default artifact directory.

    Both sides are user-expanded before comparison, which matches how the
    legacy directory is expanded where it is used elsewhere in this file. If
    the default contains no ``~`` the expansion is a no-op, so results are
    unchanged in that case.
    """
    requested = Path(raw_dir).expanduser()
    default = Path(DEFAULT_ARTIFACT_DIR).expanduser()
    return requested == default
|
|
503
|
+
|
|
504
|
+
|
|
307
505
|
CONTENT_TYPE_VALUES = ("json", "diff", "log", "search", "code", "prose", "text")
|
|
308
506
|
# Recommended retrieval strategy per content type. Pattern-oriented payloads
|
|
309
507
|
# (logs, search hits, diffs) are best sliced by `--pattern`; structured or
|
|
@@ -396,6 +594,8 @@ def build_retrieval_hints(
|
|
|
396
594
|
content_type: str,
|
|
397
595
|
strategy: str,
|
|
398
596
|
total_lines: int,
|
|
597
|
+
raw_dir: str | None = None,
|
|
598
|
+
show_paths: bool = False,
|
|
399
599
|
) -> list[dict[str, object]]:
|
|
400
600
|
"""Build deterministic, machine-readable retrieval hints for bounded round-trip.
|
|
401
601
|
|
|
@@ -413,8 +613,8 @@ def build_retrieval_hints(
|
|
|
413
613
|
lines_hint: dict[str, object] = {
|
|
414
614
|
"type": "lines",
|
|
415
615
|
"selector": {"start": 1, "end": end_line},
|
|
416
|
-
"cli": line_query_cli(artifact_id, 1, end_line),
|
|
417
|
-
"exact": total_lines <= MAX_QUERY_LINES,
|
|
616
|
+
"cli": line_query_cli(artifact_id, 1, end_line, raw_dir=raw_dir, show_paths=show_paths),
|
|
617
|
+
"exact": total_lines <= MAX_QUERY_LINES and artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths),
|
|
418
618
|
}
|
|
419
619
|
if end_line > DEFAULT_MAX_LINES:
|
|
420
620
|
lines_hint["max_lines"] = end_line
|
|
@@ -436,37 +636,69 @@ def build_retrieval_hints(
|
|
|
436
636
|
{
|
|
437
637
|
"type": "pattern",
|
|
438
638
|
"selector": {"pattern": anchor},
|
|
439
|
-
"cli": f"
|
|
639
|
+
"cli": f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --pattern {shlex.quote(anchor)}",
|
|
440
640
|
}
|
|
441
641
|
)
|
|
442
642
|
hints.append(
|
|
443
643
|
{
|
|
444
644
|
"type": "head",
|
|
445
645
|
"selector": {"max_lines": DEFAULT_MAX_LINES},
|
|
446
|
-
"cli": f"
|
|
646
|
+
"cli": f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --max-lines {DEFAULT_MAX_LINES}",
|
|
447
647
|
}
|
|
448
648
|
)
|
|
449
649
|
return hints
|
|
450
650
|
|
|
451
651
|
|
|
452
|
-
def
|
|
453
|
-
|
|
652
|
+
def artifact_dir_cli_prefix(raw_dir: str | None, *, show_paths: bool = False) -> str:
    """Return the command prefix to use in generated CLI hints.

    - No directory, or the default directory: the bare command name.
    - Custom directory with ``show_paths`` False: a ``<artifact_dir>``
      placeholder stands in for the real path.
    - Custom directory with ``show_paths`` True: the shell-quoted ``raw_dir``.
    """
    command = "context-guard-artifact"
    if not raw_dir or default_artifact_dir_requested(raw_dir):
        return command
    if show_paths:
        return f"context-guard-artifact --dir {shlex.quote(raw_dir)}"
    return "context-guard-artifact --dir <artifact_dir>"
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def artifact_dir_cli_is_exact(raw_dir: str | None, *, show_paths: bool = False) -> bool:
    """Tell whether generated CLI hints can be run exactly as printed.

    That holds when no custom directory is involved (none given, or the
    default one), or when ``show_paths`` allows the real path to be printed.
    """
    if not raw_dir:
        return True
    return default_artifact_dir_requested(raw_dir) or show_paths
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def line_query_cli(
    artifact_id: str,
    start: int,
    end: int,
    *,
    raw_dir: str | None = None,
    show_paths: bool = False,
) -> str:
    """Build the ``get ... --lines start:end`` command for an artifact.

    When the inclusive range is longer than ``DEFAULT_MAX_LINES``, a
    ``--max-lines`` option is appended with the range length capped at
    ``MAX_QUERY_LINES``.
    """
    prefix = artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)
    command = f"{prefix} get {artifact_id} --lines {start}:{end}"
    span = end - start + 1
    if span <= DEFAULT_MAX_LINES:
        return command
    return f"{command} --max-lines {min(span, MAX_QUERY_LINES)}"
|
|
458
677
|
|
|
459
678
|
|
|
460
|
-
def line_receipt(
    artifact_id: str,
    line_number: int,
    text: str,
    *,
    raw_dir: str | None = None,
    show_paths: bool = False,
) -> dict[str, object]:
    """Describe one artifact line together with how to fetch it again.

    The returned mapping holds the line number, the stripped text passed
    through ``cap_digest_text``, a single-line selector, and the matching
    ``line_query_cli`` command.
    """
    preview = cap_digest_text(text.strip())
    single_line = {"type": "lines", "start": line_number, "end": line_number}
    command = line_query_cli(
        artifact_id,
        line_number,
        line_number,
        raw_dir=raw_dir,
        show_paths=show_paths,
    )
    return {
        "line": line_number,
        "text": preview,
        "selector": single_line,
        "cli": command,
    }
|
|
467
693
|
|
|
468
694
|
|
|
469
|
-
def build_top_error_receipts(
|
|
695
|
+
def build_top_error_receipts(
|
|
696
|
+
artifact_id: str,
|
|
697
|
+
lines: list[str],
|
|
698
|
+
*,
|
|
699
|
+
raw_dir: str | None = None,
|
|
700
|
+
show_paths: bool = False,
|
|
701
|
+
) -> list[dict[str, object]]:
|
|
470
702
|
receipts: list[dict[str, object]] = []
|
|
471
703
|
seen: set[str] = set()
|
|
472
704
|
for line_number, line in enumerate(lines, start=1):
|
|
@@ -475,7 +707,7 @@ def build_top_error_receipts(artifact_id: str, lines: list[str]) -> list[dict[st
|
|
|
475
707
|
text = cap_digest_text(line.strip())
|
|
476
708
|
if not text or text in seen:
|
|
477
709
|
continue
|
|
478
|
-
receipt = line_receipt(artifact_id, line_number, text)
|
|
710
|
+
receipt = line_receipt(artifact_id, line_number, text, raw_dir=raw_dir, show_paths=show_paths)
|
|
479
711
|
receipts.append(receipt)
|
|
480
712
|
seen.add(text)
|
|
481
713
|
if len(receipts) >= MAX_TOP_ERROR_RECEIPTS:
|
|
@@ -483,7 +715,14 @@ def build_top_error_receipts(artifact_id: str, lines: list[str]) -> list[dict[st
|
|
|
483
715
|
return receipts
|
|
484
716
|
|
|
485
717
|
|
|
486
|
-
def build_duplicate_line_groups(
|
|
718
|
+
def build_duplicate_line_groups(
|
|
719
|
+
artifact_id: str,
|
|
720
|
+
lines: list[str],
|
|
721
|
+
*,
|
|
722
|
+
limit: int = MAX_DUPLICATE_GROUPS,
|
|
723
|
+
raw_dir: str | None = None,
|
|
724
|
+
show_paths: bool = False,
|
|
725
|
+
) -> list[dict[str, object]]:
|
|
487
726
|
counts: dict[str, int] = {}
|
|
488
727
|
first_line: dict[str, int] = {}
|
|
489
728
|
for line_number, line in enumerate(lines, start=1):
|
|
@@ -506,13 +745,20 @@ def build_duplicate_line_groups(artifact_id: str, lines: list[str], *, limit: in
|
|
|
506
745
|
"first_line": line_number,
|
|
507
746
|
"text": text,
|
|
508
747
|
"selector": {"type": "lines", "start": line_number, "end": line_number},
|
|
509
|
-
"cli": line_query_cli(artifact_id, line_number, line_number),
|
|
748
|
+
"cli": line_query_cli(artifact_id, line_number, line_number, raw_dir=raw_dir, show_paths=show_paths),
|
|
510
749
|
}
|
|
511
750
|
)
|
|
512
751
|
return groups
|
|
513
752
|
|
|
514
753
|
|
|
515
|
-
def build_digest(
|
|
754
|
+
def build_digest(
|
|
755
|
+
sanitized_text: str,
|
|
756
|
+
*,
|
|
757
|
+
artifact_id: str,
|
|
758
|
+
redacted_lines: int,
|
|
759
|
+
raw_dir: str | None = None,
|
|
760
|
+
show_paths: bool = False,
|
|
761
|
+
) -> dict[str, object]:
|
|
516
762
|
lines = sanitized_text.splitlines()
|
|
517
763
|
top_errors = compact_items(
|
|
518
764
|
(line for line in lines if ERROR_RE.search(line)),
|
|
@@ -528,8 +774,8 @@ def build_digest(sanitized_text: str, *, artifact_id: str, redacted_lines: int)
|
|
|
528
774
|
"markers": sanitized_text.count("[REDACTED]"),
|
|
529
775
|
},
|
|
530
776
|
"top_error_lines": top_errors,
|
|
531
|
-
"top_error_receipts": build_top_error_receipts(artifact_id, lines),
|
|
532
|
-
"duplicate_line_groups": build_duplicate_line_groups(artifact_id, lines),
|
|
777
|
+
"top_error_receipts": build_top_error_receipts(artifact_id, lines, raw_dir=raw_dir, show_paths=show_paths),
|
|
778
|
+
"duplicate_line_groups": build_duplicate_line_groups(artifact_id, lines, raw_dir=raw_dir, show_paths=show_paths),
|
|
533
779
|
"representative_head": compact_items(
|
|
534
780
|
lines,
|
|
535
781
|
limit=8,
|
|
@@ -572,7 +818,198 @@ def suggested_queries_for(metadata: dict[str, object]) -> list[str]:
|
|
|
572
818
|
return queries[:MAX_SUGGESTED_QUERIES]
|
|
573
819
|
|
|
574
820
|
|
|
575
|
-
def
|
|
821
|
+
def artifact_handle(artifact_id: str) -> str:
    """Return the ``contextguard-artifact:<id>`` handle string for an artifact."""
    return "contextguard-artifact:{}".format(artifact_id)
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
def compact_stored_output(metadata: dict[str, object]) -> dict[str, object]:
    """Extract a small, fixed subset of ``metadata["stored_output"]``.

    Returns an empty dict when ``stored_output`` is not a dict. Otherwise the
    result contains whichever of the known keys are present, plus
    ``content_type`` copied from ``metadata`` when it is a string.
    """
    stored = metadata.get("stored_output")
    if not isinstance(stored, dict):
        return {}
    wanted = ("scope", "bytes", "lines", "sha256", "content_file", "metadata_file")
    compact: dict[str, object] = {name: stored[name] for name in wanted if name in stored}
    kind = metadata.get("content_type")
    if isinstance(kind, str):
        compact["content_type"] = kind
    return compact
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def digest_count(digest: dict[str, object], key: str) -> int:
    """Return the length of the list stored at ``digest[key]``, or 0 if it is not a list."""
    entries = digest.get(key)
    if isinstance(entries, list):
        return len(entries)
    return 0
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def build_output_sandbox_summary(metadata: dict[str, object]) -> dict[str, object]:
    """Summarise ``metadata["digest"]`` as counts rather than content.

    Returns ``{"status": "stored"}`` when there is no digest dict. Otherwise
    the summary carries the digest status (falling back to ``"stored"``), the
    number of entries in each digest list, the redaction counts (or the
    ``redacted_lines`` value when no counts dict exists), and the
    ``capped_for_metadata`` flag when it is a bool.
    """
    digest = metadata.get("digest")
    if not isinstance(digest, dict):
        return {"status": "stored"}

    summary: dict[str, object] = {"status": digest.get("status") or "stored"}
    counted_fields = (
        ("top_error_count", "top_error_lines"),
        ("top_error_receipt_count", "top_error_receipts"),
        ("duplicate_line_group_count", "duplicate_line_groups"),
        ("representative_head_count", "representative_head"),
        ("representative_tail_count", "representative_tail"),
    )
    for summary_key, digest_key in counted_fields:
        summary[summary_key] = digest_count(digest, digest_key)

    redaction_counts = digest.get("redaction_counts")
    if isinstance(redaction_counts, dict):
        # Keep only scalar values, with keys coerced to strings.
        scalar_counts: dict[str, object] = {}
        for name, amount in redaction_counts.items():
            if isinstance(amount, (int, float, str, bool)) or amount is None:
                scalar_counts[str(name)] = amount
        summary["redaction_counts"] = scalar_counts
    elif "redacted_lines" in digest:
        summary["redacted_lines"] = digest.get("redacted_lines")

    capped = digest.get("capped_for_metadata")
    if isinstance(capped, bool):
        summary["capped_for_metadata"] = capped
    return summary
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
def rehydration_command_record(
|
|
872
|
+
*,
|
|
873
|
+
kind: str,
|
|
874
|
+
cli: str,
|
|
875
|
+
selector: dict[str, object],
|
|
876
|
+
exact: bool,
|
|
877
|
+
note: str | None = None,
|
|
878
|
+
) -> dict[str, object]:
|
|
879
|
+
record: dict[str, object] = {
|
|
880
|
+
"type": kind,
|
|
881
|
+
"selector": selector,
|
|
882
|
+
"cli": cli,
|
|
883
|
+
"exact": exact,
|
|
884
|
+
}
|
|
885
|
+
if note:
|
|
886
|
+
record["note"] = note
|
|
887
|
+
return record
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
def build_output_sandbox_rehydration(
    metadata: dict[str, object],
    *,
    raw_dir: str | None = None,
    show_paths: bool = False,
) -> dict[str, object]:
    """Build the list of CLI commands that re-fetch parts of a stored artifact.

    The list always starts with a ``receipt`` command. Commands derived from
    ``metadata["retrieval"]["hints"]`` follow, and one literal ``search``
    command is added when an error anchor can be extracted from the digest's
    ``top_error_lines``. The list is limited to five entries.

    Returns:
        A dict with ``commands``, ``dir_argument`` (``"default"``,
        ``"included"`` or ``"redacted"``), ``exact_commands`` and ``note``.

    Raises:
        KeyError: ``metadata`` has no ``"artifact_id"``.
    """
    artifact_id = str(metadata["artifact_id"])
    cli_exact = artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths)
    prefix = artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)
    # The note is only attached when the printed commands are not exact.
    note = (
        None
        if cli_exact
        else "custom artifact directory is redacted; rerun with the same --dir value or pass --show-paths for a directly executable local command"
    )
    commands: list[dict[str, object]] = [
        rehydration_command_record(
            kind="metadata",
            selector={"type": "receipt"},
            cli=f"{prefix} receipt {artifact_id} --json",
            exact=cli_exact,
            note=note,
        )
    ]

    retrieval = metadata.get("retrieval")
    hints = retrieval.get("hints") if isinstance(retrieval, dict) else None
    if isinstance(hints, list):
        for hint in hints:
            if not isinstance(hint, dict):
                continue
            hint_type = hint.get("type")
            selector = hint.get("selector")
            if not isinstance(selector, dict):
                selector = {}
            cli: str | None = None
            # A hint is exact only if it says so AND the directory prefix is exact.
            exact = bool(hint.get("exact", True)) and cli_exact
            # Rebuild the command from the selector for the known hint types,
            # so the current raw_dir/show_paths settings are applied.
            if hint_type == "lines":
                start = selector.get("start")
                end = selector.get("end")
                if isinstance(start, int) and isinstance(end, int):
                    cli = line_query_cli(artifact_id, start, end, raw_dir=raw_dir, show_paths=show_paths)
            elif hint_type == "pattern":
                pattern = selector.get("pattern")
                if isinstance(pattern, str) and pattern:
                    cli = f"{prefix} get {artifact_id} --pattern {shlex.quote(pattern)}"
            elif hint_type == "head":
                max_lines = selector.get("max_lines")
                if isinstance(max_lines, int) and max_lines > 0:
                    cli = f"{prefix} get {artifact_id} --max-lines {max_lines}"
            if cli is None:
                # Unknown hint type or unusable selector: fall back to the
                # command string stored in the hint, if there is one.
                raw_cli = hint.get("cli")
                cli = raw_cli if isinstance(raw_cli, str) and raw_cli else None
            if cli:
                commands.append(
                    rehydration_command_record(
                        kind=str(hint_type or "query"),
                        selector=selector,
                        cli=cli,
                        exact=exact,
                        note=note if not cli_exact else str(hint.get("note") or "") or None,
                    )
                )
            if len(commands) >= 5:
                break

    digest = metadata.get("digest")
    top_error_lines = digest.get("top_error_lines") if isinstance(digest, dict) else None
    if isinstance(top_error_lines, list):
        anchor = first_error_anchor("\n".join(str(line) for line in top_error_lines))
        if anchor and len(commands) < 5:
            commands.append(
                rehydration_command_record(
                    kind="search",
                    selector={"type": "literal", "pattern": anchor},
                    cli=f"{prefix} search {shlex.quote(anchor)} --json",
                    exact=cli_exact,
                    note=note,
                )
            )

    return {
        "commands": commands,
        "dir_argument": "default" if default_artifact_dir_requested(raw_dir or DEFAULT_ARTIFACT_DIR) else ("included" if show_paths else "redacted"),
        "exact_commands": cli_exact,
        "note": note,
    }
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def build_output_sandbox_envelope(
    metadata: dict[str, object],
    *,
    raw_dir: str | None = None,
    show_paths: bool = False,
) -> dict[str, object]:
    """Assemble the "output sandbox" receipt envelope for one stored artifact.

    The envelope combines the artifact handle, the compact stored-output
    record, a summary, the rehydration command set, fixed agent guidance
    strings and a fixed claim-boundary mapping.

    Args:
        metadata: Artifact metadata; must contain an ``artifact_id`` key.
        raw_dir: Artifact directory argument forwarded to the rehydration
            builder.
        show_paths: Forwarded to the rehydration builder.

    Returns:
        The envelope as a plain dictionary.

    Raises:
        KeyError: If ``metadata`` has no ``artifact_id`` entry.
    """
    artifact_id = str(metadata["artifact_id"])

    # Keys are inserted in a fixed order; each helper is called exactly once.
    envelope: dict[str, object] = {
        "schema_version": OUTPUT_SANDBOX_SCHEMA_VERSION,
        "mode": "local_artifact_receipt",
    }
    envelope["handle"] = artifact_handle(artifact_id)
    envelope["artifact_id"] = artifact_id
    envelope["stored_output"] = compact_stored_output(metadata)
    envelope["summary"] = build_output_sandbox_summary(metadata)
    envelope["rehydration"] = build_output_sandbox_rehydration(
        metadata,
        raw_dir=raw_dir,
        show_paths=show_paths,
    )
    envelope["agent_guidance"] = [
        "Keep this compact receipt in agent context instead of pasting the full output.",
        "Before relying on omitted details, rehydrate the exact sanitized slice with one of rehydration.commands[].cli.",
        "For repeated diagnostics, query narrower lines or literal matches instead of rerunning broad commands unchanged.",
    ]
    envelope["claim_boundary"] = {
        "local_only": True,
        "stored_content_is_sanitized_copy": True,
        "hosted_api_token_or_cost_savings_claim_allowed": False,
        "exact_rehydration_required_before_relying_on_omitted_detail": True,
    }
    return envelope
|
|
1005
|
+
|
|
1006
|
+
|
|
1007
|
+
def receipt_for(
|
|
1008
|
+
metadata: dict[str, object],
|
|
1009
|
+
*,
|
|
1010
|
+
raw_dir: str | None = None,
|
|
1011
|
+
show_paths: bool = False,
|
|
1012
|
+
) -> dict[str, object]:
|
|
576
1013
|
artifact_id = str(metadata["artifact_id"])
|
|
577
1014
|
return {
|
|
578
1015
|
"artifact_id": artifact_id,
|
|
@@ -585,11 +1022,12 @@ def receipt_for(metadata: dict[str, object]) -> dict[str, object]:
|
|
|
585
1022
|
"digest": metadata.get("digest"),
|
|
586
1023
|
"retrieval": metadata.get("retrieval"),
|
|
587
1024
|
"available_queries": [
|
|
588
|
-
|
|
589
|
-
f"
|
|
590
|
-
f"
|
|
1025
|
+
line_query_cli(artifact_id, 1, 80, raw_dir=raw_dir, show_paths=show_paths),
|
|
1026
|
+
f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --pattern ERROR --max-lines 40",
|
|
1027
|
+
f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --json --lines 1:20",
|
|
591
1028
|
],
|
|
592
1029
|
"suggested_queries": suggested_queries_for(metadata),
|
|
1030
|
+
"output_sandbox": build_output_sandbox_envelope(metadata, raw_dir=raw_dir, show_paths=show_paths),
|
|
593
1031
|
}
|
|
594
1032
|
|
|
595
1033
|
|
|
@@ -699,7 +1137,13 @@ def store_command(args: argparse.Namespace) -> int:
|
|
|
699
1137
|
"content_file": content_path.name,
|
|
700
1138
|
"metadata_file": meta_path.name,
|
|
701
1139
|
},
|
|
702
|
-
"digest": build_digest(
|
|
1140
|
+
"digest": build_digest(
|
|
1141
|
+
sanitized_text,
|
|
1142
|
+
artifact_id=artifact_id,
|
|
1143
|
+
redacted_lines=redacted_lines,
|
|
1144
|
+
raw_dir=args.dir,
|
|
1145
|
+
show_paths=args.show_paths,
|
|
1146
|
+
),
|
|
703
1147
|
"retrieval": {
|
|
704
1148
|
"strategy": strategy,
|
|
705
1149
|
"deterministic": True,
|
|
@@ -709,17 +1153,22 @@ def store_command(args: argparse.Namespace) -> int:
|
|
|
709
1153
|
content_type=content_type,
|
|
710
1154
|
strategy=strategy,
|
|
711
1155
|
total_lines=total_lines,
|
|
1156
|
+
raw_dir=args.dir,
|
|
1157
|
+
show_paths=args.show_paths,
|
|
712
1158
|
),
|
|
713
1159
|
},
|
|
714
1160
|
}
|
|
715
1161
|
shrink_digest_for_metadata_cap(metadata)
|
|
716
1162
|
write_private_text(content_path, sanitized_text)
|
|
717
1163
|
write_private_text(meta_path, metadata_json_text(metadata))
|
|
718
|
-
receipt = receipt_for(metadata)
|
|
1164
|
+
receipt = receipt_for(metadata, raw_dir=args.dir, show_paths=args.show_paths)
|
|
719
1165
|
if args.json:
|
|
720
1166
|
print(json.dumps(receipt, ensure_ascii=False, indent=2, sort_keys=True))
|
|
721
1167
|
else:
|
|
722
1168
|
print(f"artifact_id={artifact_id}")
|
|
1169
|
+
sandbox = receipt.get("output_sandbox")
|
|
1170
|
+
handle = sandbox.get("handle") if isinstance(sandbox, dict) else artifact_handle(artifact_id)
|
|
1171
|
+
print(f"handle={handle}")
|
|
723
1172
|
stored = receipt["stored_output"]
|
|
724
1173
|
if isinstance(stored, dict):
|
|
725
1174
|
print(f"stored_output={stored.get('lines')} lines/{stored.get('bytes')} bytes")
|
|
@@ -728,7 +1177,16 @@ def store_command(args: argparse.Namespace) -> int:
|
|
|
728
1177
|
print("top_error_lines:")
|
|
729
1178
|
for line in digest["top_error_lines"]: # type: ignore[index]
|
|
730
1179
|
print(f"- {line}")
|
|
731
|
-
|
|
1180
|
+
available_queries = receipt.get("available_queries")
|
|
1181
|
+
if isinstance(available_queries, list) and available_queries:
|
|
1182
|
+
print(f"query={available_queries[0]}")
|
|
1183
|
+
rehydration = sandbox.get("rehydration") if isinstance(sandbox, dict) else None
|
|
1184
|
+
commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
|
|
1185
|
+
if isinstance(commands, list):
|
|
1186
|
+
for command in commands:
|
|
1187
|
+
if isinstance(command, dict) and command.get("type") != "metadata" and isinstance(command.get("cli"), str):
|
|
1188
|
+
print(f"rehydrate={command['cli']}")
|
|
1189
|
+
break
|
|
732
1190
|
return 0
|
|
733
1191
|
|
|
734
1192
|
|
|
@@ -745,6 +1203,26 @@ def load_metadata(directory: Path, artifact_id: str) -> dict[str, object]:
|
|
|
745
1203
|
return data
|
|
746
1204
|
|
|
747
1205
|
|
|
1206
|
+
def load_verified_artifact(directory: Path, artifact_id: str) -> tuple[dict[str, object], Path, str]:
    """Load an artifact's metadata and content, verifying size and SHA-256.

    Args:
        directory: Directory that holds the artifact files.
        artifact_id: Identifier of the artifact to load.

    Returns:
        A ``(metadata, content_path, content)`` tuple.

    Raises:
        ValueError: If the metadata lacks a well-formed ``stored_output``
            sha256 or byte count, or if the content file's size or digest
            does not match the recorded values.
    """
    metadata = load_metadata(directory, artifact_id)
    content_path, _meta_path = artifact_paths(directory, artifact_id)

    recorded = metadata.get("stored_output")
    stored_record = recorded if isinstance(recorded, dict) else {}

    # The digest must be a lowercase 64-hex-digit string.
    recorded_digest = stored_record.get("sha256")
    if not (isinstance(recorded_digest, str) and re.fullmatch(r"[a-f0-9]{64}", recorded_digest)):
        raise ValueError(f"artifact metadata missing stored_output sha256: {artifact_id}")

    # The byte count must be an int within [0, MAX_MAX_BYTES].
    recorded_size = stored_record.get("bytes")
    if not isinstance(recorded_size, int) or not (0 <= recorded_size <= MAX_MAX_BYTES):
        raise ValueError(f"artifact metadata has invalid stored_output bytes: {artifact_id}")

    # Compare the on-disk size first, before reading and hashing the content.
    if regular_private_file_size(content_path) != recorded_size:
        raise ValueError(f"artifact content checksum mismatch: {artifact_id}")

    content = read_bounded_private_text(content_path, recorded_size)
    digest = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
    if digest != recorded_digest:
        raise ValueError(f"artifact content checksum mismatch: {artifact_id}")

    return metadata, content_path, content
|
|
1224
|
+
|
|
1225
|
+
|
|
748
1226
|
def parse_line_range(value: str | None) -> tuple[int, int] | None:
|
|
749
1227
|
if not value:
|
|
750
1228
|
return None
|
|
@@ -766,6 +1244,149 @@ def cap_text(text: str, max_chars: int) -> tuple[str, bool]:
|
|
|
766
1244
|
return text[:keep].rstrip() + marker, True
|
|
767
1245
|
|
|
768
1246
|
|
|
1247
|
+
def search_literal(value: str) -> str:
    """Validate a literal search pattern and return it unchanged.

    Raises:
        ValueError: If the pattern is empty, contains a NUL character, or its
            UTF-8 encoding is longer than ``MAX_SEARCH_PATTERN_BYTES``.
    """
    if not value:
        raise ValueError("search pattern must not be empty")
    if value.find("\x00") != -1:
        raise ValueError("search pattern must not contain NUL bytes")

    # The limit applies to encoded bytes, not to the number of characters.
    encoded_length = len(value.encode("utf-8", errors="replace"))
    if encoded_length > MAX_SEARCH_PATTERN_BYTES:
        raise ValueError(f"search pattern exceeds {MAX_SEARCH_PATTERN_BYTES} bytes")
    return value
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
def safe_query_label(value: str) -> str:
    """Return ``value`` passed through ``sanitize_one_line`` with paths hidden."""
    label = sanitize_one_line(value, show_paths=False)
    return label
|
|
1260
|
+
|
|
1261
|
+
|
|
1262
|
+
def artifact_dir_label(raw_dir: str) -> str:
    """Return a display label for the artifact directory argument.

    The label is ``"default"`` when ``default_artifact_dir_requested`` accepts
    ``raw_dir``; otherwise it is ``raw_dir`` passed through
    ``sanitize_one_line`` with ``show_paths=False``.
    """
    if not default_artifact_dir_requested(raw_dir):
        return sanitize_one_line(raw_dir, show_paths=False)
    return "default"
|
|
1266
|
+
|
|
1267
|
+
|
|
1268
|
+
def metadata_text_field(metadata: dict[str, object], key: str) -> str | None:
    """Return the sanitized string stored under ``key``, or ``None``.

    ``None`` is returned when the key is absent or its value is not a string.
    """
    raw = metadata.get(key)
    if isinstance(raw, str):
        return sanitize_one_line(raw, show_paths=False)
    return None
|
|
1273
|
+
|
|
1274
|
+
|
|
1275
|
+
def metadata_content_type(metadata: dict[str, object]) -> str:
    """Return the metadata's ``content_type``, falling back to ``"text"``.

    The stored value is used only when it is a string that is a member of
    ``CONTENT_TYPE_VALUES``.
    """
    declared = metadata.get("content_type")
    # The isinstance check runs first so non-string values never reach the
    # membership test.
    if isinstance(declared, str) and declared in CONTENT_TYPE_VALUES:
        return declared
    return "text"
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
def metadata_candidate_paths(directory: Path, limit: int) -> tuple[list[Path], int, int]:
    """Collect up to ``limit`` metadata file paths from ``directory``.

    Only entries whose name ends in ``.json`` are considered. An entry is
    counted as skipped when its stem does not match ``ARTIFACT_ID_RE``, when
    it is not a regular file (symlinks are not followed), or when checking
    it raises ``OSError``.

    Args:
        directory: Directory to scan.
        limit: Maximum number of candidate paths to collect.

    Returns:
        ``(candidates, skipped, truncated_lower_bound)``. The candidates are
        sorted when the scan completes normally. ``truncated_lower_bound`` is
        1 when the scan stopped at a further valid entry after ``limit`` was
        reached, otherwise 0. If scanning raises ``OSError``, the candidates
        gathered so far are returned unsorted with ``skipped`` increased by
        one.
    """
    found: list[Path] = []
    skipped = 0
    overflow = 0
    if limit <= 0:
        return found, skipped, 0

    try:
        with os.scandir(directory) as listing:
            for item in listing:
                filename = item.name
                if not filename.endswith(".json"):
                    continue
                stem = filename[:-5]
                if not ARTIFACT_ID_RE.fullmatch(stem):
                    skipped += 1
                    continue
                # A failing file-type check counts the same as "not a file".
                try:
                    is_regular = item.is_file(follow_symlinks=False)
                except OSError:
                    is_regular = False
                if not is_regular:
                    skipped += 1
                    continue
                if len(found) >= limit:
                    # Stop at the first extra valid entry; entries after it
                    # are not counted.
                    overflow += 1
                    break
                found.append(directory / filename)
    except OSError:
        return found, skipped + 1, overflow

    return sorted(found), skipped, overflow
|
|
1309
|
+
|
|
1310
|
+
|
|
1311
|
+
def search_match_record(
    *,
    artifact_id: str,
    line_number: int,
    lines: list[str],
    context_lines: int,
    snippet_chars: int,
    metadata: dict[str, object],
    raw_dir: str,
    show_paths: bool,
) -> dict[str, object]:
    """Build the result record for a single matching line.

    Args:
        artifact_id: Artifact that contains the match.
        line_number: 1-based number of the matching line.
        lines: All lines of the artifact content.
        context_lines: Number of lines of context on each side of the match.
        snippet_chars: Limit passed to ``cap_line`` for each emitted line.
        metadata: Artifact metadata, read for content type and command preview.
        raw_dir: Artifact directory argument used for the retrieval command.
        show_paths: Whether the retrieval command may include raw paths.

    Returns:
        A dictionary with the matched text, the context before and after it,
        and a ``retrieval`` entry describing how to fetch the same window.
    """
    # The context window is clamped to the bounds of the artifact.
    window_start = max(1, line_number - context_lines)
    window_end = min(len(lines), line_number + context_lines)
    exact = artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths)

    def capped(number: int) -> str:
        return cap_line(lines[number - 1].rstrip("\n"), limit=snippet_chars)

    record: dict[str, object] = {"artifact_id": artifact_id, "line": line_number}
    record["text"] = capped(line_number)
    record["context_before"] = [
        {"line": number, "text": capped(number)}
        for number in range(window_start, line_number)
    ]
    record["context_after"] = [
        {"line": number, "text": capped(number)}
        for number in range(line_number + 1, window_end + 1)
    ]
    record["content_type"] = metadata_content_type(metadata)
    record["command_preview"] = metadata_text_field(metadata, "command_preview")

    cli = line_query_cli(artifact_id, window_start, window_end, raw_dir=raw_dir, show_paths=show_paths)
    if default_artifact_dir_requested(raw_dir):
        dir_argument = "default"
    elif show_paths:
        dir_argument = "included"
    else:
        dir_argument = "redacted"

    if exact:
        note = None
    else:
        note = "custom artifact directory is redacted; rerun with the same --dir used for search, or pass search --show-paths to emit a directly executable local CLI"

    record["retrieval"] = {
        "selector": {"type": "lines", "start": window_start, "end": window_end},
        "cli": cli,
        "exact": exact,
        "dir_argument": dir_argument,
        "note": note,
    }
    return record
|
|
1349
|
+
|
|
1350
|
+
|
|
1351
|
+
def search_artifact_content(
    *,
    artifact_id: str,
    metadata: dict[str, object],
    content: str,
    literal: str,
    ignore_case: bool,
    context_lines: int,
    snippet_chars: int,
    remaining_matches: int,
    raw_dir: str,
    show_paths: bool,
) -> tuple[list[dict[str, object]], int]:
    """Find lines of ``content`` that contain ``literal``.

    Args:
        artifact_id: Artifact being searched.
        metadata: Artifact metadata forwarded to each match record.
        content: Full artifact text.
        literal: Substring to look for.
        ignore_case: Compare case-folded text when true.
        context_lines: Context lines forwarded to each match record.
        snippet_chars: Per-line limit forwarded to each match record.
        remaining_matches: Maximum number of match records to build.
        raw_dir: Artifact directory argument forwarded to each match record.
        show_paths: Forwarded to each match record.

    Returns:
        ``(records, matched_line_count)``. Every matching line is counted,
        but at most ``remaining_matches`` records are built.
    """
    all_lines = content.splitlines()
    target = literal.casefold() if ignore_case else literal

    records: list[dict[str, object]] = []
    hit_count = 0
    for index, raw_line in enumerate(all_lines):
        comparable = raw_line.casefold() if ignore_case else raw_line
        if target in comparable:
            hit_count += 1
            # Keep counting after the cap so the caller can report truncation.
            if len(records) < remaining_matches:
                records.append(
                    search_match_record(
                        artifact_id=artifact_id,
                        line_number=index + 1,
                        lines=all_lines,
                        context_lines=context_lines,
                        snippet_chars=snippet_chars,
                        metadata=metadata,
                        raw_dir=raw_dir,
                        show_paths=show_paths,
                    )
                )
    return records, hit_count
|
|
1388
|
+
|
|
1389
|
+
|
|
769
1390
|
def query_content(
|
|
770
1391
|
content: str,
|
|
771
1392
|
*,
|
|
@@ -805,8 +1426,7 @@ def get_command(args: argparse.Namespace) -> int:
|
|
|
805
1426
|
last_missing: FileNotFoundError | None = None
|
|
806
1427
|
for directory in artifact_read_directories(args.dir):
|
|
807
1428
|
try:
|
|
808
|
-
metadata =
|
|
809
|
-
content_path, _meta_path = artifact_paths(directory, artifact_id)
|
|
1429
|
+
metadata, _content_path, content = load_verified_artifact(directory, artifact_id)
|
|
810
1430
|
break
|
|
811
1431
|
except FileNotFoundError as exc:
|
|
812
1432
|
last_missing = exc
|
|
@@ -815,19 +1435,9 @@ def get_command(args: argparse.Namespace) -> int:
|
|
|
815
1435
|
raise last_missing
|
|
816
1436
|
raise FileNotFoundError(f"artifact not found: {artifact_id}")
|
|
817
1437
|
stored_output = metadata.get("stored_output")
|
|
818
|
-
expected_sha = stored_output.get("sha256") if isinstance(stored_output, dict) else None
|
|
819
|
-
if not isinstance(expected_sha, str) or not re.fullmatch(r"[a-f0-9]{64}", expected_sha):
|
|
820
|
-
raise ValueError(f"artifact metadata missing stored_output sha256: {artifact_id}")
|
|
821
1438
|
expected_bytes = stored_output.get("bytes") if isinstance(stored_output, dict) else None
|
|
822
|
-
if not isinstance(expected_bytes, int)
|
|
1439
|
+
if not isinstance(expected_bytes, int):
|
|
823
1440
|
raise ValueError(f"artifact metadata has invalid stored_output bytes: {artifact_id}")
|
|
824
|
-
actual_size = regular_private_file_size(content_path)
|
|
825
|
-
if actual_size != expected_bytes:
|
|
826
|
-
raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
|
|
827
|
-
content = read_bounded_private_text(content_path, expected_bytes)
|
|
828
|
-
actual_sha = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
|
|
829
|
-
if actual_sha != expected_sha:
|
|
830
|
-
raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
|
|
831
1441
|
default_max_chars = max(DEFAULT_MAX_CHARS, expected_bytes) if full else DEFAULT_MAX_CHARS
|
|
832
1442
|
max_chars = bounded_int(args.max_chars, default_max_chars, 1, MAX_MAX_BYTES)
|
|
833
1443
|
line_range = parse_line_range(args.lines)
|
|
@@ -856,6 +1466,176 @@ def get_command(args: argparse.Namespace) -> int:
|
|
|
856
1466
|
return 0
|
|
857
1467
|
|
|
858
1468
|
|
|
1469
|
+
def receipt_command(args: argparse.Namespace) -> int:
    """Print the receipt for one stored artifact.

    The artifact is looked up in each directory yielded by
    ``artifact_read_directories`` and loaded through
    ``load_verified_artifact``. The receipt is printed as JSON when
    ``args.json`` is set, otherwise as ``key=value`` lines.

    Returns:
        0 on success; 1 after reporting a lookup, validation, OS or JSON
        error on stderr.
    """
    artifact_id = args.artifact_id
    try:
        located = False
        missing_error: FileNotFoundError | None = None
        for directory in artifact_read_directories(args.dir):
            try:
                metadata, _content_path, _content = load_verified_artifact(directory, artifact_id)
            except FileNotFoundError as exc:
                # Remember the miss and try the next directory.
                missing_error = exc
            else:
                located = True
                break
        if not located:
            if missing_error is not None:
                raise missing_error
            raise FileNotFoundError(f"artifact not found: {artifact_id}")
        receipt = receipt_for(
            metadata,
            raw_dir=args.dir,
            show_paths=bool(getattr(args, "show_paths", False)),
        )
    except (FileNotFoundError, ValueError, OSError, json.JSONDecodeError) as exc:
        print(f"context-guard-artifact: {exc}", file=sys.stderr)
        return 1

    if args.json:
        print(json.dumps(receipt, ensure_ascii=False, indent=2, sort_keys=True))
        return 0

    sandbox = receipt.get("output_sandbox")
    sandbox_is_mapping = isinstance(sandbox, dict)
    handle = sandbox.get("handle") if sandbox_is_mapping else artifact_handle(artifact_id)
    print(f"artifact_id={artifact_id}")
    print(f"handle={handle}")

    stored = receipt.get("stored_output")
    if isinstance(stored, dict):
        print(f"stored_output={stored.get('lines')} lines/{stored.get('bytes')} bytes")

    rehydration = sandbox.get("rehydration") if sandbox_is_mapping else None
    commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
    if isinstance(commands, list):
        # Only the first four command entries are considered for display.
        for entry in commands[:4]:
            if isinstance(entry, dict) and entry.get("cli"):
                print(f"rehydrate={entry.get('cli')}")

    print("claim_boundary=local sanitized artifact; no hosted token/cost savings claim")
    return 0
|
|
1505
|
+
|
|
1506
|
+
|
|
1507
|
+
def search_command(args: argparse.Namespace) -> int:
    """Search stored artifacts for a literal substring and print the results.

    Walks each directory from ``artifact_read_directories(args.dir)``, reads
    candidate metadata files, loads each artifact through
    ``load_verified_artifact`` and collects match records via
    ``search_artifact_content``. Output is JSON when ``args.json`` is set,
    otherwise one line per match plus its rehydration command.

    Returns:
        0 after printing results; 1 when a ``FileNotFoundError``,
        ``ValueError``, ``OSError`` or ``json.JSONDecodeError`` escapes the
        search and is reported on stderr.
    """
    try:
        literal = search_literal(args.pattern)
        # Each limit goes through bounded_int with a default and a min/max pair
        # (presumably clamping; confirm against bounded_int).
        max_artifacts = bounded_int(args.max_artifacts, DEFAULT_SEARCH_MAX_ARTIFACTS, 1, MAX_SEARCH_MAX_ARTIFACTS)
        max_matches = bounded_int(args.max_matches, DEFAULT_SEARCH_MAX_MATCHES, 1, MAX_SEARCH_MAX_MATCHES)
        context_lines = bounded_int(args.context_lines, DEFAULT_SEARCH_CONTEXT_LINES, 0, MAX_SEARCH_CONTEXT_LINES)
        snippet_chars = bounded_int(args.max_snippet_chars, DEFAULT_SEARCH_SNIPPET_CHARS, 1, MAX_SEARCH_SNIPPET_CHARS)
        ignore_case = bool(args.ignore_case)
        matches: list[dict[str, object]] = []
        # Artifact IDs already handled, so a duplicate ID in a later directory is skipped.
        seen: set[str] = set()
        scanned_artifacts = 0
        skipped_artifacts = 0
        total_matched_lines = 0
        meta_candidates_seen = 0
        scan_truncated = False
        scan_truncated_count = 0
        matched_artifact_ids: set[str] = set()

        for directory in artifact_read_directories(args.dir):
            # The metadata-candidate budget is shared across all directories.
            remaining_candidates = max_artifacts - meta_candidates_seen
            if remaining_candidates <= 0:
                scan_truncated = True
                break
            # Skip directories that fail the symlink check (RuntimeError), are
            # not directories, or are themselves symlinks.
            try:
                reject_symlink_components(directory)
                directory_is_safe = directory.is_dir() and not directory.is_symlink()
            except RuntimeError:
                directory_is_safe = False
            if not directory_is_safe:
                continue
            meta_paths, skipped_candidates, truncated_candidates = metadata_candidate_paths(directory, remaining_candidates)
            skipped_artifacts += skipped_candidates
            if truncated_candidates:
                scan_truncated = True
                scan_truncated_count += truncated_candidates
            for meta_path in meta_paths:
                meta_candidates_seen += 1
                # Unreadable or malformed metadata is counted as skipped, not fatal.
                try:
                    data = json.loads(read_bounded_private_text(meta_path, MAX_METADATA_BYTES))
                except (OSError, ValueError, RuntimeError, json.JSONDecodeError):
                    skipped_artifacts += 1
                    continue
                artifact_id = str(data.get("artifact_id", "")) if isinstance(data, dict) else ""
                if not (isinstance(data, dict) and ARTIFACT_ID_RE.fullmatch(artifact_id)) or artifact_id in seen:
                    skipped_artifacts += 1
                    continue
                seen.add(artifact_id)
                # NOTE(review): this branch looks unreachable, because the candidate
                # budget above keeps meta_candidates_seen <= max_artifacts and
                # scanned_artifacts is always smaller here; confirm before relying on it.
                if scanned_artifacts >= max_artifacts:
                    scan_truncated = True
                    scan_truncated_count += 1
                    continue
                # Artifacts that fail to load or verify are counted as skipped.
                try:
                    metadata, _content_path, content = load_verified_artifact(directory, artifact_id)
                except (OSError, ValueError, RuntimeError, json.JSONDecodeError):
                    skipped_artifacts += 1
                    continue
                scanned_artifacts += 1
                # Once max_matches records exist, later artifacts are still
                # searched so matched-line totals stay complete.
                remaining = max(0, max_matches - len(matches))
                artifact_matches, artifact_match_count = search_artifact_content(
                    artifact_id=artifact_id,
                    metadata=metadata,
                    content=content,
                    literal=literal,
                    ignore_case=ignore_case,
                    context_lines=context_lines,
                    snippet_chars=snippet_chars,
                    remaining_matches=remaining,
                    raw_dir=args.dir,
                    show_paths=bool(getattr(args, "show_paths", False)),
                )
                if artifact_match_count:
                    matched_artifact_ids.add(artifact_id)
                total_matched_lines += artifact_match_count
                matches.extend(artifact_matches)
        payload = {
            "tool": "context-guard-artifact",
            "schema_version": SEARCH_SCHEMA_VERSION,
            "mode": "search",
            "query": {
                # Only a sanitized label of the pattern is emitted.
                "label": safe_query_label(literal),
                "raw_pattern_stored": False,
                "literal": True,
                "ignore_case": ignore_case,
            },
            "artifact_dir": artifact_dir_label(args.dir),
            "scanned_artifacts": scanned_artifacts,
            "skipped_artifacts": skipped_artifacts,
            "matched_artifacts": len(matched_artifact_ids),
            "matched_lines": total_matched_lines,
            "metadata_candidates_scanned": meta_candidates_seen,
            "matches": matches,
            # Matching lines counted beyond the max_matches record cap.
            "matches_truncated_count": max(0, total_matched_lines - max_matches),
            "artifact_scan_truncated": scan_truncated,
            "artifact_scan_truncated_count": scan_truncated_count,
            "artifact_scan_truncated_count_mode": SEARCH_TRUNCATED_COUNT_UNKNOWN if scan_truncated else "exact",
            "limits": {
                "max_artifacts": max_artifacts,
                "max_matches": max_matches,
                "context_lines": context_lines,
                "max_snippet_chars": snippet_chars,
            },
            "sandbox": {
                "local_only": True,
                "workflow": ["store", "search", "get"],
                "exact_rehydration": "use matches[].retrieval.cli when exact=true; for redacted custom dirs, reuse the same --dir or opt into --show-paths",
            },
            "claim_boundary": {
                "local_only": True,
                "stored_content_is_sanitized_copy": True,
                "hosted_api_token_or_cost_savings_claim_allowed": False,
                "exact_rehydration_required_before_relying_on_omitted_detail": True,
            },
        }
    except (FileNotFoundError, ValueError, OSError, json.JSONDecodeError) as exc:
        print(f"context-guard-artifact: {exc}", file=sys.stderr)
        return 1
    if args.json:
        print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))
    else:
        # Text mode: one "id:line: text" row per match, then its rehydration CLI.
        for item in payload["matches"]:
            if isinstance(item, dict):
                print(f"{item.get('artifact_id')}:{item.get('line')}: {item.get('text')}")
                retrieval = item.get("retrieval")
                if isinstance(retrieval, dict):
                    print(f" rehydrate={retrieval.get('cli')}")
        if not payload["matches"]:
            print("no matches")
        elif payload["matches_truncated_count"]:
            print(f"matches_truncated_count={payload['matches_truncated_count']}")
    return 0
|
|
1637
|
+
|
|
1638
|
+
|
|
859
1639
|
def list_command(args: argparse.Namespace) -> int:
|
|
860
1640
|
items: list[dict[str, object]] = []
|
|
861
1641
|
seen: set[str] = set()
|
|
@@ -874,7 +1654,7 @@ def list_command(args: argparse.Namespace) -> int:
|
|
|
874
1654
|
continue
|
|
875
1655
|
artifact_id = str(data.get("artifact_id", "")) if isinstance(data, dict) else ""
|
|
876
1656
|
if isinstance(data, dict) and ARTIFACT_ID_RE.fullmatch(artifact_id) and artifact_id not in seen:
|
|
877
|
-
items.append(receipt_for(data))
|
|
1657
|
+
items.append(receipt_for(data, raw_dir=args.dir, show_paths=False))
|
|
878
1658
|
seen.add(artifact_id)
|
|
879
1659
|
items.sort(key=lambda item: str(item.get("artifact_id", "")))
|
|
880
1660
|
if args.json:
|
|
@@ -915,9 +1695,34 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
915
1695
|
get.add_argument("--json", action="store_true", help="emit query JSON with content")
|
|
916
1696
|
get.set_defaults(func=get_command)
|
|
917
1697
|
|
|
1698
|
+
receipt = subparsers.add_parser("receipt", help="print metadata-only receipt and rehydration handle for a stored artifact")
|
|
1699
|
+
receipt.add_argument("artifact_id")
|
|
1700
|
+
receipt.add_argument(
|
|
1701
|
+
"--show-paths",
|
|
1702
|
+
action="store_true",
|
|
1703
|
+
help="show raw custom --dir values in rehydration commands; local debugging only because private paths may be exposed",
|
|
1704
|
+
)
|
|
1705
|
+
receipt.add_argument("--json", action="store_true", help="emit receipt JSON without artifact content")
|
|
1706
|
+
receipt.set_defaults(func=receipt_command)
|
|
1707
|
+
|
|
918
1708
|
list_parser = subparsers.add_parser("list", help="list stored artifacts")
|
|
919
1709
|
list_parser.add_argument("--json", action="store_true", help="emit list JSON")
|
|
920
1710
|
list_parser.set_defaults(func=list_command)
|
|
1711
|
+
|
|
1712
|
+
search = subparsers.add_parser("search", help="search stored sanitized artifacts by literal text")
|
|
1713
|
+
search.add_argument("pattern", help=f"literal substring to search for (max {MAX_SEARCH_PATTERN_BYTES} UTF-8 bytes)")
|
|
1714
|
+
search.add_argument("--ignore-case", action="store_true", help="case-insensitive literal search")
|
|
1715
|
+
search.add_argument("--context-lines", type=int, default=DEFAULT_SEARCH_CONTEXT_LINES, help=f"context lines around each match (default: {DEFAULT_SEARCH_CONTEXT_LINES})")
|
|
1716
|
+
search.add_argument("--max-artifacts", type=int, default=DEFAULT_SEARCH_MAX_ARTIFACTS, help=f"maximum artifacts to scan (default: {DEFAULT_SEARCH_MAX_ARTIFACTS})")
|
|
1717
|
+
search.add_argument("--max-matches", type=int, default=DEFAULT_SEARCH_MAX_MATCHES, help=f"maximum match records to return (default: {DEFAULT_SEARCH_MAX_MATCHES})")
|
|
1718
|
+
search.add_argument("--max-snippet-chars", type=int, default=DEFAULT_SEARCH_SNIPPET_CHARS, help=f"maximum characters per displayed line (default: {DEFAULT_SEARCH_SNIPPET_CHARS})")
|
|
1719
|
+
search.add_argument(
|
|
1720
|
+
"--show-paths",
|
|
1721
|
+
action="store_true",
|
|
1722
|
+
help="show raw custom --dir values in rehydration commands; local debugging only because private paths may be exposed",
|
|
1723
|
+
)
|
|
1724
|
+
search.add_argument("--json", action="store_true", help="emit sandbox search JSON")
|
|
1725
|
+
search.set_defaults(func=search_command)
|
|
921
1726
|
return parser
|
|
922
1727
|
|
|
923
1728
|
|