@ictechgy/context-guard 0.4.8 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/README.ko.md +92 -37
- package/README.md +111 -37
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +8 -1
- package/package.json +3 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +9 -6
- package/plugins/context-guard/README.md +27 -12
- package/plugins/context-guard/bin/context-guard +113 -26
- package/plugins/context-guard/bin/context-guard-artifact +542 -46
- package/plugins/context-guard/bin/context-guard-cache-score +380 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +783 -4
- package/plugins/context-guard/bin/context-guard-experiments +2211 -121
- package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +602 -43
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
- package/plugins/context-guard/lib/context_guard_commands.py +206 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -2339
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -10,6 +10,8 @@ import json
|
|
|
10
10
|
import os
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
import re
|
|
13
|
+
import secrets
|
|
14
|
+
import shlex
|
|
13
15
|
import stat
|
|
14
16
|
import sys
|
|
15
17
|
import time
|
|
@@ -30,6 +32,17 @@ MAX_COMMAND_PREVIEW_BYTES = 2_048
|
|
|
30
32
|
MAX_TOP_ERROR_RECEIPTS = 12
|
|
31
33
|
MAX_DUPLICATE_GROUPS = 12
|
|
32
34
|
MAX_SUGGESTED_QUERIES = 12
|
|
35
|
+
# Schema identifier reported as "schema_version" in search payloads.
SEARCH_SCHEMA_VERSION = "contextguard.artifact.search.v1"
# Default / upper bound for the number of artifacts one search may scan.
DEFAULT_SEARCH_MAX_ARTIFACTS = 100
MAX_SEARCH_MAX_ARTIFACTS = 1_000
# Default / upper bound for the number of match records returned.
DEFAULT_SEARCH_MAX_MATCHES = 40
MAX_SEARCH_MAX_MATCHES = 1_000
# Default / upper bound for context lines shown around each match.
DEFAULT_SEARCH_CONTEXT_LINES = 1
MAX_SEARCH_CONTEXT_LINES = 20
# Default / upper bound for characters kept per displayed line.
DEFAULT_SEARCH_SNIPPET_CHARS = 360
MAX_SEARCH_SNIPPET_CHARS = 2_000
# Largest accepted search pattern, measured in UTF-8 bytes.
MAX_SEARCH_PATTERN_BYTES = 512
# Count-mode label used when the artifact scan was truncated.
SEARCH_TRUNCATED_COUNT_UNKNOWN = "lower_bound"
|
|
33
46
|
ARTIFACT_ID_RE = re.compile(r"^[a-f0-9]{16,64}$")
|
|
34
47
|
ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
|
|
35
48
|
"tmp": Path("/private/tmp"),
|
|
@@ -183,15 +196,50 @@ def sanitize_one_line(text: str, *, show_paths: bool = False) -> str:
|
|
|
183
196
|
return cap_utf8_bytes(cap_line(" ".join(sanitized.strip().split())), MAX_COMMAND_PREVIEW_BYTES)
|
|
184
197
|
|
|
185
198
|
|
|
199
|
+
# True when this platform's os module exposes O_NOFOLLOW.
NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
# Each flag below is True when the named os function is listed in
# os.supports_dir_fd, i.e. it accepts the dir_fd keyword on this platform.
DIR_FD_OPEN_SUPPORTED = bool(os.supports_dir_fd and os.open in os.supports_dir_fd)
DIR_FD_MKDIR_SUPPORTED = bool(os.supports_dir_fd and os.mkdir in os.supports_dir_fd)
DIR_FD_STAT_SUPPORTED = bool(os.supports_dir_fd and os.stat in os.supports_dir_fd)
DIR_FD_UNLINK_SUPPORTED = bool(os.supports_dir_fd and os.unlink in os.supports_dir_fd)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def dir_fd_replace_supported() -> bool:
    """Report whether ``os.replace`` accepts ``src_dir_fd`` and ``dst_dir_fd``.

    Some Python builds support both keywords without listing ``os.replace``
    in ``os.supports_dir_fd``, so the callable's signature is inspected
    instead of testing set membership.  If the signature cannot be
    introspected, support is assumed and ``True`` is returned.
    """
    import inspect

    try:
        parameters = inspect.signature(os.replace).parameters
    except (TypeError, ValueError):
        return True
    return all(keyword in parameters for keyword in ("src_dir_fd", "dst_dir_fd"))
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
DIR_FD_REPLACE_SUPPORTED = dir_fd_replace_supported()
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def os_error_detail(exc: OSError) -> str:
    """Render an ``OSError`` as a short human-readable string.

    The text is the first non-empty value among ``exc.strerror``,
    ``str(exc)`` and the exception class name.  When ``exc.errno`` is set,
    `` (errno N)`` is appended.
    """
    message = exc.strerror or str(exc) or type(exc).__name__
    code = exc.errno
    return message if code is None else f"{message} (errno {code})"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def reject_parent_traversal(path: Path, *, label: str) -> None:
    """Raise ``ValueError`` if any component of *path* is ``..``.

    The check runs on the user-expanded path; *label* names the offending
    argument in the error message.
    """
    if ".." in path.expanduser().parts:
        raise ValueError(f"{label} must not contain parent traversal")
|
|
232
|
+
|
|
233
|
+
|
|
186
234
|
def ensure_private_dir(path: Path) -> None:
    """Create *path* (if needed) via the no-follow walker and chmod it to 0o700.

    The permission change is best effort: an ``OSError`` from ``fchmod`` is
    ignored.  The directory descriptor is always closed before returning.
    """
    dir_fd = open_private_directory_no_follow(path, label="artifact directory", create=True)
    try:
        os.fchmod(dir_fd, 0o700)
    except OSError:
        pass
    finally:
        os.close(dir_fd)
|
|
195
243
|
|
|
196
244
|
|
|
197
245
|
def reject_symlink_components(path: Path) -> None:
|
|
@@ -243,33 +291,156 @@ def read_bounded_private_text(path: Path, max_bytes: int) -> str:
|
|
|
243
291
|
os.close(fd)
|
|
244
292
|
|
|
245
293
|
|
|
246
|
-
def
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
294
|
+
def no_follow_dir_flags() -> int:
    """Return ``os.open`` flags for opening a directory without following symlinks.

    Always includes ``O_RDONLY | O_NOFOLLOW``; ``O_CLOEXEC`` and
    ``O_DIRECTORY`` are added when the platform defines them.

    Raises:
        RuntimeError: if ``O_NOFOLLOW`` is unavailable.
    """
    if not NO_FOLLOW_SUPPORTED:
        raise RuntimeError("artifact writes require O_NOFOLLOW support")
    flags = os.O_RDONLY | os.O_NOFOLLOW
    for optional in ("O_CLOEXEC", "O_DIRECTORY"):
        # Missing flags contribute 0, leaving the mask unchanged.
        flags |= getattr(os, optional, 0)
    return flags
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def temp_file_flags() -> int:
    """Return ``os.open`` flags for exclusively creating a temporary file.

    Always includes ``O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW``;
    ``O_CLOEXEC`` and ``O_NOCTTY`` are added when the platform defines them.

    Raises:
        RuntimeError: if ``O_NOFOLLOW`` is unavailable.
    """
    if not NO_FOLLOW_SUPPORTED:
        raise RuntimeError("artifact writes require O_NOFOLLOW support")
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW
    for optional in ("O_CLOEXEC", "O_NOCTTY"):
        # Missing flags contribute 0, leaving the mask unchanged.
        flags |= getattr(os, optional, 0)
    return flags
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def open_private_directory_no_follow(path: Path, *, label: str, create: bool) -> int:
    """Open *path* one component at a time without following symlinks.

    Each component is opened relative to the descriptor of its parent using
    the flags from ``no_follow_dir_flags()``.  When *create* is true, a
    missing component is created with mode 0o700 and then opened.

    Args:
        path: Directory to open; ``..`` components are rejected.
        label: Human-readable name used in error messages.
        create: Create missing components instead of failing.

    Returns:
        An open directory file descriptor that the caller must close.

    Raises:
        ValueError: from ``reject_parent_traversal`` for ``..`` components.
        RuntimeError: when required ``dir_fd`` support is missing, a
            component is not a directory, or an ``OSError`` occurs while
            walking the components (including a missing component when
            *create* is false).
    """
    reject_parent_traversal(path, label=label)
    path = normalize_allowed_first_absolute_symlink(path.expanduser())
    if not DIR_FD_OPEN_SUPPORTED:
        raise RuntimeError(f"{label} requires dir_fd open support")
    if create and not DIR_FD_MKDIR_SUPPORTED:
        raise RuntimeError(f"{label} requires dir_fd mkdir support")
    flags = no_follow_dir_flags()
    if path.is_absolute():
        current_fd = os.open(path.anchor or os.sep, os.O_RDONLY | (os.O_CLOEXEC if hasattr(os, "O_CLOEXEC") else 0))
        parts = path.parts[1:]
    else:
        current_fd = os.open(".", flags)
        parts = path.parts
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                raise RuntimeError(f"{label} must not contain parent traversal")
            try:
                next_fd = os.open(part, flags, dir_fd=current_fd)
            except FileNotFoundError:
                if not create:
                    raise
                try:
                    os.mkdir(part, 0o700, dir_fd=current_fd)
                except FileExistsError:
                    # Another process created the component between our open
                    # and mkdir.  The no-follow open below still validates
                    # whatever now exists at this name.
                    pass
                next_fd = os.open(part, flags, dir_fd=current_fd)
            try:
                if not stat.S_ISDIR(os.fstat(next_fd).st_mode):
                    raise RuntimeError(f"{label} must not traverse non-directory components")
            except Exception:
                os.close(next_fd)
                raise
            os.close(current_fd)
            current_fd = next_fd
        # Hand ownership to the caller so the finally clause does not close it.
        owned_fd = current_fd
        current_fd = -1
        return owned_fd
    except OSError as exc:
        raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    finally:
        if current_fd >= 0:
            os.close(current_fd)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def precheck_artifact_leaf(parent_fd: int, leaf: str, *, label: str) -> None:
    """Verify that *leaf* inside *parent_fd* is absent or a regular file.

    The entry is stat'ed relative to *parent_fd* without following symlinks.

    Raises:
        RuntimeError: if ``dir_fd`` stat is unsupported, the stat fails for a
            reason other than the entry being missing, or the entry exists
            but is not a regular file.
    """
    if not DIR_FD_STAT_SUPPORTED:
        raise RuntimeError(f"{label} requires dir_fd stat support")
    try:
        mode = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False).st_mode
    except FileNotFoundError:
        return
    except OSError as exc:
        raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    if stat.S_ISREG(mode):
        return
    raise RuntimeError(f"{label} must be missing or a regular file")
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def write_all_fd(fd: int, data: bytes) -> None:
    """Write every byte of *data* to *fd*, looping over partial writes.

    Raises:
        OSError: ``"short write"`` if ``os.write`` reports no progress.
    """
    remaining = memoryview(data)
    while len(remaining) > 0:
        count = os.write(fd, remaining)
        if count <= 0:
            raise OSError("short write")
        remaining = remaining[count:]
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def fsync_required(fd: int, *, label: str, committed: bool = False) -> None:
    """Call ``os.fsync(fd)`` and convert any ``OSError`` into ``RuntimeError``.

    When *committed* is true the message is prefixed with
    ``committed_but_parent_fsync_failed``; otherwise it names *label*.
    """
    try:
        os.fsync(fd)
    except OSError as exc:
        detail = os_error_detail(exc)
        if committed:
            message = f"committed_but_parent_fsync_failed: {detail}"
        else:
            message = f"could not fsync {label}: {detail}"
        raise RuntimeError(message) from exc
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def write_private_text(path: Path, text: str) -> None:
    """Atomically write *text* (UTF-8) to *path* through a private temp file.

    The parent directory is opened with the no-follow walker (and created if
    missing), a uniquely named temp file is created exclusively inside it
    with mode 0o600, written and fsynced, and then renamed over the target
    relative to the directory descriptor.  The directory is fsynced before
    and after the rename.

    Raises:
        ValueError: from ``reject_parent_traversal`` for ``..`` components.
        RuntimeError: if required ``dir_fd`` support is missing, the target
            is not a regular file or missing, no temp file could be created,
            an fsync fails, or any ``OSError`` occurs during the write.
    """
    reject_parent_traversal(path, label="artifact file")
    path = normalize_allowed_first_absolute_symlink(path.expanduser())
    if not DIR_FD_REPLACE_SUPPORTED:
        raise RuntimeError("artifact writes require dir_fd replace support")
    if not DIR_FD_UNLINK_SUPPORTED:
        raise RuntimeError("artifact writes require dir_fd unlink support")
    parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=True)
    try:
        # Best effort: tightening the directory mode must not block the write.
        os.fchmod(parent_fd, 0o700)
    except OSError:
        pass
    fd = -1
    temp_leaf: str | None = None
    try:
        leaf = path.name
        if leaf in {"", ".", ".."}:
            raise RuntimeError("artifact file must name a regular file")
        precheck_artifact_leaf(parent_fd, leaf, label="artifact file")
        for _attempt in range(20):
            candidate = f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.tmp"
            try:
                fd = os.open(candidate, temp_file_flags(), 0o600, dir_fd=parent_fd)
                temp_leaf = candidate
                break
            except FileExistsError:
                continue
        if fd < 0 or temp_leaf is None:
            raise RuntimeError("could not create temporary artifact file")
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RuntimeError("temporary artifact file must be a regular file")
        os.fchmod(fd, 0o600)
        write_all_fd(fd, text.encode("utf-8"))
        fsync_required(fd, label="artifact temp file")
        # Forget the descriptor before closing it so that a failing close is
        # never followed by a second close of the same number in `finally`.
        closing_fd, fd = fd, -1
        os.close(closing_fd)
        fsync_required(parent_fd, label="artifact directory before replace")
        os.replace(temp_leaf, leaf, src_dir_fd=parent_fd, dst_dir_fd=parent_fd)
        # The temp name no longer exists after the rename; nothing to unlink.
        temp_leaf = None
        fsync_required(parent_fd, label="artifact directory after replace", committed=True)
    except OSError as exc:
        raise RuntimeError(f"could not write artifact file: {os_error_detail(exc)}") from exc
    finally:
        try:
            if fd >= 0:
                # Only reached while another exception is propagating; a
                # close failure here must not mask it or skip the cleanup.
                try:
                    os.close(fd)
                except OSError:
                    pass
            if temp_leaf is not None:
                try:
                    os.unlink(temp_leaf, dir_fd=parent_fd)
                except OSError:
                    pass
        finally:
            os.close(parent_fd)
|
|
273
444
|
|
|
274
445
|
|
|
275
446
|
def read_bounded_stdin(max_bytes: int) -> tuple[str, bool, int]:
|
|
@@ -283,6 +454,7 @@ def read_bounded_stdin(max_bytes: int) -> tuple[str, bool, int]:
|
|
|
283
454
|
def artifact_paths(directory: Path, artifact_id: str) -> tuple[Path, Path]:
    """Return the ``(content, metadata)`` paths for *artifact_id* in *directory*.

    The content file is ``<id>.txt`` and the metadata file is ``<id>.json``,
    both under the normalized directory.

    Raises:
        ValueError: if *artifact_id* is not 16-64 lowercase hex characters or
            *directory* contains a ``..`` component.
    """
    if ARTIFACT_ID_RE.fullmatch(artifact_id) is None:
        raise ValueError("artifact id must be 16-64 lowercase hex chars")
    reject_parent_traversal(directory, label="artifact directory")
    base = normalize_allowed_first_absolute_symlink(directory)
    content_path = base / f"{artifact_id}.txt"
    metadata_path = base / f"{artifact_id}.json"
    return content_path, metadata_path
|
|
288
460
|
|
|
@@ -295,15 +467,21 @@ def artifact_read_directories(raw_dir: str) -> list[Path]:
|
|
|
295
467
|
default. Reads and listings include that legacy default so old receipts keep
|
|
296
468
|
working; stores intentionally continue to use only the new path.
|
|
297
469
|
"""
|
|
298
|
-
|
|
470
|
+
raw_path = Path(raw_dir).expanduser()
|
|
471
|
+
reject_parent_traversal(raw_path, label="artifact directory")
|
|
472
|
+
primary = normalize_allowed_first_absolute_symlink(raw_path)
|
|
299
473
|
directories = [primary]
|
|
300
|
-
if
|
|
474
|
+
if default_artifact_dir_requested(raw_dir):
|
|
301
475
|
legacy = normalize_allowed_first_absolute_symlink(Path(LEGACY_ARTIFACT_DIR).expanduser())
|
|
302
476
|
if legacy != primary:
|
|
303
477
|
directories.append(legacy)
|
|
304
478
|
return directories
|
|
305
479
|
|
|
306
480
|
|
|
481
|
+
def default_artifact_dir_requested(raw_dir: str) -> bool:
    """Return True when *raw_dir*, user-expanded, equals ``DEFAULT_ARTIFACT_DIR``."""
    requested = Path(raw_dir).expanduser()
    return requested == Path(DEFAULT_ARTIFACT_DIR)
|
|
483
|
+
|
|
484
|
+
|
|
307
485
|
CONTENT_TYPE_VALUES = ("json", "diff", "log", "search", "code", "prose", "text")
|
|
308
486
|
# Recommended retrieval strategy per content type. Pattern-oriented payloads
|
|
309
487
|
# (logs, search hits, diffs) are best sliced by `--pattern`; structured or
|
|
@@ -449,8 +627,27 @@ def build_retrieval_hints(
|
|
|
449
627
|
return hints
|
|
450
628
|
|
|
451
629
|
|
|
452
|
-
def
|
|
453
|
-
|
|
630
|
+
def artifact_dir_cli_prefix(raw_dir: str | None, *, show_paths: bool = False) -> str:
|
|
631
|
+
if not raw_dir or default_artifact_dir_requested(raw_dir):
|
|
632
|
+
return "context-guard-artifact"
|
|
633
|
+
if not show_paths:
|
|
634
|
+
return "context-guard-artifact --dir <artifact_dir>"
|
|
635
|
+
return f"context-guard-artifact --dir {shlex.quote(raw_dir)}"
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def artifact_dir_cli_is_exact(raw_dir: str | None, *, show_paths: bool = False) -> bool:
|
|
639
|
+
return not raw_dir or default_artifact_dir_requested(raw_dir) or show_paths
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def line_query_cli(
|
|
643
|
+
artifact_id: str,
|
|
644
|
+
start: int,
|
|
645
|
+
end: int,
|
|
646
|
+
*,
|
|
647
|
+
raw_dir: str | None = None,
|
|
648
|
+
show_paths: bool = False,
|
|
649
|
+
) -> str:
|
|
650
|
+
cli = f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --lines {start}:{end}"
|
|
454
651
|
requested_lines = end - start + 1
|
|
455
652
|
if requested_lines > DEFAULT_MAX_LINES:
|
|
456
653
|
cli += f" --max-lines {min(requested_lines, MAX_QUERY_LINES)}"
|
|
@@ -745,6 +942,26 @@ def load_metadata(directory: Path, artifact_id: str) -> dict[str, object]:
|
|
|
745
942
|
return data
|
|
746
943
|
|
|
747
944
|
|
|
945
|
+
def load_verified_artifact(directory: Path, artifact_id: str) -> tuple[dict[str, object], Path, str]:
    """Load an artifact's metadata and content, verifying size and SHA-256.

    Returns:
        ``(metadata, content_path, content)``.

    Raises:
        ValueError: if the metadata lacks a valid ``stored_output.sha256`` or
            ``stored_output.bytes``, or if the stored file's size or digest
            differs from the metadata (both reported as a checksum mismatch).
    """
    metadata = load_metadata(directory, artifact_id)
    content_path, _unused_meta_path = artifact_paths(directory, artifact_id)
    stored_output = metadata.get("stored_output")
    # A non-dict stored_output behaves like an empty one: every lookup is None.
    stored = stored_output if isinstance(stored_output, dict) else {}

    expected_sha = stored.get("sha256")
    sha_is_valid = isinstance(expected_sha, str) and re.fullmatch(r"[a-f0-9]{64}", expected_sha)
    if not sha_is_valid:
        raise ValueError(f"artifact metadata missing stored_output sha256: {artifact_id}")

    expected_bytes = stored.get("bytes")
    bytes_is_valid = isinstance(expected_bytes, int) and 0 <= expected_bytes <= MAX_MAX_BYTES
    if not bytes_is_valid:
        raise ValueError(f"artifact metadata has invalid stored_output bytes: {artifact_id}")

    if regular_private_file_size(content_path) != expected_bytes:
        raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
    content = read_bounded_private_text(content_path, expected_bytes)
    digest = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
    if digest != expected_sha:
        raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
    return metadata, content_path, content
|
|
963
|
+
|
|
964
|
+
|
|
748
965
|
def parse_line_range(value: str | None) -> tuple[int, int] | None:
|
|
749
966
|
if not value:
|
|
750
967
|
return None
|
|
@@ -766,6 +983,149 @@ def cap_text(text: str, max_chars: int) -> tuple[str, bool]:
|
|
|
766
983
|
return text[:keep].rstrip() + marker, True
|
|
767
984
|
|
|
768
985
|
|
|
986
|
+
def search_literal(value: str) -> str:
    """Validate a literal search pattern and return it unchanged.

    Raises:
        ValueError: if the pattern is empty, contains a NUL character, or
            its UTF-8 encoding exceeds ``MAX_SEARCH_PATTERN_BYTES``.
    """
    if not value:
        raise ValueError("search pattern must not be empty")
    if value.count("\x00") > 0:
        raise ValueError("search pattern must not contain NUL bytes")
    encoded_length = len(value.encode("utf-8", errors="replace"))
    if encoded_length > MAX_SEARCH_PATTERN_BYTES:
        raise ValueError(f"search pattern exceeds {MAX_SEARCH_PATTERN_BYTES} bytes")
    return value
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def safe_query_label(value: str) -> str:
    """Return *value* passed through ``sanitize_one_line`` with paths hidden."""
    label = sanitize_one_line(value, show_paths=False)
    return label
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
def artifact_dir_label(raw_dir: str) -> str:
    """Describe the artifact directory for output.

    Returns ``"default"`` for the default directory, otherwise *raw_dir*
    passed through ``sanitize_one_line`` with paths hidden.
    """
    is_default = default_artifact_dir_requested(raw_dir)
    return "default" if is_default else sanitize_one_line(raw_dir, show_paths=False)
|
|
1005
|
+
|
|
1006
|
+
|
|
1007
|
+
def metadata_text_field(metadata: dict[str, object], key: str) -> str | None:
|
|
1008
|
+
value = metadata.get(key)
|
|
1009
|
+
if not isinstance(value, str):
|
|
1010
|
+
return None
|
|
1011
|
+
return sanitize_one_line(value, show_paths=False)
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
def metadata_content_type(metadata: dict[str, object]) -> str:
    """Return the metadata's ``content_type`` if it is a known value, else ``"text"``."""
    candidate = metadata.get("content_type")
    if isinstance(candidate, str) and candidate in CONTENT_TYPE_VALUES:
        return candidate
    return "text"
|
|
1017
|
+
|
|
1018
|
+
|
|
1019
|
+
def metadata_candidate_paths(directory: Path, limit: int) -> tuple[list[Path], int, int]:
    """Collect up to *limit* artifact metadata files found in *directory*.

    Only ``*.json`` entries are considered.  Entries whose stem is not a
    valid artifact id, that are not regular files (symlinks not followed),
    or that cannot be inspected are counted as skipped.

    Returns:
        ``(paths, skipped, truncated_lower_bound)``.  *paths* is sorted on
        success.  ``truncated_lower_bound`` is 1 when scanning stopped
        because *limit* was reached with another candidate pending, else 0.
        If listing the directory fails, the paths gathered so far are
        returned unsorted and the skipped count is incremented by one.
    """
    found: list[Path] = []
    rejected = 0
    overflow = 0
    if limit <= 0:
        return found, rejected, 0
    try:
        with os.scandir(directory) as listing:
            for entry in listing:
                filename = entry.name
                if not filename.endswith(".json"):
                    continue
                stem = filename[:-5]
                if ARTIFACT_ID_RE.fullmatch(stem) is None:
                    rejected += 1
                    continue
                try:
                    is_regular = entry.is_file(follow_symlinks=False)
                except OSError:
                    rejected += 1
                    continue
                if not is_regular:
                    rejected += 1
                    continue
                if len(found) >= limit:
                    # One more valid candidate exists beyond the limit.
                    overflow += 1
                    break
                found.append(directory / filename)
    except OSError:
        return found, rejected + 1, overflow
    return sorted(found), rejected, overflow
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
def search_match_record(
    *,
    artifact_id: str,
    line_number: int,
    lines: list[str],
    context_lines: int,
    snippet_chars: int,
    metadata: dict[str, object],
    raw_dir: str,
    show_paths: bool,
) -> dict[str, object]:
    """Build the record describing one matching line of an artifact.

    *line_number* is 1-based.  The record carries the capped matching line,
    up to *context_lines* capped lines on each side, selected metadata, and
    a ``retrieval`` section with the line selector and a CLI command that
    fetches the same window.
    """
    first = max(1, line_number - context_lines)
    last = min(len(lines), line_number + context_lines)
    cli_exact = artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths)

    def snippet(number: int) -> str:
        return cap_line(lines[number - 1].rstrip("\n"), limit=snippet_chars)

    def line_item(number: int) -> dict[str, object]:
        return {"line": number, "text": snippet(number)}

    matched_text = snippet(line_number)
    before = [line_item(number) for number in range(first, line_number)]
    after = [line_item(number) for number in range(line_number + 1, last + 1)]
    content_type = metadata_content_type(metadata)
    command_preview = metadata_text_field(metadata, "command_preview")

    cli = line_query_cli(artifact_id, first, last, raw_dir=raw_dir, show_paths=show_paths)
    if default_artifact_dir_requested(raw_dir):
        dir_argument = "default"
    elif show_paths:
        dir_argument = "included"
    else:
        dir_argument = "redacted"
    if cli_exact:
        note = None
    else:
        note = "custom artifact directory is redacted; rerun with the same --dir used for search, or pass search --show-paths to emit a directly executable local CLI"

    return {
        "artifact_id": artifact_id,
        "line": line_number,
        "text": matched_text,
        "context_before": before,
        "context_after": after,
        "content_type": content_type,
        "command_preview": command_preview,
        "retrieval": {
            "selector": {"type": "lines", "start": first, "end": last},
            "cli": cli,
            "exact": cli_exact,
            "dir_argument": dir_argument,
            "note": note,
        },
    }
|
|
1088
|
+
|
|
1089
|
+
|
|
1090
|
+
def search_artifact_content(
    *,
    artifact_id: str,
    metadata: dict[str, object],
    content: str,
    literal: str,
    ignore_case: bool,
    context_lines: int,
    snippet_chars: int,
    remaining_matches: int,
    raw_dir: str,
    show_paths: bool,
) -> tuple[list[dict[str, object]], int]:
    """Find lines of *content* containing *literal*.

    Returns:
        ``(records, matched_lines)``.  *records* holds at most
        *remaining_matches* match records; *matched_lines* counts every
        matching line, including those beyond that cap.  With *ignore_case*
        both sides are compared after ``casefold()``.
    """
    lines = content.splitlines()
    needle = literal.casefold() if ignore_case else literal
    records: list[dict[str, object]] = []
    hit_count = 0
    for index, raw_line in enumerate(lines):
        candidate = raw_line.casefold() if ignore_case else raw_line
        if needle in candidate:
            hit_count += 1
            # Keep counting past the cap so the caller can report truncation.
            if len(records) < remaining_matches:
                records.append(
                    search_match_record(
                        artifact_id=artifact_id,
                        line_number=index + 1,
                        lines=lines,
                        context_lines=context_lines,
                        snippet_chars=snippet_chars,
                        metadata=metadata,
                        raw_dir=raw_dir,
                        show_paths=show_paths,
                    )
                )
    return records, hit_count
|
|
1127
|
+
|
|
1128
|
+
|
|
769
1129
|
def query_content(
|
|
770
1130
|
content: str,
|
|
771
1131
|
*,
|
|
@@ -805,8 +1165,7 @@ def get_command(args: argparse.Namespace) -> int:
|
|
|
805
1165
|
last_missing: FileNotFoundError | None = None
|
|
806
1166
|
for directory in artifact_read_directories(args.dir):
|
|
807
1167
|
try:
|
|
808
|
-
metadata =
|
|
809
|
-
content_path, _meta_path = artifact_paths(directory, artifact_id)
|
|
1168
|
+
metadata, _content_path, content = load_verified_artifact(directory, artifact_id)
|
|
810
1169
|
break
|
|
811
1170
|
except FileNotFoundError as exc:
|
|
812
1171
|
last_missing = exc
|
|
@@ -815,19 +1174,9 @@ def get_command(args: argparse.Namespace) -> int:
|
|
|
815
1174
|
raise last_missing
|
|
816
1175
|
raise FileNotFoundError(f"artifact not found: {artifact_id}")
|
|
817
1176
|
stored_output = metadata.get("stored_output")
|
|
818
|
-
expected_sha = stored_output.get("sha256") if isinstance(stored_output, dict) else None
|
|
819
|
-
if not isinstance(expected_sha, str) or not re.fullmatch(r"[a-f0-9]{64}", expected_sha):
|
|
820
|
-
raise ValueError(f"artifact metadata missing stored_output sha256: {artifact_id}")
|
|
821
1177
|
expected_bytes = stored_output.get("bytes") if isinstance(stored_output, dict) else None
|
|
822
|
-
if not isinstance(expected_bytes, int)
|
|
1178
|
+
if not isinstance(expected_bytes, int):
|
|
823
1179
|
raise ValueError(f"artifact metadata has invalid stored_output bytes: {artifact_id}")
|
|
824
|
-
actual_size = regular_private_file_size(content_path)
|
|
825
|
-
if actual_size != expected_bytes:
|
|
826
|
-
raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
|
|
827
|
-
content = read_bounded_private_text(content_path, expected_bytes)
|
|
828
|
-
actual_sha = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
|
|
829
|
-
if actual_sha != expected_sha:
|
|
830
|
-
raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
|
|
831
1180
|
default_max_chars = max(DEFAULT_MAX_CHARS, expected_bytes) if full else DEFAULT_MAX_CHARS
|
|
832
1181
|
max_chars = bounded_int(args.max_chars, default_max_chars, 1, MAX_MAX_BYTES)
|
|
833
1182
|
line_range = parse_line_range(args.lines)
|
|
@@ -856,6 +1205,138 @@ def get_command(args: argparse.Namespace) -> int:
|
|
|
856
1205
|
return 0
|
|
857
1206
|
|
|
858
1207
|
|
|
1208
|
+
def search_command(args: argparse.Namespace) -> int:
    """Run the ``search`` subcommand: literal search across stored artifacts.

    Reads ``pattern``, ``dir``, the ``max_*``/``context_lines`` limits,
    ``ignore_case``, ``show_paths`` and ``json`` from *args*.  Each readable
    artifact directory is scanned for metadata files; every artifact is
    verified with ``load_verified_artifact`` before its content is searched.
    Unreadable, invalid or duplicate artifacts are counted as skipped rather
    than aborting the search.

    Returns:
        0 after printing results (JSON payload or one line per match), or 1
        after printing an error message to stderr.
    """
    try:
        literal = search_literal(args.pattern)
        max_artifacts = bounded_int(args.max_artifacts, DEFAULT_SEARCH_MAX_ARTIFACTS, 1, MAX_SEARCH_MAX_ARTIFACTS)
        max_matches = bounded_int(args.max_matches, DEFAULT_SEARCH_MAX_MATCHES, 1, MAX_SEARCH_MAX_MATCHES)
        context_lines = bounded_int(args.context_lines, DEFAULT_SEARCH_CONTEXT_LINES, 0, MAX_SEARCH_CONTEXT_LINES)
        snippet_chars = bounded_int(args.max_snippet_chars, DEFAULT_SEARCH_SNIPPET_CHARS, 1, MAX_SEARCH_SNIPPET_CHARS)
        ignore_case = bool(args.ignore_case)
        matches: list[dict[str, object]] = []
        seen: set[str] = set()
        scanned_artifacts = 0
        skipped_artifacts = 0
        total_matched_lines = 0
        meta_candidates_seen = 0
        scan_truncated = False
        scan_truncated_count = 0
        matched_artifact_ids: set[str] = set()

        for directory in artifact_read_directories(args.dir):
            # The artifact budget is shared across all directories.
            remaining_candidates = max_artifacts - meta_candidates_seen
            if remaining_candidates <= 0:
                scan_truncated = True
                break
            try:
                reject_symlink_components(directory)
                directory_is_safe = directory.is_dir() and not directory.is_symlink()
            except RuntimeError:
                directory_is_safe = False
            if not directory_is_safe:
                continue
            meta_paths, skipped_candidates, truncated_candidates = metadata_candidate_paths(directory, remaining_candidates)
            skipped_artifacts += skipped_candidates
            if truncated_candidates:
                scan_truncated = True
                scan_truncated_count += truncated_candidates
            for meta_path in meta_paths:
                meta_candidates_seen += 1
                try:
                    data = json.loads(read_bounded_private_text(meta_path, MAX_METADATA_BYTES))
                except (OSError, ValueError, RuntimeError, json.JSONDecodeError):
                    skipped_artifacts += 1
                    continue
                artifact_id = str(data.get("artifact_id", "")) if isinstance(data, dict) else ""
                if not (isinstance(data, dict) and ARTIFACT_ID_RE.fullmatch(artifact_id)) or artifact_id in seen:
                    skipped_artifacts += 1
                    continue
                seen.add(artifact_id)
                if scanned_artifacts >= max_artifacts:
                    scan_truncated = True
                    scan_truncated_count += 1
                    continue
                try:
                    metadata, _content_path, content = load_verified_artifact(directory, artifact_id)
                except (OSError, ValueError, RuntimeError, json.JSONDecodeError):
                    skipped_artifacts += 1
                    continue
                scanned_artifacts += 1
                remaining = max(0, max_matches - len(matches))
                artifact_matches, artifact_match_count = search_artifact_content(
                    artifact_id=artifact_id,
                    metadata=metadata,
                    content=content,
                    literal=literal,
                    ignore_case=ignore_case,
                    context_lines=context_lines,
                    snippet_chars=snippet_chars,
                    remaining_matches=remaining,
                    raw_dir=args.dir,
                    show_paths=bool(getattr(args, "show_paths", False)),
                )
                if artifact_match_count:
                    matched_artifact_ids.add(artifact_id)
                total_matched_lines += artifact_match_count
                matches.extend(artifact_matches)
        payload = {
            "tool": "context-guard-artifact",
            "schema_version": SEARCH_SCHEMA_VERSION,
            "mode": "search",
            "query": {
                "label": safe_query_label(literal),
                "raw_pattern_stored": False,
                "literal": True,
                "ignore_case": ignore_case,
            },
            "artifact_dir": artifact_dir_label(args.dir),
            "scanned_artifacts": scanned_artifacts,
            "skipped_artifacts": skipped_artifacts,
            "matched_artifacts": len(matched_artifact_ids),
            "matched_lines": total_matched_lines,
            "metadata_candidates_scanned": meta_candidates_seen,
            "matches": matches,
            "matches_truncated_count": max(0, total_matched_lines - max_matches),
            "artifact_scan_truncated": scan_truncated,
            "artifact_scan_truncated_count": scan_truncated_count,
            "artifact_scan_truncated_count_mode": SEARCH_TRUNCATED_COUNT_UNKNOWN if scan_truncated else "exact",
            "limits": {
                "max_artifacts": max_artifacts,
                "max_matches": max_matches,
                "context_lines": context_lines,
                "max_snippet_chars": snippet_chars,
            },
            "sandbox": {
                "local_only": True,
                "workflow": ["store", "search", "get"],
                "exact_rehydration": "use matches[].retrieval.cli when exact=true; for redacted custom dirs, reuse the same --dir or opt into --show-paths",
            },
            "claim_boundary": {
                "local_only": True,
                "stored_content_is_sanitized_copy": True,
                "hosted_api_token_or_cost_savings_claim_allowed": False,
                "exact_rehydration_required_before_relying_on_omitted_detail": True,
            },
        }
    except (FileNotFoundError, ValueError, OSError, RuntimeError, json.JSONDecodeError) as exc:
        # RuntimeError is included because Path.expanduser() raises it for an
        # unresolvable "~user" in --dir; report it like any other bad input.
        print(f"context-guard-artifact: {exc}", file=sys.stderr)
        return 1
    if args.json:
        print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))
    else:
        for item in payload["matches"]:
            if isinstance(item, dict):
                print(f"{item.get('artifact_id')}:{item.get('line')}: {item.get('text')}")
                retrieval = item.get("retrieval")
                if isinstance(retrieval, dict):
                    print(f" rehydrate={retrieval.get('cli')}")
        if not payload["matches"]:
            print("no matches")
        elif payload["matches_truncated_count"]:
            print(f"matches_truncated_count={payload['matches_truncated_count']}")
    return 0
|
|
1338
|
+
|
|
1339
|
+
|
|
859
1340
|
def list_command(args: argparse.Namespace) -> int:
|
|
860
1341
|
items: list[dict[str, object]] = []
|
|
861
1342
|
seen: set[str] = set()
|
|
@@ -918,6 +1399,21 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
918
1399
|
list_parser = subparsers.add_parser("list", help="list stored artifacts")
|
|
919
1400
|
list_parser.add_argument("--json", action="store_true", help="emit list JSON")
|
|
920
1401
|
list_parser.set_defaults(func=list_command)
|
|
1402
|
+
|
|
1403
|
+
search = subparsers.add_parser("search", help="search stored sanitized artifacts by literal text")
|
|
1404
|
+
search.add_argument("pattern", help=f"literal substring to search for (max {MAX_SEARCH_PATTERN_BYTES} UTF-8 bytes)")
|
|
1405
|
+
search.add_argument("--ignore-case", action="store_true", help="case-insensitive literal search")
|
|
1406
|
+
search.add_argument("--context-lines", type=int, default=DEFAULT_SEARCH_CONTEXT_LINES, help=f"context lines around each match (default: {DEFAULT_SEARCH_CONTEXT_LINES})")
|
|
1407
|
+
search.add_argument("--max-artifacts", type=int, default=DEFAULT_SEARCH_MAX_ARTIFACTS, help=f"maximum artifacts to scan (default: {DEFAULT_SEARCH_MAX_ARTIFACTS})")
|
|
1408
|
+
search.add_argument("--max-matches", type=int, default=DEFAULT_SEARCH_MAX_MATCHES, help=f"maximum match records to return (default: {DEFAULT_SEARCH_MAX_MATCHES})")
|
|
1409
|
+
search.add_argument("--max-snippet-chars", type=int, default=DEFAULT_SEARCH_SNIPPET_CHARS, help=f"maximum characters per displayed line (default: {DEFAULT_SEARCH_SNIPPET_CHARS})")
|
|
1410
|
+
search.add_argument(
|
|
1411
|
+
"--show-paths",
|
|
1412
|
+
action="store_true",
|
|
1413
|
+
help="show raw custom --dir values in rehydration commands; local debugging only because private paths may be exposed",
|
|
1414
|
+
)
|
|
1415
|
+
search.add_argument("--json", action="store_true", help="emit sandbox search JSON")
|
|
1416
|
+
search.set_defaults(func=search_command)
|
|
921
1417
|
return parser
|
|
922
1418
|
|
|
923
1419
|
|