@ictechgy/context-guard 0.4.7 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/context-guard-kit/context_filter.py +212 -21
- package/context-guard-kit/context_guard_cli.py +174 -2
- package/context-guard-kit/context_pack.py +66 -21
- package/context-guard-kit/cost_guard.py +126 -59
- package/context-guard-kit/experimental_registry.py +362 -61
- package/package.json +1 -1
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/bin/context-guard +174 -2
- package/plugins/context-guard/bin/context-guard-cost +126 -59
- package/plugins/context-guard/bin/context-guard-experiments +362 -61
- package/plugins/context-guard/bin/context-guard-filter +212 -21
- package/plugins/context-guard/bin/context-guard-pack +66 -21
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,14 @@ All notable changes for the ContextGuard plugin are documented here.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.4.8] - 2026-06-11
|
|
8
|
+
|
|
9
|
+
- Hardened experimental registry config writes with same-directory atomic replace so failed writes or symlink swaps do not truncate or redirect the live config.
|
|
10
|
+
- Hardened dispatcher version metadata reads with dir-fd no-follow parent traversal to close parent symlink races.
|
|
11
|
+
- Preserved bounded filter passthrough ordering without holding the capture state lock during emission.
|
|
12
|
+
- Serialized context pack sanitizer factory first load and added focused race/failure regression coverage.
|
|
13
|
+
- Kept plugin bin mirrors synchronized for the updated helper hardening.
|
|
14
|
+
|
|
7
15
|
## [0.4.7] - 2026-06-11
|
|
8
16
|
|
|
9
17
|
- Added default-off experimental opt-in registry surfaces for future token-reduction lanes, preserving project-local intent without enabling runtime behavior.
|
|
@@ -8,13 +8,18 @@ risk hiding evidence.
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
10
|
import argparse
|
|
11
|
+
import codecs
|
|
11
12
|
from dataclasses import dataclass
|
|
12
13
|
import json
|
|
14
|
+
import os
|
|
13
15
|
from pathlib import Path
|
|
14
16
|
import re
|
|
15
17
|
import shlex
|
|
18
|
+
import signal
|
|
16
19
|
import subprocess
|
|
17
20
|
import sys
|
|
21
|
+
import threading
|
|
22
|
+
import time
|
|
18
23
|
from typing import Any, Iterable
|
|
19
24
|
|
|
20
25
|
SCHEMA_VERSION = "contextguard.filter-dsl.v1"
|
|
@@ -33,6 +38,8 @@ MAX_EMIT_LINES = 5_000
|
|
|
33
38
|
DEFAULT_TIMEOUT_SECONDS = 600
|
|
34
39
|
MAX_TIMEOUT_SECONDS = 86_400
|
|
35
40
|
TIMEOUT_EXIT_CODE = 124
|
|
41
|
+
TIMEOUT_PIPE_DRAIN_GRACE_SECONDS = 5.0
|
|
42
|
+
PIPE_THREAD_CLOSE_GRACE_SECONDS = 1.0
|
|
36
43
|
FILTER_KEYS = {"id", "match", "passthrough_on_exit", "include_regex", "exclude_regex", "head_lines", "tail_lines", "max_lines"}
|
|
37
44
|
MATCH_KEYS = {"argv_prefix", "argv_regex"}
|
|
38
45
|
PROTECTED_BASENAMES = {
|
|
@@ -329,22 +336,204 @@ def print_validation(valid: bool, errors: list[str], count: int, as_json: bool)
|
|
|
329
336
|
print(f"- {error}", file=sys.stderr)
|
|
330
337
|
|
|
331
338
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
339
|
+
@dataclass
class CommandResult:
    """Outcome of one wrapped command run, as produced by ``run_command``."""

    # Exit status to report for the command.
    returncode: int
    # Decoded captured stdout.
    stdout_text: str
    # Decoded captured stderr, or the tool's own start-failure message.
    stderr_text: str
    # Byte count reported alongside the captured output.
    output_bytes: int
    # True when the capture limit was exceeded.
    capture_limited: bool
    # True when the command did not finish within its timeout.
    timed_out: bool
    # True when the output pipes could not be drained within the grace period.
    drain_timed_out: bool
    # True when output was already forwarded to this process's own streams.
    passthrough_emitted: bool
|
|
336
349
|
|
|
337
350
|
|
|
338
|
-
def
|
|
351
|
+
def write_binary_chunk(stream: Any, chunk: bytes) -> None:
    """Write raw bytes to ``stream`` and flush it.

    When ``stream`` exposes a ``buffer`` attribute the bytes are written to
    it unchanged; otherwise they are decoded as UTF-8 (invalid sequences
    replaced) and written as text.  An empty ``chunk`` is a no-op.
    """
    if not chunk:
        return
    # Flush text already queued on the wrapper before touching the byte layer.
    stream.flush()
    raw_sink = getattr(stream, "buffer", None)
    if raw_sink is None:
        stream.write(chunk.decode("utf-8", "replace"))
    else:
        raw_sink.write(chunk)
    stream.flush()
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class BoundedCapture:
    """Collect stdout/stderr bytes up to a combined size cap.

    While the total stored bytes stay within ``max_capture_bytes`` chunks are
    appended to ``stdout``/``stderr``.  The first chunk that does not fit
    flips ``capture_limited``: the stored stdout bytes, the stored stderr
    bytes and the overflow are written to this process's own streams, and
    every later chunk is forwarded directly instead of being stored.
    """

    def __init__(self, max_capture_bytes: int) -> None:
        self.max_capture_bytes = max_capture_bytes
        self.stdout = bytearray()
        self.stderr = bytearray()
        self.output_bytes = 0
        self.capture_limited = False
        self.passthrough_emitted = False
        # Guards the counters, flags and both byte buffers.
        self._lock = threading.Lock()
        # Emission tickets: handed out under ``_lock`` and served in order
        # under ``_emit_condition`` so writes happen outside ``_lock``.
        self._emit_condition = threading.Condition()
        self._next_emit_order = 0
        self._active_emit_order = 0
        self._stdout_decoder = codecs.getincrementaldecoder("utf-8")("replace")
        self._stderr_decoder = codecs.getincrementaldecoder("utf-8")("replace")

    def consume(self, stream_name: str, chunk: bytes) -> None:
        """Record ``chunk`` for ``stream_name`` ("stdout"; anything else is stderr).

        Empty chunks are ignored.  Chunks that fit are stored; otherwise the
        data is forwarded to ``sys.stdout``/``sys.stderr`` in ticket order.
        """
        if not chunk:
            return
        from_stdout = stream_name == "stdout"
        pending: list[tuple[Any, bytes]] = []
        with self._lock:
            self.output_bytes += len(chunk)
            if self.capture_limited:
                pending.append((sys.stdout if from_stdout else sys.stderr, chunk))
            else:
                room = self.max_capture_bytes - (len(self.stdout) + len(self.stderr))
                buffer = self.stdout if from_stdout else self.stderr
                if len(chunk) <= room:
                    buffer.extend(chunk)
                    return
                # Store whatever still fits; the rest is forwarded.
                kept = room if room > 0 else 0
                if kept:
                    buffer.extend(chunk[:kept])
                spill = chunk[kept:]
                self.capture_limited = True
                self.passthrough_emitted = True
                pending.append((sys.stdout, bytes(self.stdout)))
                pending.append((sys.stderr, bytes(self.stderr)))
                pending.append((sys.stdout if from_stdout else sys.stderr, spill))
            # Reaching this point always means there is something to emit.
            ticket = self._next_emit_order
            self._next_emit_order = ticket + 1
        with self._emit_condition:
            self._emit_condition.wait_for(lambda: self._active_emit_order == ticket)
        try:
            for stream, payload in pending:
                write_binary_chunk(stream, payload)
        finally:
            # Always release the next ticket, even if a write failed.
            with self._emit_condition:
                self._active_emit_order += 1
                self._emit_condition.notify_all()

    def text(self) -> tuple[str, str]:
        """Return the stored (stdout, stderr) bytes decoded as UTF-8 with replacement."""
        with self._lock:
            raw_out = bytes(self.stdout)
            raw_err = bytes(self.stderr)
        decoded_out = self._stdout_decoder.decode(raw_out, final=True)
        decoded_err = self._stderr_decoder.decode(raw_err, final=True)
        return decoded_out, decoded_err
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def run_command(argv: list[str], timeout_seconds: int, max_capture_bytes: int) -> CommandResult:
    """Run ``argv`` with bounded output capture and a wall-clock timeout.

    stdout and stderr are read by two daemon threads into a
    ``BoundedCapture``.  On POSIX the child is started in a new session so
    the whole process group can be signalled.

    Returns a ``CommandResult`` whose ``returncode`` is the child's exit
    status, ``TIMEOUT_EXIT_CODE`` when the timeout expired, or 127 when no
    command was given or an ``OSError`` occurred.  The text fields are empty
    when the capture limit was exceeded.

    Any other exception raised after the child has started (for example
    ``KeyboardInterrupt``) is re-raised once the child has been terminated.
    """
    if not argv:
        stderr = f"{TOOL_NAME}: command failed to start: no command provided\n"
        output_bytes = len(stderr.encode("utf-8", "replace"))
        return CommandResult(127, "", stderr, output_bytes, False, False, False, False)
    capture = BoundedCapture(max_capture_bytes)

    def read_pipe(pipe: Any, stream_name: str) -> None:
        # Feed the capture until EOF, then close our end of the pipe.
        try:
            while True:
                chunk = pipe.read(64 * 1024)
                if not chunk:
                    break
                capture.consume(stream_name, chunk)
        finally:
            try:
                pipe.close()
            except OSError:
                pass

    def terminate_processes(proc: subprocess.Popen[bytes], *, force: bool) -> None:
        # Prefer signalling the whole process group; fall back to the direct
        # child when group signalling is unavailable or fails.
        if os.name == "posix":
            try:
                os.killpg(proc.pid, signal.SIGKILL if force else signal.SIGTERM)
                return
            except ProcessLookupError:
                return
            except OSError:
                pass
        try:
            if proc.poll() is not None:
                return
            if force:
                proc.kill()
            else:
                proc.terminate()
        except (OSError, ValueError):
            pass

    def close_pipes(proc: subprocess.Popen[bytes]) -> None:
        for pipe in (proc.stdout, proc.stderr):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                pass

    def join_threads_until(threads: tuple[threading.Thread, threading.Thread], deadline: float) -> bool:
        # True when every reader thread finished before the deadline.
        for thread in threads:
            remaining = max(0.0, deadline - time.monotonic())
            thread.join(timeout=remaining)
        return all(not thread.is_alive() for thread in threads)

    def terminate_and_close(proc: subprocess.Popen[bytes], threads: tuple[threading.Thread, threading.Thread]) -> None:
        # Escalate: polite terminate, then forced kill, then close the pipes
        # so that blocked reader threads can unwind.
        terminate_processes(proc, force=False)
        try:
            proc.wait(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            pass
        if join_threads_until(threads, time.monotonic() + PIPE_THREAD_CLOSE_GRACE_SECONDS):
            return
        terminate_processes(proc, force=True)
        try:
            proc.wait(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            pass
        close_pipes(proc)
        for thread in threads:
            thread.join(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)

    try:
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, start_new_session=(os.name == "posix"))
        assert proc.stdout is not None
        assert proc.stderr is not None
        stdout_thread = threading.Thread(target=read_pipe, args=(proc.stdout, "stdout"), daemon=True)
        stderr_thread = threading.Thread(target=read_pipe, args=(proc.stderr, "stderr"), daemon=True)
        reader_threads = (stdout_thread, stderr_thread)
        stdout_thread.start()
        stderr_thread.start()
        timed_out = False
        drain_timed_out = False
        try:
            try:
                returncode = proc.wait(timeout=timeout_seconds)
            except subprocess.TimeoutExpired:
                timed_out = True
                returncode = TIMEOUT_EXIT_CODE
                terminate_and_close(proc, reader_threads)
            # The direct child may be gone while something else still holds
            # the pipes open; give the readers a bounded time to reach EOF.
            drain_deadline = time.monotonic() + TIMEOUT_PIPE_DRAIN_GRACE_SECONDS
            if not join_threads_until(reader_threads, drain_deadline):
                drain_timed_out = True
                terminate_and_close(proc, reader_threads)
            if timed_out:
                capture.consume("stderr", f"\n[{TOOL_NAME}] command timed out after {timeout_seconds}s\n".encode("utf-8"))
            elif drain_timed_out:
                capture.consume("stderr", f"\n[{TOOL_NAME}] command pipe drain timed out after direct process exit\n".encode("utf-8"))
        except BaseException:
            # The child runs in its own session on POSIX, so a terminal
            # interrupt delivered to this process does not reach it.  Stop it
            # before propagating instead of leaving it running.
            terminate_and_close(proc, reader_threads)
            raise
        stdout_text, stderr_text = ("", "") if capture.capture_limited else capture.text()
        return CommandResult(returncode, stdout_text, stderr_text, capture.output_bytes, capture.capture_limited, timed_out, drain_timed_out, capture.passthrough_emitted)
    except OSError as exc:
        stderr = f"{TOOL_NAME}: command failed to start: {exc.strerror or exc.__class__.__name__}\n"
        encoded = stderr.encode("utf-8", "replace")
        output_bytes = len(encoded)
        return CommandResult(127, "", stderr, output_bytes, False, False, False, False)
|
|
348
537
|
|
|
349
538
|
|
|
350
539
|
def emit_run_report(args: argparse.Namespace, payload: dict[str, Any]) -> None:
|
|
@@ -373,19 +562,21 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
373
562
|
max_line_chars = bounded_int(args.max_line_chars, DEFAULT_MAX_LINE_CHARS, 1, MAX_LINE_CHARS_LIMIT)
|
|
374
563
|
timeout_seconds = bounded_int(args.timeout_seconds, DEFAULT_TIMEOUT_SECONDS, 1, MAX_TIMEOUT_SECONDS)
|
|
375
564
|
filters, errors = load_filters(Path(args.config).expanduser())
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
565
|
+
result = run_command(command, timeout_seconds, max_capture)
|
|
566
|
+
rc = result.returncode
|
|
567
|
+
output = result.stdout_text + result.stderr_text
|
|
379
568
|
protected_nonzero = rc != 0 and is_protected_command(command)
|
|
380
569
|
report: dict[str, Any] = {"tool": TOOL_NAME, "schema_version": SCHEMA_VERSION, "mode": "run", "command_exit_code": rc, "decision": "passthrough", "reason": "unclassified", "protected_nonzero": protected_nonzero}
|
|
381
|
-
if timed_out:
|
|
570
|
+
if result.timed_out:
|
|
382
571
|
report["reason"] = "timeout"
|
|
572
|
+
elif result.drain_timed_out:
|
|
573
|
+
report["reason"] = "pipe-drain-timeout"
|
|
383
574
|
elif errors:
|
|
384
575
|
report["reason"] = "invalid-config"
|
|
385
576
|
report["errors"] = errors[:10]
|
|
386
|
-
elif
|
|
577
|
+
elif result.capture_limited:
|
|
387
578
|
report["reason"] = "capture-limit"
|
|
388
|
-
report["output_bytes"] = output_bytes
|
|
579
|
+
report["output_bytes"] = result.output_bytes
|
|
389
580
|
report["max_capture_bytes"] = max_capture
|
|
390
581
|
else:
|
|
391
582
|
matched = next((flt for flt in filters if filter_matches(flt, command)), None)
|
|
@@ -413,12 +604,12 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
413
604
|
report.update({"decision": "filtered", "reason": "matched", "filter_id": matched.id, "input_lines": len(lines), "output_lines": len(filtered)})
|
|
414
605
|
emit_run_report(args, report)
|
|
415
606
|
return rc
|
|
416
|
-
|
|
417
|
-
|
|
607
|
+
if not result.passthrough_emitted:
|
|
608
|
+
sys.stdout.write(result.stdout_text)
|
|
609
|
+
sys.stderr.write(result.stderr_text)
|
|
418
610
|
emit_run_report(args, report)
|
|
419
611
|
return rc
|
|
420
612
|
|
|
421
|
-
|
|
422
613
|
def build_parser() -> argparse.ArgumentParser:
|
|
423
614
|
parser = argparse.ArgumentParser(prog=TOOL_NAME, description="Validate and apply bounded declarative command-output filters. Filtered mode applies line rules to combined stdout+stderr and writes the filtered result to stdout; passthrough mode preserves stdout/stderr streams.")
|
|
424
615
|
sub = parser.add_subparsers(dest="command_name", required=True)
|
|
@@ -11,11 +11,17 @@ import json
|
|
|
11
11
|
import os
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
import subprocess
|
|
14
|
+
import stat
|
|
14
15
|
import sys
|
|
15
16
|
from typing import NoReturn
|
|
16
17
|
|
|
17
18
|
COMMAND_NAME = "context-guard"
|
|
18
19
|
PACKAGE_NAME = "@ictechgy/context-guard"
|
|
20
|
+
MAX_VERSION_METADATA_BYTES = 64 * 1024
|
|
21
|
+
ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
|
|
22
|
+
"tmp": Path("/private/tmp"),
|
|
23
|
+
"var": Path("/private/var"),
|
|
24
|
+
}
|
|
19
25
|
|
|
20
26
|
HELPER_SUBCOMMANDS: dict[str, tuple[str, ...]] = {
|
|
21
27
|
"setup": ("context-guard-setup",),
|
|
@@ -50,16 +56,182 @@ def _script_dir() -> Path:
|
|
|
50
56
|
|
|
51
57
|
def _candidate_roots() -> list[Path]:
    """Return the candidate root directories, de-duplicated in priority order.

    Candidates are the parent and grandparent of the script directory.
    """
    here = _script_dir()
    parent = here.parent
    candidates = [parent, parent.parent]
    # When run from context-guard-kit in a checkout, the repo root is one level up.
    if here.name == "context-guard-kit":
        candidates.insert(0, parent)
    unique: list[Path] = []
    for root in candidates:
        if root not in unique:
            unique.append(root)
    return unique
|
|
58
64
|
|
|
59
65
|
|
|
66
|
+
def _normalized_link_target(anchor: Path, raw_target: str) -> Path:
|
|
67
|
+
target = Path(raw_target)
|
|
68
|
+
if target.is_absolute():
|
|
69
|
+
return Path(os.path.normpath(str(target)))
|
|
70
|
+
return Path(os.path.normpath(str(anchor / target)))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _normalize_allowed_first_absolute_symlink(path: Path) -> Path:
|
|
74
|
+
if not path.is_absolute():
|
|
75
|
+
return path
|
|
76
|
+
parts = path.parts
|
|
77
|
+
if len(parts) < 2:
|
|
78
|
+
return path
|
|
79
|
+
expected = ALLOWED_FIRST_ABSOLUTE_SYMLINKS.get(parts[1])
|
|
80
|
+
if expected is None:
|
|
81
|
+
return path
|
|
82
|
+
first = Path(path.anchor) / parts[1]
|
|
83
|
+
try:
|
|
84
|
+
if first.is_symlink() and _normalized_link_target(Path(path.anchor), os.readlink(first)) == expected:
|
|
85
|
+
return expected.joinpath(*parts[2:])
|
|
86
|
+
except OSError:
|
|
87
|
+
return path
|
|
88
|
+
return path
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _metadata_no_follow_supported() -> bool:
|
|
92
|
+
return (
|
|
93
|
+
hasattr(os, "O_NOFOLLOW")
|
|
94
|
+
and os.open in getattr(os, "supports_dir_fd", set())
|
|
95
|
+
and os.stat in getattr(os, "supports_dir_fd", set())
|
|
96
|
+
and os.stat in getattr(os, "supports_follow_symlinks", set())
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _directory_open_flags(*, follow_final: bool = False) -> int:
|
|
101
|
+
flags = os.O_RDONLY
|
|
102
|
+
if hasattr(os, "O_CLOEXEC"):
|
|
103
|
+
flags |= os.O_CLOEXEC
|
|
104
|
+
if hasattr(os, "O_DIRECTORY"):
|
|
105
|
+
flags |= os.O_DIRECTORY
|
|
106
|
+
if not follow_final:
|
|
107
|
+
flags |= os.O_NOFOLLOW
|
|
108
|
+
return flags
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _metadata_file_open_flags() -> int:
|
|
112
|
+
flags = os.O_RDONLY | os.O_NOFOLLOW
|
|
113
|
+
if hasattr(os, "O_CLOEXEC"):
|
|
114
|
+
flags |= os.O_CLOEXEC
|
|
115
|
+
if hasattr(os, "O_NONBLOCK"):
|
|
116
|
+
flags |= os.O_NONBLOCK
|
|
117
|
+
if hasattr(os, "O_NOCTTY"):
|
|
118
|
+
flags |= os.O_NOCTTY
|
|
119
|
+
return flags
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _leaf_name(path: Path) -> str | None:
|
|
123
|
+
name = path.name
|
|
124
|
+
if name in {"", ".", ".."}:
|
|
125
|
+
return None
|
|
126
|
+
return name
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _open_metadata_parent_no_follow(path: Path) -> int | None:
    """Open the parent directory of ``path`` one component at a time.

    Starting from the path anchor (absolute paths) or the current directory
    (relative paths), each intermediate component is opened relative to the
    previous descriptor with the no-follow directory flags.

    Returns a directory descriptor the caller must close, or ``None`` when
    the platform lacks the required calls, a component is "..", a component
    is not a directory, or any open/stat fails.
    """

    def _close_quietly(fd: int) -> None:
        try:
            os.close(fd)
        except OSError:
            pass

    if not _metadata_no_follow_supported():
        return None
    path = _normalize_allowed_first_absolute_symlink(path)
    if path.is_absolute():
        start = path.anchor or os.sep
        components = path.parts[1:-1]
    else:
        start = "."
        components = path.parts[:-1]
    try:
        held_fd = os.open(start, _directory_open_flags(follow_final=True))
    except OSError:
        return None
    try:
        for component in components:
            if component in {"", "."}:
                continue
            if component == "..":
                return None
            try:
                child_fd = os.open(component, _directory_open_flags(), dir_fd=held_fd)
            except OSError:
                return None
            try:
                is_directory = stat.S_ISDIR(os.fstat(child_fd).st_mode)
            except OSError:
                _close_quietly(child_fd)
                return None
            if not is_directory:
                _close_quietly(child_fd)
                return None
            # Step down: release the previous level, keep the new one.
            _close_quietly(held_fd)
            held_fd = child_fd
        # Hand ownership to the caller so the cleanup below skips it.
        result_fd, held_fd = held_fd, -1
        return result_fd
    finally:
        if held_fd >= 0:
            _close_quietly(held_fd)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _read_metadata_text(path: Path) -> str | None:
    """Read a small UTF-8 metadata file without following symlinks.

    The parent directory is opened via ``_open_metadata_parent_no_follow``
    and the leaf is checked twice, once by name before opening and once on
    the opened descriptor: it must be a regular file no larger than
    ``MAX_VERSION_METADATA_BYTES``.

    Returns the decoded text, or ``None`` when the parent cannot be opened,
    the leaf name is unusable, a check fails, an ``OSError`` occurs, the
    content exceeds the size limit, or it is not valid UTF-8.
    """
    path = _normalize_allowed_first_absolute_symlink(path)
    parent_fd = _open_metadata_parent_no_follow(path)
    if parent_fd is None:
        return None
    fd = -1
    data = b""
    try:
        leaf = _leaf_name(path)
        if leaf is None:
            return None
        pre_open = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
        if not stat.S_ISREG(pre_open.st_mode):
            return None
        if pre_open.st_size > MAX_VERSION_METADATA_BYTES:
            return None
        fd = os.open(leaf, _metadata_file_open_flags(), dir_fd=parent_fd)
        # Re-check on the descriptor: the name may have been swapped between
        # the stat above and the open.
        opened = os.fstat(fd)
        if not stat.S_ISREG(opened.st_mode):
            return None
        if opened.st_size > MAX_VERSION_METADATA_BYTES:
            return None
        # A single os.read() may return fewer bytes than requested, so keep
        # reading until EOF.  One byte beyond the cap is requested so that
        # growth after the fstat is still detected below.
        limit = MAX_VERSION_METADATA_BYTES + 1
        pieces: list[bytes] = []
        received = 0
        while received < limit:
            piece = os.read(fd, limit - received)
            if not piece:
                break
            pieces.append(piece)
            received += len(piece)
        data = b"".join(pieces)
    except OSError:
        return None
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
    if len(data) > MAX_VERSION_METADATA_BYTES:
        return None
    try:
        return data.decode("utf-8")
    except UnicodeDecodeError:
        return None
|
|
227
|
+
|
|
228
|
+
|
|
60
229
|
def _load_json(path: Path) -> dict[str, object] | None:
    """Load ``path`` as a JSON object via the bounded no-follow reader.

    Returns the parsed dict, or ``None`` when the file cannot be read, is
    not valid JSON, is nested too deeply to parse, or its top-level value is
    not an object.
    """
    text = _read_metadata_text(path)
    if text is None:
        return None
    try:
        data = json.loads(text)
    except (json.JSONDecodeError, RecursionError):
        # RecursionError: input within the size cap can still be nested
        # deeply enough to exhaust the interpreter's recursion limit.
        return None
    return data if isinstance(data, dict) else None
|
|
@@ -24,6 +24,7 @@ import shlex
|
|
|
24
24
|
import stat
|
|
25
25
|
import subprocess
|
|
26
26
|
import sys
|
|
27
|
+
import threading
|
|
27
28
|
import time
|
|
28
29
|
from dataclasses import dataclass
|
|
29
30
|
from typing import Any
|
|
@@ -183,23 +184,43 @@ class FallbackLineSanitizer:
|
|
|
183
184
|
return line, bool(count)
|
|
184
185
|
|
|
185
186
|
|
|
187
|
+
# Process-static cache: CLI invocations should not re-import the sanitizer for
|
|
188
|
+
# every file, while each sanitize_text() call still gets a fresh stateful
|
|
189
|
+
# sanitizer instance.
|
|
190
|
+
_LINE_SANITIZER_FACTORY_CACHE: Any | None = None
|
|
191
|
+
_LINE_SANITIZER_FACTORY_LOCK = threading.Lock()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def load_line_sanitizer_factory() -> Any:
    """Return the line-sanitizer class, loading and caching it on first use.

    The module-level cache is checked once without the lock and again after
    acquiring it.  The first existing candidate script next to this file is
    executed as a module and its ``LineSanitizer`` attribute is cached;
    ``FallbackLineSanitizer`` is cached when no candidate exists.

    Raises:
        RuntimeError: if an existing candidate cannot be loaded.  Nothing is
            cached in that case.
    """
    global _LINE_SANITIZER_FACTORY_CACHE
    cached = _LINE_SANITIZER_FACTORY_CACHE
    if cached is not None:
        return cached
    with _LINE_SANITIZER_FACTORY_LOCK:
        if _LINE_SANITIZER_FACTORY_CACHE is not None:
            return _LINE_SANITIZER_FACTORY_CACHE
        here = Path(__file__).resolve().parent
        candidate_names = ("sanitize_output.py", "context-guard-sanitize-output", "claude-sanitize-output")
        for candidate in (here / name for name in candidate_names):
            if not candidate.exists():
                continue
            try:
                loader = importlib.machinery.SourceFileLoader(f"_context_guard_pack_sanitize_{os.getpid()}", str(candidate))
                spec = importlib.util.spec_from_loader(loader.name, loader)
                if spec is None:
                    raise RuntimeError("import spec unavailable")
                module = importlib.util.module_from_spec(spec)
                loader.exec_module(module)
                factory = module.LineSanitizer
            except Exception as exc:
                raise RuntimeError(f"could not load sanitizer {candidate}: {exc}") from exc
            _LINE_SANITIZER_FACTORY_CACHE = factory
            return factory
        _LINE_SANITIZER_FACTORY_CACHE = FallbackLineSanitizer
        return _LINE_SANITIZER_FACTORY_CACHE
|
|
219
|
+
|
|
220
|
+
|
|
186
221
|
def load_line_sanitizer(show_paths: bool = False) -> object:
    """Return a new sanitizer instance built by the cached sanitizer factory."""
    return load_line_sanitizer_factory()(show_paths=show_paths)
|
|
203
224
|
|
|
204
225
|
|
|
205
226
|
def sanitize_text(text: str, *, show_paths: bool = False) -> tuple[str, int]:
|
|
@@ -464,6 +485,21 @@ def open_dir_no_follow(path: Path | str, *, dir_fd: int | None = None) -> int:
|
|
|
464
485
|
raise
|
|
465
486
|
|
|
466
487
|
|
|
488
|
+
def file_open_flags() -> int:
    """Build read-only ``os.open`` flags for a leaf file.

    ``O_NOFOLLOW``, ``O_CLOEXEC``, ``O_NONBLOCK`` and ``O_NOCTTY`` are each
    included only when the platform defines them.
    """
    return (
        os.O_RDONLY
        | getattr(os, "O_NOFOLLOW", 0)
        | getattr(os, "O_CLOEXEC", 0)
        | getattr(os, "O_NONBLOCK", 0)
        | getattr(os, "O_NOCTTY", 0)
    )
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def stat_leaf_no_follow(name: str, *, dir_fd: int) -> os.stat_result | None:
|
|
496
|
+
supports_dir_fd = os.stat in getattr(os, "supports_dir_fd", set())
|
|
497
|
+
supports_no_follow = os.stat in getattr(os, "supports_follow_symlinks", set())
|
|
498
|
+
if not supports_dir_fd or not supports_no_follow:
|
|
499
|
+
return None
|
|
500
|
+
return os.stat(name, dir_fd=dir_fd, follow_symlinks=False)
|
|
501
|
+
|
|
502
|
+
|
|
467
503
|
def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
|
|
468
504
|
current_fd: int | None = None
|
|
469
505
|
try:
|
|
@@ -484,11 +520,20 @@ def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
|
|
|
484
520
|
os.close(current_fd)
|
|
485
521
|
current_fd = next_fd
|
|
486
522
|
continue
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
523
|
+
try:
|
|
524
|
+
pre_st = stat_leaf_no_follow(part, dir_fd=current_fd)
|
|
525
|
+
except FileNotFoundError:
|
|
526
|
+
return None, "missing"
|
|
527
|
+
except NotADirectoryError:
|
|
528
|
+
return None, "missing"
|
|
529
|
+
except OSError:
|
|
530
|
+
return None, "unsafe_path"
|
|
531
|
+
if pre_st is not None:
|
|
532
|
+
if stat.S_ISLNK(pre_st.st_mode):
|
|
533
|
+
return None, "unsafe_path"
|
|
534
|
+
if not stat.S_ISREG(pre_st.st_mode):
|
|
535
|
+
return None, "empty_source"
|
|
536
|
+
flags = file_open_flags()
|
|
492
537
|
file_fd = -1
|
|
493
538
|
try:
|
|
494
539
|
file_fd = os.open(part, flags, dir_fd=current_fd)
|