@ictechgy/context-guard 0.4.7 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,14 @@ All notable changes for the ContextGuard plugin are documented here.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [0.4.8] - 2026-06-11
8
+
9
+ - Hardened experimental registry config writes with same-directory atomic replace so failed writes or symlink swaps do not truncate or redirect the live config.
10
+ - Hardened dispatcher version metadata reads with dir-fd no-follow parent traversal to close parent symlink races.
11
+ - Preserved bounded filter passthrough ordering without holding the capture state lock during emission.
12
+ - Serialized context pack sanitizer factory first load and added focused race/failure regression coverage.
13
+ - Kept plugin bin mirrors synchronized for the updated helper hardening.
14
+
7
15
  ## [0.4.7] - 2026-06-11
8
16
 
9
17
  - Added default-off experimental opt-in registry surfaces for future token-reduction lanes, preserving project-local intent without enabling runtime behavior.
@@ -8,13 +8,18 @@ risk hiding evidence.
8
8
  from __future__ import annotations
9
9
 
10
10
  import argparse
11
+ import codecs
11
12
  from dataclasses import dataclass
12
13
  import json
14
+ import os
13
15
  from pathlib import Path
14
16
  import re
15
17
  import shlex
18
+ import signal
16
19
  import subprocess
17
20
  import sys
21
+ import threading
22
+ import time
18
23
  from typing import Any, Iterable
19
24
 
20
25
  SCHEMA_VERSION = "contextguard.filter-dsl.v1"
@@ -33,6 +38,8 @@ MAX_EMIT_LINES = 5_000
33
38
  DEFAULT_TIMEOUT_SECONDS = 600
34
39
  MAX_TIMEOUT_SECONDS = 86_400
35
40
  TIMEOUT_EXIT_CODE = 124
41
# Seconds to wait for the pipe reader threads to finish once the command has
# exited (or been terminated) before the drain is treated as timed out.
TIMEOUT_PIPE_DRAIN_GRACE_SECONDS = 5.0
# Seconds granted to each individual wait/join step while terminating the
# command and closing its pipes.
PIPE_THREAD_CLOSE_GRACE_SECONDS = 1.0
36
43
  FILTER_KEYS = {"id", "match", "passthrough_on_exit", "include_regex", "exclude_regex", "head_lines", "tail_lines", "max_lines"}
37
44
  MATCH_KEYS = {"argv_prefix", "argv_regex"}
38
45
  PROTECTED_BASENAMES = {
@@ -329,22 +336,204 @@ def print_validation(valid: bool, errors: list[str], count: int, as_json: bool)
329
336
  print(f"- {error}", file=sys.stderr)
330
337
 
331
338
 
332
- def timeout_text(value: str | bytes | None) -> str:
333
- if isinstance(value, str):
334
- return value
335
- return (value or b"").decode("utf-8", "replace")
339
@dataclass
class CommandResult:
    """Outcome of a single wrapped command execution."""

    # Exit status reported for the command.
    returncode: int
    # Decoded captured stdout text.
    stdout_text: str
    # Decoded captured stderr text.
    stderr_text: str
    # Total number of output bytes observed.
    output_bytes: int
    # True when the output exceeded the capture limit.
    capture_limited: bool
    # True when the command did not exit within its timeout.
    timed_out: bool
    # True when the pipes could not be drained within the grace period.
    drain_timed_out: bool
    # True when output was already written through to the real streams.
    passthrough_emitted: bool
336
349
 
337
350
 
338
- def run_command(argv: list[str], timeout_seconds: int) -> tuple[int, str, str, bool]:
351
def write_binary_chunk(stream: Any, chunk: bytes) -> None:
    """Write raw bytes to a text stream, using its binary buffer when present.

    Args:
        stream: A file-like object; if it exposes a ``buffer`` attribute the
            bytes are written there unchanged, otherwise they are decoded as
            UTF-8 (invalid sequences replaced) and written as text.
        chunk: Bytes to write. An empty chunk is a no-op.
    """
    if not chunk:
        return
    # Flush pending text first so it cannot be reordered after the raw bytes.
    stream.flush()
    binary = getattr(stream, "buffer", None)
    if binary is not None:
        binary.write(chunk)
    else:
        stream.write(chunk.decode("utf-8", "replace"))
    stream.flush()
361
+
362
+
363
class BoundedCapture:
    """Size-bounded, lock-protected buffer for a command's stdout and stderr.

    Chunks are stored until the combined stored size would exceed
    ``max_capture_bytes``. At that point everything stored so far is replayed
    to the real ``sys.stdout``/``sys.stderr`` and every later chunk is written
    straight through instead of being retained.
    """

    def __init__(self, max_capture_bytes: int) -> None:
        """Create an empty capture bounded to ``max_capture_bytes`` stored bytes."""
        self.max_capture_bytes = max_capture_bytes
        self.stdout = bytearray()
        self.stderr = bytearray()
        self.output_bytes = 0
        self.capture_limited = False
        self.passthrough_emitted = False
        # Guards the buffers, counters and flags above.
        self._lock = threading.Lock()
        # Serializes passthrough writes in the order their tickets were issued.
        self._emit_condition = threading.Condition()
        self._next_emit_order = 0
        self._active_emit_order = 0
        self._stdout_decoder = codecs.getincrementaldecoder("utf-8")("replace")
        self._stderr_decoder = codecs.getincrementaldecoder("utf-8")("replace")

    def consume(self, stream_name: str, chunk: bytes) -> None:
        """Record ``chunk`` for ``stream_name`` ("stdout"; anything else is stderr).

        While under the limit the bytes are only stored. The chunk that
        crosses the limit flips the capture into passthrough mode. Writes to
        the real streams happen after the state lock is released, ordered by a
        ticket taken while the lock was still held.
        """
        if not chunk:
            return
        passthrough: list[tuple[Any, bytes]] = []
        emit_order: int | None = None
        with self._lock:
            self.output_bytes += len(chunk)
            if self.capture_limited:
                passthrough.append((sys.stdout if stream_name == "stdout" else sys.stderr, chunk))
            else:
                stored_total = len(self.stdout) + len(self.stderr)
                remaining = self.max_capture_bytes - stored_total
                target = self.stdout if stream_name == "stdout" else self.stderr
                if len(chunk) <= remaining:
                    target.extend(chunk)
                    return
                if remaining > 0:
                    # Fill the buffer to the limit; only the rest overflows.
                    target.extend(chunk[:remaining])
                    overflow = chunk[remaining:]
                else:
                    overflow = chunk
                self.capture_limited = True
                self.passthrough_emitted = True
                # Replay what was stored, then the part that did not fit.
                passthrough.extend(
                    [
                        (sys.stdout, bytes(self.stdout)),
                        (sys.stderr, bytes(self.stderr)),
                        (sys.stdout if stream_name == "stdout" else sys.stderr, overflow),
                    ]
                )
            if passthrough:
                # Take the ordering ticket under the lock so emission order
                # matches the order in which chunks were accounted.
                emit_order = self._next_emit_order
                self._next_emit_order += 1
        if emit_order is None:
            return
        with self._emit_condition:
            while emit_order != self._active_emit_order:
                self._emit_condition.wait()
        try:
            for stream, payload in passthrough:
                write_binary_chunk(stream, payload)
        finally:
            # Always advance the turn, even if a write failed, so waiting
            # emitters are not blocked forever.
            with self._emit_condition:
                self._active_emit_order += 1
                self._emit_condition.notify_all()

    def text(self) -> tuple[str, str]:
        """Return the stored ``(stdout, stderr)`` decoded as UTF-8 with replacement."""
        with self._lock:
            stdout_bytes = bytes(self.stdout)
            stderr_bytes = bytes(self.stderr)
            stdout = self._stdout_decoder.decode(stdout_bytes, final=True)
            stderr = self._stderr_decoder.decode(stderr_bytes, final=True)
        return stdout, stderr
431
+
432
+
433
def run_command(argv: list[str], timeout_seconds: int, max_capture_bytes: int) -> CommandResult:
    """Run ``argv`` with bounded output capture and a wall-clock timeout.

    stdout and stderr are read by two daemon threads into a ``BoundedCapture``.
    If the command outlives ``timeout_seconds`` it is terminated and the result
    carries ``TIMEOUT_EXIT_CODE``. If the pipes stay open after the command
    exits, the drain is abandoned after ``TIMEOUT_PIPE_DRAIN_GRACE_SECONDS``.

    Args:
        argv: Command and arguments; an empty list is reported as a start failure.
        timeout_seconds: Maximum time to wait for the command to exit.
        max_capture_bytes: Combined stdout+stderr bytes kept before passthrough.

    Returns:
        A ``CommandResult``. Captured text is empty when the capture limit was
        hit. An ``OSError`` is reported as exit code 127.
    """
    if not argv:
        stderr = f"{TOOL_NAME}: command failed to start: no command provided\n"
        output_bytes = len(stderr.encode("utf-8", "replace"))
        return CommandResult(127, "", stderr, output_bytes, False, False, False, False)
    capture = BoundedCapture(max_capture_bytes)

    def read_pipe(pipe: Any, stream_name: str) -> None:
        # Pump one pipe into the capture until EOF, then close it.
        try:
            while True:
                chunk = pipe.read(64 * 1024)
                if not chunk:
                    break
                capture.consume(stream_name, chunk)
        finally:
            try:
                pipe.close()
            except OSError:
                pass

    def terminate_processes(proc: subprocess.Popen[bytes], *, force: bool) -> None:
        # On POSIX the child is started in its own session below, so signal
        # the group whose id is the child's pid rather than only the child.
        if os.name == "posix":
            try:
                os.killpg(proc.pid, signal.SIGKILL if force else signal.SIGTERM)
                return
            except ProcessLookupError:
                return
            except OSError:
                pass
        try:
            if proc.poll() is not None:
                return
            if force:
                proc.kill()
            else:
                proc.terminate()
        except (OSError, ValueError):
            pass

    def close_pipes(proc: subprocess.Popen[bytes]) -> None:
        # Best-effort close of both pipe ends held by this process.
        for pipe in (proc.stdout, proc.stderr):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                pass

    def join_threads_until(threads: tuple[threading.Thread, threading.Thread], deadline: float) -> bool:
        # Join every thread against one shared deadline; True when all finished.
        for thread in threads:
            remaining = max(0.0, deadline - time.monotonic())
            thread.join(timeout=remaining)
        return all(not thread.is_alive() for thread in threads)

    def terminate_and_close(proc: subprocess.Popen[bytes], threads: tuple[threading.Thread, threading.Thread]) -> None:
        # Escalate: polite terminate, then force kill, then close the pipes.
        terminate_processes(proc, force=False)
        try:
            proc.wait(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            pass
        if join_threads_until(threads, time.monotonic() + PIPE_THREAD_CLOSE_GRACE_SECONDS):
            return
        terminate_processes(proc, force=True)
        try:
            proc.wait(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            pass
        close_pipes(proc)
        for thread in threads:
            thread.join(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)

    # NOTE(review): this handler also catches OSError raised after the process
    # started (e.g. from passthrough writes) and reports it as a start failure.
    # Confirm that is intended.
    try:
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, start_new_session=(os.name == "posix"))
        assert proc.stdout is not None
        assert proc.stderr is not None
        stdout_thread = threading.Thread(target=read_pipe, args=(proc.stdout, "stdout"), daemon=True)
        stderr_thread = threading.Thread(target=read_pipe, args=(proc.stderr, "stderr"), daemon=True)
        reader_threads = (stdout_thread, stderr_thread)
        stdout_thread.start()
        stderr_thread.start()
        timed_out = False
        drain_timed_out = False
        try:
            returncode = proc.wait(timeout=timeout_seconds)
        except subprocess.TimeoutExpired:
            timed_out = True
            returncode = TIMEOUT_EXIT_CODE
            terminate_and_close(proc, reader_threads)
        # Something may still hold the pipes open after the command itself is
        # gone, so the drain gets its own bounded wait.
        drain_deadline = time.monotonic() + TIMEOUT_PIPE_DRAIN_GRACE_SECONDS
        if not join_threads_until(reader_threads, drain_deadline):
            drain_timed_out = True
            terminate_and_close(proc, reader_threads)
        if timed_out:
            capture.consume("stderr", f"\n[{TOOL_NAME}] command timed out after {timeout_seconds}s\n".encode("utf-8"))
        elif drain_timed_out:
            capture.consume("stderr", f"\n[{TOOL_NAME}] command pipe drain timed out after direct process exit\n".encode("utf-8"))
        stdout_text, stderr_text = ("", "") if capture.capture_limited else capture.text()
        return CommandResult(returncode, stdout_text, stderr_text, capture.output_bytes, capture.capture_limited, timed_out, drain_timed_out, capture.passthrough_emitted)
    except OSError as exc:
        stderr = f"{TOOL_NAME}: command failed to start: {exc.strerror or exc.__class__.__name__}\n"
        encoded = stderr.encode("utf-8", "replace")
        output_bytes = len(encoded)
        return CommandResult(127, "", stderr, output_bytes, False, False, False, False)
348
537
 
349
538
 
350
539
  def emit_run_report(args: argparse.Namespace, payload: dict[str, Any]) -> None:
@@ -373,19 +562,21 @@ def cmd_run(args: argparse.Namespace) -> int:
373
562
  max_line_chars = bounded_int(args.max_line_chars, DEFAULT_MAX_LINE_CHARS, 1, MAX_LINE_CHARS_LIMIT)
374
563
  timeout_seconds = bounded_int(args.timeout_seconds, DEFAULT_TIMEOUT_SECONDS, 1, MAX_TIMEOUT_SECONDS)
375
564
  filters, errors = load_filters(Path(args.config).expanduser())
376
- rc, stdout_text, stderr_text, timed_out = run_command(command, timeout_seconds)
377
- output = stdout_text + stderr_text
378
- output_bytes = len(output.encode("utf-8", "replace"))
565
+ result = run_command(command, timeout_seconds, max_capture)
566
+ rc = result.returncode
567
+ output = result.stdout_text + result.stderr_text
379
568
  protected_nonzero = rc != 0 and is_protected_command(command)
380
569
  report: dict[str, Any] = {"tool": TOOL_NAME, "schema_version": SCHEMA_VERSION, "mode": "run", "command_exit_code": rc, "decision": "passthrough", "reason": "unclassified", "protected_nonzero": protected_nonzero}
381
- if timed_out:
570
+ if result.timed_out:
382
571
  report["reason"] = "timeout"
572
+ elif result.drain_timed_out:
573
+ report["reason"] = "pipe-drain-timeout"
383
574
  elif errors:
384
575
  report["reason"] = "invalid-config"
385
576
  report["errors"] = errors[:10]
386
- elif output_bytes > max_capture:
577
+ elif result.capture_limited:
387
578
  report["reason"] = "capture-limit"
388
- report["output_bytes"] = output_bytes
579
+ report["output_bytes"] = result.output_bytes
389
580
  report["max_capture_bytes"] = max_capture
390
581
  else:
391
582
  matched = next((flt for flt in filters if filter_matches(flt, command)), None)
@@ -413,12 +604,12 @@ def cmd_run(args: argparse.Namespace) -> int:
413
604
  report.update({"decision": "filtered", "reason": "matched", "filter_id": matched.id, "input_lines": len(lines), "output_lines": len(filtered)})
414
605
  emit_run_report(args, report)
415
606
  return rc
416
- sys.stdout.write(stdout_text)
417
- sys.stderr.write(stderr_text)
607
+ if not result.passthrough_emitted:
608
+ sys.stdout.write(result.stdout_text)
609
+ sys.stderr.write(result.stderr_text)
418
610
  emit_run_report(args, report)
419
611
  return rc
420
612
 
421
-
422
613
  def build_parser() -> argparse.ArgumentParser:
423
614
  parser = argparse.ArgumentParser(prog=TOOL_NAME, description="Validate and apply bounded declarative command-output filters. Filtered mode applies line rules to combined stdout+stderr and writes the filtered result to stdout; passthrough mode preserves stdout/stderr streams.")
424
615
  sub = parser.add_subparsers(dest="command_name", required=True)
@@ -11,11 +11,17 @@ import json
11
11
  import os
12
12
  from pathlib import Path
13
13
  import subprocess
14
+ import stat
14
15
  import sys
15
16
  from typing import NoReturn
16
17
 
17
18
  COMMAND_NAME = "context-guard"
18
19
  PACKAGE_NAME = "@ictechgy/context-guard"
20
# Upper bound on the size of a version metadata file.
MAX_VERSION_METADATA_BYTES = 64 * 1024
# First-level absolute symlinks that are tolerated, keyed by the root entry
# name and mapped to the only link target accepted for that entry.
ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
    "tmp": Path("/private/tmp"),
    "var": Path("/private/var"),
}
19
25
 
20
26
  HELPER_SUBCOMMANDS: dict[str, tuple[str, ...]] = {
21
27
  "setup": ("context-guard-setup",),
@@ -50,16 +56,182 @@ def _script_dir() -> Path:
50
56
 
51
57
def _candidate_roots() -> list[Path]:
    """Return the de-duplicated, ordered directories derived from the script location."""
    script_dir = _script_dir()
    roots = [script_dir.parent, script_dir.parent.parent]
    # When run from context-guard-kit in a checkout, the repo root is one level up.
    if script_dir.name == "context-guard-kit":
        roots.insert(0, script_dir.parent)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(roots))
58
64
 
59
65
 
66
def _normalized_link_target(anchor: Path, raw_target: str) -> Path:
    """Resolve a symlink target lexically, without touching the filesystem.

    Args:
        anchor: Directory that a relative ``raw_target`` is interpreted against.
        raw_target: Link text as returned by ``os.readlink``.

    Returns:
        The normalized target: absolute targets are normalized as-is, relative
        ones are joined onto ``anchor`` first.
    """
    target = Path(raw_target)
    if target.is_absolute():
        return Path(os.path.normpath(str(target)))
    return Path(os.path.normpath(str(anchor / target)))
71
+
72
+
73
def _normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Rewrite a path whose first component is an allow-listed root symlink.

    If ``path`` is absolute, its first component is a key of
    ``ALLOWED_FIRST_ABSOLUTE_SYMLINKS``, and that entry is a symlink whose
    normalized target equals the allow-listed value, the path is returned with
    the first component replaced by that target. In every other case,
    including any ``OSError`` while inspecting the link, ``path`` is returned
    unchanged.
    """
    if not path.is_absolute():
        return path
    parts = path.parts
    if len(parts) < 2:
        return path
    expected = ALLOWED_FIRST_ABSOLUTE_SYMLINKS.get(parts[1])
    if expected is None:
        return path
    first = Path(path.anchor) / parts[1]
    try:
        if first.is_symlink() and _normalized_link_target(Path(path.anchor), os.readlink(first)) == expected:
            return expected.joinpath(*parts[2:])
    except OSError:
        return path
    return path
89
+
90
+
91
def _metadata_no_follow_supported() -> bool:
    """Report whether this platform offers the primitives for no-follow reads.

    Requires ``os.O_NOFOLLOW``, ``dir_fd`` support in both ``os.open`` and
    ``os.stat``, and ``follow_symlinks`` support in ``os.stat``.
    """
    return (
        hasattr(os, "O_NOFOLLOW")
        and os.open in getattr(os, "supports_dir_fd", set())
        and os.stat in getattr(os, "supports_dir_fd", set())
        and os.stat in getattr(os, "supports_follow_symlinks", set())
    )
98
+
99
+
100
def _directory_open_flags(*, follow_final: bool = False) -> int:
    """Build ``os.open`` flags for opening a directory read-only.

    Args:
        follow_final: When False (the default) ``O_NOFOLLOW`` is added so a
            symlink in the final component is refused.

    Returns:
        The flag bitmask; ``O_CLOEXEC`` and ``O_DIRECTORY`` are included where
        the platform defines them.
    """
    flags = os.O_RDONLY
    if hasattr(os, "O_CLOEXEC"):
        flags |= os.O_CLOEXEC
    if hasattr(os, "O_DIRECTORY"):
        flags |= os.O_DIRECTORY
    if not follow_final:
        # Deliberately unguarded: there is no silent fallback to following
        # links when the platform lacks O_NOFOLLOW.
        flags |= os.O_NOFOLLOW
    return flags
109
+
110
+
111
def _metadata_file_open_flags() -> int:
    """Build ``os.open`` flags for reading a metadata file without following links.

    Always includes ``O_RDONLY`` and ``O_NOFOLLOW``; ``O_CLOEXEC``,
    ``O_NONBLOCK`` and ``O_NOCTTY`` are added where the platform defines them.
    """
    flags = os.O_RDONLY | os.O_NOFOLLOW
    if hasattr(os, "O_CLOEXEC"):
        flags |= os.O_CLOEXEC
    if hasattr(os, "O_NONBLOCK"):
        flags |= os.O_NONBLOCK
    if hasattr(os, "O_NOCTTY"):
        flags |= os.O_NOCTTY
    return flags
120
+
121
+
122
+ def _leaf_name(path: Path) -> str | None:
123
+ name = path.name
124
+ if name in {"", ".", ".."}:
125
+ return None
126
+ return name
127
+
128
+
129
def _open_metadata_parent_no_follow(path: Path) -> int | None:
    """Open the parent directory of ``path`` one component at a time.

    Starting from the filesystem root (absolute paths) or the current
    directory (relative paths), each intermediate component is opened relative
    to the previous descriptor with the no-follow directory flags.

    Returns:
        A directory file descriptor the caller must close, or None when the
        platform lacks the required primitives, a component is "..", a
        component is not a directory, or any ``os`` call fails.
    """
    if not _metadata_no_follow_supported():
        return None
    path = _normalize_allowed_first_absolute_symlink(path)
    try:
        if path.is_absolute():
            current_fd = os.open(path.anchor or os.sep, _directory_open_flags(follow_final=True))
            parts = path.parts[1:-1]
        else:
            current_fd = os.open(".", _directory_open_flags(follow_final=True))
            parts = path.parts[:-1]
    except OSError:
        return None
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                # Parent references are rejected outright.
                return None
            next_fd = -1
            try:
                next_fd = os.open(part, _directory_open_flags(), dir_fd=current_fd)
                is_directory = stat.S_ISDIR(os.fstat(next_fd).st_mode)
            except OSError:
                is_directory = False
            if not is_directory:
                if next_fd >= 0:
                    try:
                        os.close(next_fd)
                    except OSError:
                        pass
                # current_fd is released by the finally clause below.
                return None
            try:
                os.close(current_fd)
            except OSError:
                pass
            current_fd = next_fd
        # Hand ownership to the caller; clear the local so finally skips it.
        owned_fd = current_fd
        current_fd = -1
        return owned_fd
    finally:
        if current_fd >= 0:
            try:
                os.close(current_fd)
            except OSError:
                pass
184
+
185
+
186
def _read_metadata_text(path: Path) -> str | None:
    """Read a small UTF-8 metadata file without following symlinks.

    The parent directory is opened via ``_open_metadata_parent_no_follow`` and
    the leaf is checked both before and after opening: it must be a regular
    file no larger than ``MAX_VERSION_METADATA_BYTES``.

    Returns:
        The decoded text, or None when the path is unsafe, missing, too large,
        unreadable, or not valid UTF-8.
    """
    path = _normalize_allowed_first_absolute_symlink(path)
    parent_fd = _open_metadata_parent_no_follow(path)
    if parent_fd is None:
        return None
    fd = -1
    data = b""
    try:
        leaf = _leaf_name(path)
        if leaf is None:
            return None
        pre_open = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
        if not stat.S_ISREG(pre_open.st_mode):
            return None
        if pre_open.st_size > MAX_VERSION_METADATA_BYTES:
            return None
        fd = os.open(leaf, _metadata_file_open_flags(), dir_fd=parent_fd)
        # Re-check on the opened descriptor: the entry may have been swapped
        # between the stat above and the open.
        opened = os.fstat(fd)
        if not stat.S_ISREG(opened.st_mode):
            return None
        if opened.st_size > MAX_VERSION_METADATA_BYTES:
            return None
        # os.read may return fewer bytes than requested, so keep reading until
        # EOF or until one byte past the limit has been collected.
        chunks: list[bytes] = []
        remaining = MAX_VERSION_METADATA_BYTES + 1
        while remaining > 0:
            chunk = os.read(fd, remaining)
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
        data = b"".join(chunks)
    except OSError:
        return None
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
    # The extra byte requested above exposes a file that grew after fstat.
    if len(data) > MAX_VERSION_METADATA_BYTES:
        return None
    try:
        return data.decode("utf-8")
    except UnicodeDecodeError:
        return None
227
+
228
+
60
229
def _load_json(path: Path) -> dict[str, object] | None:
    """Load a JSON object from ``path`` using the no-follow metadata reader.

    Returns:
        The parsed dict, or None when the file cannot be read safely, is not
        valid JSON, or its top-level value is not an object.
    """
    text = _read_metadata_text(path)
    if text is None:
        return None
    try:
        data = json.loads(text)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; pathologically nested input
        # can raise RecursionError instead.
        return None
    return data if isinstance(data, dict) else None
@@ -24,6 +24,7 @@ import shlex
24
24
  import stat
25
25
  import subprocess
26
26
  import sys
27
+ import threading
27
28
  import time
28
29
  from dataclasses import dataclass
29
30
  from typing import Any
@@ -183,23 +184,43 @@ class FallbackLineSanitizer:
183
184
  return line, bool(count)
184
185
 
185
186
 
187
# Process-static cache: CLI invocations should not re-import the sanitizer for
# every file, while each sanitize_text() call still gets a fresh stateful
# sanitizer instance.
_LINE_SANITIZER_FACTORY_CACHE: Any | None = None
_LINE_SANITIZER_FACTORY_LOCK = threading.Lock()


def load_line_sanitizer_factory() -> Any:
    """Return the sanitizer class to instantiate, loading it at most once.

    The first sibling script found (``sanitize_output.py``,
    ``context-guard-sanitize-output``, ``claude-sanitize-output``) is imported
    and its ``LineSanitizer`` attribute is cached. When none exists,
    ``FallbackLineSanitizer`` is cached instead.

    Raises:
        RuntimeError: If a candidate script exists but cannot be loaded. The
            cache is left unset in that case, so a later call tries again.
    """
    global _LINE_SANITIZER_FACTORY_CACHE
    # Fast path without the lock once the cache is populated.
    if _LINE_SANITIZER_FACTORY_CACHE is not None:
        return _LINE_SANITIZER_FACTORY_CACHE
    with _LINE_SANITIZER_FACTORY_LOCK:
        # Re-check: another thread may have finished loading while we waited.
        if _LINE_SANITIZER_FACTORY_CACHE is not None:
            return _LINE_SANITIZER_FACTORY_CACHE
        script_dir = Path(__file__).resolve().parent
        for name in ("sanitize_output.py", "context-guard-sanitize-output", "claude-sanitize-output"):
            candidate = script_dir / name
            if not candidate.exists():
                continue
            try:
                loader = importlib.machinery.SourceFileLoader(f"_context_guard_pack_sanitize_{os.getpid()}", str(candidate))
                spec = importlib.util.spec_from_loader(loader.name, loader)
                if spec is None:
                    raise RuntimeError("import spec unavailable")
                module = importlib.util.module_from_spec(spec)
                loader.exec_module(module)
                _LINE_SANITIZER_FACTORY_CACHE = module.LineSanitizer
                return _LINE_SANITIZER_FACTORY_CACHE
            except Exception as exc:
                raise RuntimeError(f"could not load sanitizer {candidate}: {exc}") from exc
        _LINE_SANITIZER_FACTORY_CACHE = FallbackLineSanitizer
        return _LINE_SANITIZER_FACTORY_CACHE
219
+
220
+
186
221
def load_line_sanitizer(show_paths: bool = False) -> object:
    """Create a fresh sanitizer instance from the cached sanitizer factory.

    Args:
        show_paths: Forwarded unchanged to the sanitizer constructor.

    Raises:
        RuntimeError: Propagated from ``load_line_sanitizer_factory`` when a
            sanitizer script exists but cannot be loaded.
    """
    sanitizer_factory = load_line_sanitizer_factory()
    return sanitizer_factory(show_paths=show_paths)
203
224
 
204
225
 
205
226
  def sanitize_text(text: str, *, show_paths: bool = False) -> tuple[str, int]:
@@ -464,6 +485,21 @@ def open_dir_no_follow(path: Path | str, *, dir_fd: int | None = None) -> int:
464
485
  raise
465
486
 
466
487
 
488
def file_open_flags() -> int:
    """Build read-only ``os.open`` flags, adding each hardening flag the platform defines.

    ``O_NOFOLLOW``, ``O_CLOEXEC``, ``O_NONBLOCK`` and ``O_NOCTTY`` are OR-ed in
    when available; a flag missing on this platform contributes nothing.
    """
    flags = os.O_RDONLY
    for name in ("O_NOFOLLOW", "O_CLOEXEC", "O_NONBLOCK", "O_NOCTTY"):
        flags |= getattr(os, name, 0)
    return flags
493
+
494
+
495
+ def stat_leaf_no_follow(name: str, *, dir_fd: int) -> os.stat_result | None:
496
+ supports_dir_fd = os.stat in getattr(os, "supports_dir_fd", set())
497
+ supports_no_follow = os.stat in getattr(os, "supports_follow_symlinks", set())
498
+ if not supports_dir_fd or not supports_no_follow:
499
+ return None
500
+ return os.stat(name, dir_fd=dir_fd, follow_symlinks=False)
501
+
502
+
467
503
  def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
468
504
  current_fd: int | None = None
469
505
  try:
@@ -484,11 +520,20 @@ def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
484
520
  os.close(current_fd)
485
521
  current_fd = next_fd
486
522
  continue
487
- flags = os.O_RDONLY
488
- if hasattr(os, "O_NOFOLLOW"):
489
- flags |= os.O_NOFOLLOW
490
- if hasattr(os, "O_CLOEXEC"):
491
- flags |= os.O_CLOEXEC
523
+ try:
524
+ pre_st = stat_leaf_no_follow(part, dir_fd=current_fd)
525
+ except FileNotFoundError:
526
+ return None, "missing"
527
+ except NotADirectoryError:
528
+ return None, "missing"
529
+ except OSError:
530
+ return None, "unsafe_path"
531
+ if pre_st is not None:
532
+ if stat.S_ISLNK(pre_st.st_mode):
533
+ return None, "unsafe_path"
534
+ if not stat.S_ISREG(pre_st.st_mode):
535
+ return None, "empty_source"
536
+ flags = file_open_flags()
492
537
  file_fd = -1
493
538
  try:
494
539
  file_fd = os.open(part, flags, dir_fd=current_fd)