@ictechgy/context-guard 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,17 +11,24 @@ import json
11
11
  import os
12
12
  from pathlib import Path
13
13
  import subprocess
14
+ import stat
14
15
  import sys
15
16
  from typing import NoReturn
16
17
 
17
18
  COMMAND_NAME = "context-guard"
18
19
  PACKAGE_NAME = "@ictechgy/context-guard"
20
+ MAX_VERSION_METADATA_BYTES = 64 * 1024
21
+ ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
22
+ "tmp": Path("/private/tmp"),
23
+ "var": Path("/private/var"),
24
+ }
19
25
 
20
26
  HELPER_SUBCOMMANDS: dict[str, tuple[str, ...]] = {
21
27
  "setup": ("context-guard-setup",),
22
28
  "doctor": ("context-guard-setup", "--verify"),
23
29
  "audit": ("context-guard-audit",),
24
30
  "diet": ("context-guard-diet",),
31
+ "experiments": ("context-guard-experiments",),
25
32
  "scan": ("context-guard-diet", "scan"),
26
33
  "trim-output": ("context-guard-trim-output",),
27
34
  "trim": ("context-guard-trim-output",),
@@ -49,16 +56,182 @@ def _script_dir() -> Path:
49
56
 
50
57
  def _candidate_roots() -> list[Path]:
51
58
  script_dir = _script_dir()
52
- roots = [script_dir.parent, script_dir.parent.parent, Path.cwd()]
59
+ roots = [script_dir.parent, script_dir.parent.parent]
53
60
  # When run from context-guard-kit in a checkout, the repo root is one level up.
54
61
  if script_dir.name == "context-guard-kit":
55
62
  roots.insert(0, script_dir.parent)
56
63
  return list(dict.fromkeys(roots))
57
64
 
58
65
 
66
def _normalized_link_target(anchor: Path, raw_target: str) -> Path:
    """Return a symlink's target as a lexically normalized path.

    Args:
        anchor: Directory that a relative ``raw_target`` is interpreted against.
        raw_target: The link text exactly as returned by ``os.readlink``.

    Returns:
        The target with ``.``/``..`` segments collapsed by ``os.path.normpath``.
        Normalization is purely textual; the filesystem is not consulted.
    """
    link = Path(raw_target)
    # Absolute targets stand alone; relative ones hang off the anchor.
    combined = link if link.is_absolute() else anchor / link
    return Path(os.path.normpath(str(combined)))
71
+
72
+
73
def _normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Rewrite an allow-listed first-component symlink to its real location.

    Only the first component under the filesystem root is considered, and only
    when it is a key of ``ALLOWED_FIRST_ABSOLUTE_SYMLINKS`` whose on-disk link
    target normalizes to exactly the allow-listed destination (for example
    ``/tmp`` -> ``/private/tmp``; presumably the macOS layout).

    Args:
        path: Path to inspect. Relative paths are returned untouched.

    Returns:
        ``path`` re-rooted under the allow-listed destination when the rewrite
        applies; otherwise ``path`` unchanged, including when inspecting the
        link raises ``OSError``.
    """
    if not path.is_absolute():
        return path
    components = path.parts
    if len(components) < 2:
        return path

    head = components[1]
    canonical = ALLOWED_FIRST_ABSOLUTE_SYMLINKS.get(head)
    if canonical is None:
        return path

    root = Path(path.anchor)
    first_component = root / head
    try:
        if not first_component.is_symlink():
            return path
        actual = _normalized_link_target(root, os.readlink(first_component))
    except OSError:
        # An unreadable link is treated as "no rewrite"; later checks decide.
        return path
    if actual != canonical:
        return path
    return canonical.joinpath(*components[2:])
89
+
90
+
91
def _metadata_no_follow_supported() -> bool:
    """Report whether this platform can read metadata without following links.

    Returns:
        ``True`` only when ``os.O_NOFOLLOW`` exists, ``os.open`` and ``os.stat``
        both accept ``dir_fd``, and ``os.stat`` accepts ``follow_symlinks``.
    """
    if not hasattr(os, "O_NOFOLLOW"):
        return False
    dir_fd_capable = getattr(os, "supports_dir_fd", set())
    if os.open not in dir_fd_capable or os.stat not in dir_fd_capable:
        return False
    return os.stat in getattr(os, "supports_follow_symlinks", set())
98
+
99
+
100
def _directory_open_flags(*, follow_final: bool = False) -> int:
    """Build the ``os.open`` flags used to open a directory component.

    Args:
        follow_final: When true, ``O_NOFOLLOW`` is left out so the opened path
            itself may be a symlink.

    Returns:
        ``O_RDONLY`` combined with ``O_CLOEXEC`` and ``O_DIRECTORY`` where the
        platform defines them, plus ``O_NOFOLLOW`` unless ``follow_final``.
    """
    flags = os.O_RDONLY | getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_DIRECTORY", 0)
    if follow_final:
        return flags
    # Accessed unconditionally: callers are expected to have confirmed
    # O_NOFOLLOW support before asking for a no-follow open.
    return flags | os.O_NOFOLLOW
109
+
110
+
111
def _metadata_file_open_flags() -> int:
    """Build the ``os.open`` flags used to open the metadata file itself.

    Returns:
        ``O_RDONLY | O_NOFOLLOW`` combined with ``O_CLOEXEC``, ``O_NONBLOCK``
        and ``O_NOCTTY`` wherever the platform defines them.
    """
    flags = os.O_RDONLY | os.O_NOFOLLOW
    for optional in ("O_CLOEXEC", "O_NONBLOCK", "O_NOCTTY"):
        # Missing constants contribute 0, i.e. they are simply skipped.
        flags |= getattr(os, optional, 0)
    return flags
120
+
121
+
122
def _leaf_name(path: Path) -> str | None:
    """Return the final path component, or ``None`` if it is not a usable name.

    An empty name, ``.`` and ``..`` are rejected because they do not identify
    an entry inside the parent directory.
    """
    leaf = path.name
    return None if leaf in ("", ".", "..") else leaf
127
+
128
+
129
def _open_metadata_parent_no_follow(path: Path) -> int | None:
    """Open the parent directory of ``path`` one component at a time.

    The walk starts at the filesystem anchor (absolute paths) or at ``.``
    (relative paths). Each intermediate component is opened relative to the
    previous descriptor using the no-follow directory flags.

    Args:
        path: Path whose parent directory should be opened. The final component
            is not opened here.

    Returns:
        A directory file descriptor owned by the caller, or ``None`` when the
        platform lacks the required support, a component is ``..``, a component
        is not a directory, or any ``os.open``/``os.fstat`` call fails.
    """

    def _close_quietly(descriptor: int) -> None:
        # Close failures are deliberately ignored: nothing useful can be done.
        try:
            os.close(descriptor)
        except OSError:
            pass

    if not _metadata_no_follow_supported():
        return None
    path = _normalize_allowed_first_absolute_symlink(path)

    if path.is_absolute():
        start = path.anchor or os.sep
        parts = path.parts[1:-1]
    else:
        start = "."
        parts = path.parts[:-1]
    try:
        # The starting point itself may legitimately be reached via a link.
        current_fd = os.open(start, _directory_open_flags(follow_final=True))
    except OSError:
        return None

    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                # Parent hops would escape the component-by-component check.
                return None
            try:
                next_fd = os.open(part, _directory_open_flags(), dir_fd=current_fd)
            except OSError:
                return None
            try:
                is_directory = stat.S_ISDIR(os.fstat(next_fd).st_mode)
            except OSError:
                _close_quietly(next_fd)
                return None
            if not is_directory:
                _close_quietly(next_fd)
                return None
            _close_quietly(current_fd)
            current_fd = next_fd
        # Hand ownership to the caller; the finally block must not close it.
        owned_fd, current_fd = current_fd, -1
        return owned_fd
    finally:
        if current_fd >= 0:
            _close_quietly(current_fd)
184
+
185
+
186
def _read_metadata_text(path: Path) -> str | None:
    """Read a small UTF-8 metadata file without following symlinks.

    The parent directory is opened component by component, the leaf is checked
    with a no-follow ``stat`` before opening and with ``fstat`` after opening,
    and at most ``MAX_VERSION_METADATA_BYTES`` bytes are accepted.

    Args:
        path: Location of the metadata file.

    Returns:
        The decoded text, or ``None`` when the parent cannot be opened safely,
        the leaf is not a regular file, the file is larger than
        ``MAX_VERSION_METADATA_BYTES``, an ``OSError`` occurs, or the content is
        not valid UTF-8.
    """
    path = _normalize_allowed_first_absolute_symlink(path)
    parent_fd = _open_metadata_parent_no_follow(path)
    if parent_fd is None:
        return None
    fd = -1
    chunks: list[bytes] = []
    try:
        leaf = _leaf_name(path)
        if leaf is None:
            return None
        pre_open = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
        if not stat.S_ISREG(pre_open.st_mode):
            return None
        if pre_open.st_size > MAX_VERSION_METADATA_BYTES:
            return None
        fd = os.open(leaf, _metadata_file_open_flags(), dir_fd=parent_fd)
        # Re-check on the descriptor: the entry may have changed since the stat.
        opened = os.fstat(fd)
        if not stat.S_ISREG(opened.st_mode):
            return None
        if opened.st_size > MAX_VERSION_METADATA_BYTES:
            return None
        # os.read() may return fewer bytes than requested, so keep reading
        # until EOF. One byte beyond the limit is requested so that a file
        # which grew after fstat() is still detected as oversized.
        remaining = MAX_VERSION_METADATA_BYTES + 1
        while remaining > 0:
            chunk = os.read(fd, remaining)
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
    except OSError:
        return None
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
    data = b"".join(chunks)
    if len(data) > MAX_VERSION_METADATA_BYTES:
        return None
    try:
        return data.decode("utf-8")
    except UnicodeDecodeError:
        return None
227
+
228
+
59
229
  def _load_json(path: Path) -> dict[str, object] | None:
230
+ text = _read_metadata_text(path)
231
+ if text is None:
232
+ return None
60
233
  try:
61
- data = json.loads(path.read_text(encoding="utf-8"))
234
+ data = json.loads(text)
62
235
  except (OSError, json.JSONDecodeError):
63
236
  return None
64
237
  return data if isinstance(data, dict) else None
@@ -24,6 +24,7 @@ import shlex
24
24
  import stat
25
25
  import subprocess
26
26
  import sys
27
+ import threading
27
28
  import time
28
29
  from dataclasses import dataclass
29
30
  from typing import Any
@@ -183,23 +184,43 @@ class FallbackLineSanitizer:
183
184
  return line, bool(count)
184
185
 
185
186
 
187
+ # Process-static cache: CLI invocations should not re-import the sanitizer for
188
+ # every file, while each sanitize_text() call still gets a fresh stateful
189
+ # sanitizer instance.
190
+ _LINE_SANITIZER_FACTORY_CACHE: Any | None = None
191
+ _LINE_SANITIZER_FACTORY_LOCK = threading.Lock()
192
+
193
+
194
def load_line_sanitizer_factory() -> Any:
    """Return the sanitizer class to instantiate, resolving it at most once.

    The first sanitizer script found next to this file is imported and its
    ``LineSanitizer`` attribute is cached. If none of the candidate scripts
    exists, ``FallbackLineSanitizer`` is cached instead. The cache is checked
    once without the lock and again after acquiring it.

    Returns:
        A callable used by callers as ``factory(show_paths=...)``.

    Raises:
        RuntimeError: If a candidate script exists but cannot be loaded.
    """
    global _LINE_SANITIZER_FACTORY_CACHE
    cached = _LINE_SANITIZER_FACTORY_CACHE
    if cached is not None:
        return cached
    with _LINE_SANITIZER_FACTORY_LOCK:
        # Another thread may have filled the cache while we waited.
        if _LINE_SANITIZER_FACTORY_CACHE is not None:
            return _LINE_SANITIZER_FACTORY_CACHE

        here = Path(__file__).resolve().parent
        script_names = ("sanitize_output.py", "context-guard-sanitize-output", "claude-sanitize-output")
        candidate = next((here / name for name in script_names if (here / name).exists()), None)

        if candidate is None:
            _LINE_SANITIZER_FACTORY_CACHE = FallbackLineSanitizer
            return _LINE_SANITIZER_FACTORY_CACHE

        try:
            module_name = f"_context_guard_pack_sanitize_{os.getpid()}"
            loader = importlib.machinery.SourceFileLoader(module_name, str(candidate))
            spec = importlib.util.spec_from_loader(loader.name, loader)
            if spec is None:
                raise RuntimeError("import spec unavailable")
            module = importlib.util.module_from_spec(spec)
            loader.exec_module(module)
            _LINE_SANITIZER_FACTORY_CACHE = module.LineSanitizer
            return _LINE_SANITIZER_FACTORY_CACHE
        except Exception as exc:
            # Only the first existing candidate is tried; failure is fatal.
            raise RuntimeError(f"could not load sanitizer {candidate}: {exc}") from exc
219
+
220
+
186
221
  def load_line_sanitizer(show_paths: bool = False) -> object:
187
- script_dir = Path(__file__).resolve().parent
188
- for name in ("sanitize_output.py", "context-guard-sanitize-output", "claude-sanitize-output"):
189
- candidate = script_dir / name
190
- if not candidate.exists():
191
- continue
192
- try:
193
- loader = importlib.machinery.SourceFileLoader(f"_context_guard_pack_sanitize_{os.getpid()}", str(candidate))
194
- spec = importlib.util.spec_from_loader(loader.name, loader)
195
- if spec is None:
196
- raise RuntimeError("import spec unavailable")
197
- module = importlib.util.module_from_spec(spec)
198
- loader.exec_module(module)
199
- return module.LineSanitizer(show_paths=show_paths)
200
- except Exception as exc:
201
- raise RuntimeError(f"could not load sanitizer {candidate}: {exc}") from exc
202
- return FallbackLineSanitizer(show_paths=show_paths)
222
+ sanitizer_factory = load_line_sanitizer_factory()
223
+ return sanitizer_factory(show_paths=show_paths)
203
224
 
204
225
 
205
226
  def sanitize_text(text: str, *, show_paths: bool = False) -> tuple[str, int]:
@@ -464,6 +485,21 @@ def open_dir_no_follow(path: Path | str, *, dir_fd: int | None = None) -> int:
464
485
  raise
465
486
 
466
487
 
488
def file_open_flags() -> int:
    """Build the ``os.open`` flags for opening a leaf file read-only.

    Returns:
        ``O_RDONLY`` combined with ``O_NOFOLLOW``, ``O_CLOEXEC``, ``O_NONBLOCK``
        and ``O_NOCTTY`` wherever the platform defines them; constants that are
        missing contribute nothing.
    """
    return (
        os.O_RDONLY
        | getattr(os, "O_NOFOLLOW", 0)
        | getattr(os, "O_CLOEXEC", 0)
        | getattr(os, "O_NONBLOCK", 0)
        | getattr(os, "O_NOCTTY", 0)
    )
493
+
494
+
495
def stat_leaf_no_follow(name: str, *, dir_fd: int) -> os.stat_result | None:
    """Stat ``name`` relative to ``dir_fd`` without following a final symlink.

    Args:
        name: Entry name inside the directory referred to by ``dir_fd``.
        dir_fd: Open directory file descriptor.

    Returns:
        The ``os.stat_result`` for the entry itself, or ``None`` when
        ``os.stat`` lacks ``dir_fd`` or ``follow_symlinks`` support here.

    Raises:
        OSError: Propagated from ``os.stat`` (for example a missing entry).
    """
    for capability in ("supports_dir_fd", "supports_follow_symlinks"):
        if os.stat not in getattr(os, capability, set()):
            return None
    return os.stat(name, dir_fd=dir_fd, follow_symlinks=False)
501
+
502
+
467
503
  def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
468
504
  current_fd: int | None = None
469
505
  try:
@@ -484,11 +520,20 @@ def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
484
520
  os.close(current_fd)
485
521
  current_fd = next_fd
486
522
  continue
487
- flags = os.O_RDONLY
488
- if hasattr(os, "O_NOFOLLOW"):
489
- flags |= os.O_NOFOLLOW
490
- if hasattr(os, "O_CLOEXEC"):
491
- flags |= os.O_CLOEXEC
523
+ try:
524
+ pre_st = stat_leaf_no_follow(part, dir_fd=current_fd)
525
+ except FileNotFoundError:
526
+ return None, "missing"
527
+ except NotADirectoryError:
528
+ return None, "missing"
529
+ except OSError:
530
+ return None, "unsafe_path"
531
+ if pre_st is not None:
532
+ if stat.S_ISLNK(pre_st.st_mode):
533
+ return None, "unsafe_path"
534
+ if not stat.S_ISREG(pre_st.st_mode):
535
+ return None, "empty_source"
536
+ flags = file_open_flags()
492
537
  file_fd = -1
493
538
  try:
494
539
  file_fd = os.open(part, flags, dir_fd=current_fd)
@@ -11,6 +11,7 @@ from __future__ import annotations
11
11
  import argparse
12
12
  import base64
13
13
  import binascii
14
+ import errno
14
15
  try:
15
16
  import fcntl
16
17
  except ImportError: # pragma: no cover - fcntl is unavailable on Windows.
@@ -49,6 +50,8 @@ DEFAULT_SAFETY_FACTOR = 1.25
49
50
  DEFAULT_LARGE_SECTION_BYTES = 64_000
50
51
  MAX_LEDGER_ROWS = 20_000
51
52
  LEDGER_TAIL_INITIAL_BYTES = 64 * 1024
53
+ LEDGER_OPEN_RETRY_ATTEMPTS = 5
54
+ LEDGER_OPEN_RETRY_SECONDS = 0.01
52
55
  TTL_SECONDS = {"5m": 5 * 60, "1h": 60 * 60}
53
56
  ANTHROPIC_DOCS_URL = "https://docs.anthropic.com/en/build-with-claude/prompt-caching"
54
57
  ANTHROPIC_PRICING_URL = "https://platform.claude.com/docs/en/about-claude/pricing"
@@ -58,6 +61,10 @@ ALLOWED_FIRST_COMPONENT_SYMLINKS = {
58
61
  }
59
62
  DIR_FD_OPEN_SUPPORTED = os.open in getattr(os, "supports_dir_fd", set())
60
63
  NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
64
+ DIR_FD_STAT_NOFOLLOW_SUPPORTED = (
65
+ os.stat in getattr(os, "supports_dir_fd", set())
66
+ and os.stat in getattr(os, "supports_follow_symlinks", set())
67
+ )
61
68
 
62
69
  SECRET_RE = re.compile(
63
70
  r"(?is)("
@@ -148,25 +155,68 @@ def token_proxy_obj(data: Any) -> int:
148
155
  return token_proxy_text(json_bytes(data))
149
156
 
150
157
 
158
def read_bounded_regular_path(path: str | Path, *, max_bytes: int, label: str) -> tuple[str, bool]:
    """Read at most ``max_bytes`` from a regular file without following links.

    The parent directory is opened via ``open_directory_no_follow``; the leaf is
    checked with a no-follow ``stat`` before opening and ``fstat`` afterwards.

    Args:
        path: File to read.
        max_bytes: Upper bound on returned bytes; must lie in
            ``1..MAX_MAX_BYTES``.
        label: Human-readable name of the input, used in error messages.

    Returns:
        ``(text, truncated)`` where ``text`` is decoded as UTF-8 with
        replacement characters and ``truncated`` is true when the file held
        more than ``max_bytes`` bytes.

    Raises:
        CostGuardError: Via ``fail`` for every validation or I/O problem
            (assuming ``fail`` raises it, as the ``except`` clause suggests).
    """
    if not 1 <= max_bytes <= MAX_MAX_BYTES:
        fail(f"max bytes must be between 1 and {MAX_MAX_BYTES}")
    checked = reject_symlink_components(Path(path), label=label)
    leaf_name = _private_leaf_name(checked, label=label)
    parent_fd = -1
    fd = -1
    try:
        parent_fd = open_directory_no_follow(checked.parent, label=f"{label} parent")
        if not DIR_FD_STAT_NOFOLLOW_SUPPORTED:
            fail(f"{label} requires dir_fd stat support for symlink-safe regular-file validation")
        try:
            pre_st = os.stat(leaf_name, dir_fd=parent_fd, follow_symlinks=False)
        except OSError as exc:
            fail(f"could not inspect {label}: {os_error_detail(exc)}")
        if not stat.S_ISREG(pre_st.st_mode):
            fail(f"{label} must be a regular file")

        flags = _base_open_flags() | _no_follow_flag(label=label)
        flags |= getattr(os, "O_NONBLOCK", 0) | getattr(os, "O_NOCTTY", 0)
        fd = os.open(leaf_name, flags, dir_fd=parent_fd)
        # Validate the descriptor too: the entry may have been swapped.
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            fail(f"{label} must be a regular file")

        # Read one byte past the limit so truncation can be detected.
        limit = max_bytes + 1
        buffer = bytearray()
        while len(buffer) < limit:
            piece = os.read(fd, min(64 * 1024, limit - len(buffer)))
            if not piece:
                break
            buffer += piece
        raw = bytes(buffer)
    except CostGuardError:
        raise
    except OSError as exc:
        fail(f"could not read {label}: {os_error_detail(exc)}")
    finally:
        for descriptor in (fd, parent_fd):
            if descriptor >= 0:
                try:
                    os.close(descriptor)
                except OSError:
                    pass
    truncated = len(raw) > max_bytes
    return raw[:max_bytes].decode("utf-8", errors="replace"), truncated
211
+
212
+
151
213
  def read_text_path(path: str, *, max_bytes: int = DEFAULT_MAX_BYTES) -> tuple[str, bool]:
152
214
  if max_bytes < 1 or max_bytes > MAX_MAX_BYTES:
153
215
  fail(f"max bytes must be between 1 and {MAX_MAX_BYTES}")
154
216
  if path == "-":
155
217
  raw = sys.stdin.buffer.read(max_bytes + 1)
156
218
  else:
157
- p = Path(path)
158
- try:
159
- st = p.stat()
160
- except OSError as exc:
161
- fail(f"could not read input file: {exc}")
162
- if not stat.S_ISREG(st.st_mode):
163
- fail("input path must be a regular file")
164
- if st.st_size > max_bytes + 1:
165
- # Read only the bounded prefix so large requests cannot exhaust memory.
166
- with p.open("rb") as fh:
167
- raw = fh.read(max_bytes + 1)
168
- else:
169
- raw = p.read_bytes()
219
+ return read_bounded_regular_path(path, max_bytes=max_bytes, label="input file")
170
220
  truncated = len(raw) > max_bytes
171
221
  if truncated:
172
222
  raw = raw[:max_bytes]
@@ -494,20 +544,20 @@ def _base_open_flags() -> int:
494
544
  return flags
495
545
 
496
546
 
497
- def _no_follow_flag() -> int:
547
+ def _no_follow_flag(*, label: str = "private local cost storage") -> int:
498
548
  if not NO_FOLLOW_SUPPORTED:
499
- fail("private local cost storage requires O_NOFOLLOW support")
549
+ fail(f"{label} requires O_NOFOLLOW support")
500
550
  return os.O_NOFOLLOW
501
551
 
502
552
 
503
- def _directory_open_flags(*, follow_final: bool = False) -> int:
553
+ def _directory_open_flags(*, follow_final: bool = False, label: str = "private local cost storage") -> int:
504
554
  flags = os.O_RDONLY
505
555
  if hasattr(os, "O_CLOEXEC"):
506
556
  flags |= os.O_CLOEXEC
507
557
  if hasattr(os, "O_DIRECTORY"):
508
558
  flags |= os.O_DIRECTORY
509
559
  if not follow_final:
510
- flags |= _no_follow_flag()
560
+ flags |= _no_follow_flag(label=label)
511
561
  return flags
512
562
 
513
563
 
@@ -572,18 +622,18 @@ def reject_symlink_components(path: Path, *, label: str) -> Path:
572
622
  return path
573
623
 
574
624
 
575
- def open_private_directory(path: Path, *, label: str) -> int:
625
+ def open_directory_no_follow(path: Path, *, label: str) -> int:
576
626
  """Open an existing directory without following symlink path components."""
577
627
 
578
628
  if not dir_fd_open_supported():
579
- fail(f"{label} requires dir_fd support for symlink-safe private storage")
629
+ fail(f"{label} requires dir_fd support for symlink-safe directory traversal")
580
630
  path = reject_symlink_components(path, label=label)
581
- flags = _directory_open_flags()
631
+ flags = _directory_open_flags(label=label)
582
632
  if path.is_absolute():
583
633
  anchor = path.anchor or os.sep
584
634
  parts = path.parts[1:]
585
635
  try:
586
- current_fd = os.open(anchor, _directory_open_flags(follow_final=True))
636
+ current_fd = os.open(anchor, _directory_open_flags(follow_final=True, label=label))
587
637
  except OSError as exc:
588
638
  fail(f"could not inspect {label}: {os_error_detail(exc)}")
589
639
  else:
@@ -635,6 +685,12 @@ def open_private_directory(path: Path, *, label: str) -> int:
635
685
  pass
636
686
 
637
687
 
688
def open_private_directory(path: Path, *, label: str) -> int:
    """Open an existing private-storage directory without following symlinks.

    Thin wrapper that forwards ``path`` and ``label`` unchanged to
    ``open_directory_no_follow`` and returns its result (a directory file
    descriptor, per the ``int`` annotation).
    """

    return open_directory_no_follow(path, label=label)
692
+
693
+
638
694
  def fsync_directory_fd(fd: int) -> None:
639
695
  if os.name != "posix":
640
696
  return
@@ -676,7 +732,7 @@ def open_private_regular_fd_for_read(path: Path, *, label: str) -> int:
676
732
  fd = -1
677
733
  try:
678
734
  parent_fd = open_private_directory(path.parent, label=f"{label} parent")
679
- fd = os.open(leaf_name, _base_open_flags() | _no_follow_flag(), dir_fd=parent_fd)
735
+ fd = os.open(leaf_name, _base_open_flags() | _no_follow_flag(label=label), dir_fd=parent_fd)
680
736
  st = os.fstat(fd)
681
737
  if not stat.S_ISREG(st.st_mode):
682
738
  fail(f"{label} must be a regular file")
@@ -1138,41 +1194,47 @@ def open_private_regular_file_for_append(path: Path, *, label: str) -> int:
1138
1194
  flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND | _no_follow_flag()
1139
1195
  if hasattr(os, "O_CLOEXEC"):
1140
1196
  flags |= os.O_CLOEXEC
1141
- parent_fd = -1
1142
- fd = -1
1143
- try:
1144
- parent_fd = open_private_directory(path.parent, label=f"{label} parent")
1145
- fd = os.open(leaf_name, flags, 0o600, dir_fd=parent_fd)
1146
- st = os.fstat(fd)
1147
- if not stat.S_ISREG(st.st_mode):
1148
- fail(f"{label} must be a regular file")
1149
- try:
1150
- os.fchmod(fd, 0o600)
1151
- except (AttributeError, OSError):
1152
- pass
1153
- st = os.fstat(fd)
1154
- if os.name == "posix" and stat.S_IMODE(st.st_mode) != 0o600:
1155
- fail(f"could not verify {label} privacy: expected mode 0600")
1156
- owned_fd = fd
1197
+ for attempt in range(LEDGER_OPEN_RETRY_ATTEMPTS):
1198
+ parent_fd = -1
1157
1199
  fd = -1
1158
- return owned_fd
1159
- except CostGuardError:
1160
- raise
1161
- except OSError as exc:
1162
- fail(f"could not open {label}: {os_error_detail(exc)}")
1163
- finally:
1164
- if fd >= 0:
1165
- # Ownership transfers to the caller only on the successful return
1166
- # above. On errors, close before surfacing a deterministic message.
1167
- try:
1168
- os.close(fd)
1169
- except OSError:
1170
- pass
1171
- if parent_fd >= 0:
1200
+ try:
1201
+ parent_fd = open_private_directory(path.parent, label=f"{label} parent")
1202
+ fd = os.open(leaf_name, flags, 0o600, dir_fd=parent_fd)
1203
+ st = os.fstat(fd)
1204
+ if not stat.S_ISREG(st.st_mode):
1205
+ fail(f"{label} must be a regular file")
1172
1206
  try:
1173
- os.close(parent_fd)
1174
- except OSError:
1207
+ os.fchmod(fd, 0o600)
1208
+ except (AttributeError, OSError):
1175
1209
  pass
1210
+ st = os.fstat(fd)
1211
+ if os.name == "posix" and stat.S_IMODE(st.st_mode) != 0o600:
1212
+ fail(f"could not verify {label} privacy: expected mode 0600")
1213
+ owned_fd = fd
1214
+ fd = -1
1215
+ return owned_fd
1216
+ except CostGuardError:
1217
+ raise
1218
+ except OSError as exc:
1219
+ if exc.errno == errno.ENOENT and attempt + 1 < LEDGER_OPEN_RETRY_ATTEMPTS:
1220
+ time.sleep(LEDGER_OPEN_RETRY_SECONDS)
1221
+ continue
1222
+ fail(f"could not open {label}: {os_error_detail(exc)}")
1223
+ finally:
1224
+ if fd >= 0:
1225
+ # Ownership transfers to the caller only on the successful
1226
+ # return above. On errors, close before surfacing a
1227
+ # deterministic message.
1228
+ try:
1229
+ os.close(fd)
1230
+ except OSError:
1231
+ pass
1232
+ if parent_fd >= 0:
1233
+ try:
1234
+ os.close(parent_fd)
1235
+ except OSError:
1236
+ pass
1237
+ raise AssertionError("unreachable: append retry loop exits via return or fail")
1176
1238
 
1177
1239
 
1178
1240
  def load_ledger(store_dir: Path) -> list[dict[str, Any]]:
@@ -1280,7 +1342,7 @@ def default_pricing_profile() -> dict[str, Any]:
1280
1342
  }
1281
1343
 
1282
1344
 
1283
- def load_pricing_profile(raw: str | None) -> dict[str, Any]:
1345
+ def load_pricing_profile(raw: str | None, *, max_bytes: int = DEFAULT_MAX_BYTES) -> dict[str, Any]:
1284
1346
  profile = default_pricing_profile()
1285
1347
  if not raw:
1286
1348
  return profile
@@ -1288,7 +1350,12 @@ def load_pricing_profile(raw: str | None) -> dict[str, Any]:
1288
1350
  if raw.lstrip().startswith("{"):
1289
1351
  override = json.loads(raw, parse_constant=reject_json_constant)
1290
1352
  else:
1291
- override = json.loads(Path(raw).read_text(encoding="utf-8"), parse_constant=reject_json_constant)
1353
+ text, truncated = read_bounded_regular_path(raw, max_bytes=max_bytes, label="pricing profile")
1354
+ if truncated:
1355
+ fail("pricing profile exceeded max bytes")
1356
+ override = json.loads(text, parse_constant=reject_json_constant)
1357
+ except CostGuardError:
1358
+ raise
1292
1359
  except (OSError, json.JSONDecodeError, ValueError) as exc:
1293
1360
  fail(f"could not load pricing profile: {exc}")
1294
1361
  if not isinstance(override, dict):
@@ -1542,7 +1609,7 @@ def annotate_cache_state(
1542
1609
  def preflight_command(args: argparse.Namespace) -> int:
1543
1610
  request_raw, _truncated = load_json_input(args.request, max_bytes=args.max_bytes)
1544
1611
  request = require_json_object(request_raw, "request")
1545
- profile = load_pricing_profile(args.pricing_profile)
1612
+ profile = load_pricing_profile(args.pricing_profile, max_bytes=args.max_bytes)
1546
1613
  if args.usd_to_krw is not None:
1547
1614
  profile["usd_to_krw"] = usd_to_krw(profile, args.usd_to_krw)
1548
1615
  if args.budget_usd is not None:
@@ -1809,7 +1876,7 @@ def observe_command(args: argparse.Namespace) -> int:
1809
1876
  usage = usage_raw
1810
1877
  if not isinstance(usage, dict):
1811
1878
  fail("usage must be a JSON object or an object containing a usage object")
1812
- profile = load_pricing_profile(args.pricing_profile)
1879
+ profile = load_pricing_profile(args.pricing_profile, max_bytes=args.max_bytes)
1813
1880
  if args.usd_to_krw is not None:
1814
1881
  profile["usd_to_krw"] = usd_to_krw(profile, args.usd_to_krw)
1815
1882
  model = str(args.model or (usage_raw.get("model") if isinstance(usage_raw, dict) else "") or "unknown")
@@ -2217,7 +2284,7 @@ def emit(data: dict[str, Any], *, json_mode: bool) -> None:
2217
2284
  def add_common_cost_args(parser: argparse.ArgumentParser) -> None:
2218
2285
  parser.add_argument("--pricing-profile", help="JSON string or file with input/output rates, cache multipliers, and usd_to_krw")
2219
2286
  parser.add_argument("--usd-to-krw", type=float, help="override USD→KRW exchange rate used for estimates")
2220
- parser.add_argument("--max-bytes", type=int, default=DEFAULT_MAX_BYTES, help=f"maximum JSON input bytes (default: {DEFAULT_MAX_BYTES})")
2287
+ parser.add_argument("--max-bytes", type=int, default=DEFAULT_MAX_BYTES, help=f"maximum JSON input and pricing profile file bytes (default: {DEFAULT_MAX_BYTES})")
2221
2288
  parser.add_argument("--json", action="store_true", help="emit machine-readable JSON")
2222
2289
 
2223
2290