@ictechgy/context-guard 0.4.9 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.ko.md +59 -31
- package/README.md +85 -36
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +30 -6
- package/package.json +4 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +20 -14
- package/plugins/context-guard/README.md +26 -17
- package/plugins/context-guard/bin/context-guard +147 -25
- package/plugins/context-guard/bin/context-guard-artifact +884 -79
- package/plugins/context-guard/bin/context-guard-audit +33 -2
- package/plugins/context-guard/bin/context-guard-bench +1542 -31
- package/plugins/context-guard/bin/context-guard-cache-score +665 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +790 -6
- package/plugins/context-guard/bin/context-guard-experiments +463 -26
- package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +892 -49
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
- package/plugins/context-guard/bin/context-guard-trim-output +288 -41
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_commands.py +230 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -1,2713 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Build a deterministic, budgeted local context pack from prioritized files.
|
|
3
|
-
|
|
4
|
-
The packer is local-only and intentionally conservative. It assembles selected
|
|
5
|
-
file slices into a Markdown body whose rendered UTF-8 byte length is bounded by
|
|
6
|
-
``--budget-bytes``. It redacts before building the pack/receipt, records why
|
|
7
|
-
lower-priority sources were omitted, and emits exact local slice commands for
|
|
8
|
-
retrieval when the path is safe to display.
|
|
9
|
-
"""
|
|
10
|
-
from __future__ import annotations
|
|
11
|
-
|
|
12
|
-
import argparse
|
|
13
|
-
import ast
|
|
14
|
-
import copy
|
|
15
|
-
import hashlib
|
|
16
|
-
import importlib.machinery
|
|
17
|
-
import importlib.util
|
|
18
|
-
import json
|
|
19
|
-
import os
|
|
20
|
-
import posixpath
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
import re
|
|
23
|
-
import shlex
|
|
24
|
-
import stat
|
|
25
|
-
import subprocess
|
|
26
|
-
import sys
|
|
27
|
-
import threading
|
|
28
|
-
import time
|
|
29
|
-
from dataclasses import dataclass
|
|
30
|
-
from typing import Any
|
|
31
|
-
|
|
32
|
-
# --- Tool identity and byte budgets -----------------------------------------
TOOL_NAME = "context-guard-pack"
VERSION = 1
DEFAULT_BUDGET_BYTES = 12_000
MIN_BUDGET_BYTES = 0
MAX_BUDGET_BYTES = 2_000_000
MAX_RECEIPT_BYTES = 64_000
MAX_MANIFEST_BYTES = 1_000_000
MAX_LABEL_CHARS = 160
MAX_REASON_CHARS = 120
TOKEN_PROXY_CHARS_PER_TOKEN = 4

# --- Schema identifiers emitted in JSON output ------------------------------
SUGGEST_SCHEMA_VERSION = "contextguard.pack-suggest.v1"
AUTO_SCHEMA_VERSION = "contextguard.pack-auto.v1"
AUTO_EXPLAIN_SCHEMA_VERSION = "contextguard.pack-auto-explain.v1"
REPO_MAP_SCHEMA_VERSION = "contextguard.pack-repo-map.v1"

# --- Limits for the suggest / query-scan features ---------------------------
DEFAULT_SUGGEST_TOP = 8
MAX_SUGGEST_TOP = 50
DEFAULT_SUGGEST_CONTEXT_LINES = 20
MAX_SUGGEST_CONTEXT_LINES = 120
SUGGEST_WHOLE_FILE_MAX_LINES = 120
MAX_SUGGEST_INPUT_BYTES = 256_000
MAX_QUERY_SCAN_FILES = 2_000
MAX_QUERY_SCAN_BYTES_PER_FILE = 200_000

# --- Limits for the repo-map feature ----------------------------------------
MAX_REPO_MAP_FILES = 1_000
MAX_REPO_MAP_BYTES_PER_FILE = 120_000
MAX_REPO_MAP_TREE_ENTRIES = 30
MAX_REPO_MAP_SIGNATURE_ENTRIES = 40
MAX_REPO_MAP_GRAPH_RANK_ENTRIES = 30
MAX_REPO_MAP_RETRIEVAL_HINTS = 30
MAX_REPO_MAP_SECRET_RISK_FILES = 20

PACK_DIR = ".context-guard/packs"
REDACTED_PATH_COMPONENT = "[REDACTED-PATH-COMPONENT]"

# C0 controls, DEL and C1 controls.
CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f-\x9f]")

# Secret-looking content: key blocks, vendor token prefixes, Authorization
# headers and generic ``key = value`` assignments. Compiled case-insensitively
# with DOTALL via the leading ``(?is)``.
SECRET_CONTENT_RE = re.compile(
    r"(?is)("
    r"-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----|"
    r"AKIA[0-9A-Z]{16}|"
    r"ASIA[0-9A-Z]{16}|"
    r"gh[pousr]_[A-Za-z0-9_]{20,}|"
    r"github_pat_[A-Za-z0-9_]{20,}|"
    r"glpat-[A-Za-z0-9_-]{12,}|"
    r"xox[abprs]-[A-Za-z0-9-]{10,}|"
    r"sk-(?:ant|proj)-[A-Za-z0-9_-]{12,}|"
    r"sk-[A-Za-z0-9][A-Za-z0-9_-]{20,}|"
    r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}|"
    r"npm_[A-Za-z0-9]{20,}|"
    r"AIza[0-9A-Za-z_\-]{20,}|"
    r"(?i:Authorization)\s*:\s*(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]+|"
    r"(?<![A-Za-z0-9])(?:api[_-]?key|token|secret|password|client[_-]?secret)\s*[:=]\s*[^\s]+"
    r")"
)

# Extra evidence checked on paths: dotted three-part tokens, Bearer/Basic
# credentials and URLs that carry userinfo before an ``@``.
SECRET_PATH_COMPONENT_RE = re.compile(
    r"(?i)("
    r"SG\.[A-Za-z0-9_-]{16,256}\.[A-Za-z0-9_-]{16,512}|"
    r"eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}|"
    r"\b(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]{12,}|"
    r"[a-z][a-z0-9+.-]{0,31}:/+(?:[^/\s:@]{0,256}:[^/\s@]{0,2048}|[^/\s@]{1,2048})@"
    r")"
)

# (category name, pattern) pairs used to classify secret risk.
SECRET_RISK_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
    ("private_key_block", re.compile(r"(?is)-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----")),
    ("github_token", re.compile(r"gh[pousr]_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|glpat-[A-Za-z0-9_-]{12,}")),
    ("provider_api_key", re.compile(r"sk-(?:ant|proj)-[A-Za-z0-9_-]{12,}|sk-[A-Za-z0-9][A-Za-z0-9_-]{20,}|AIza[0-9A-Za-z_\-]{20,}")),
    ("authorization_header", re.compile(r"(?i)Authorization\s*:\s*(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]+")),
    ("generic_secret_assignment", re.compile(r"(?i)(?:api[_-]?key|token|secret|password|client[_-]?secret)\s*[:=]\s*[^\s]+")),
)

# File extensions treated as text by the repo map.
REPO_MAP_TEXT_EXTENSIONS = {
    ".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs",
    ".go", ".rs", ".java", ".kt", ".kts", ".swift", ".c", ".cc", ".cpp", ".h", ".hpp",
    ".md", ".mdx", ".txt", ".json", ".yaml", ".yml", ".toml", ".sh", ".css", ".html",
}
SYMBOL_HINT_EXTENSIONS = {".py", ".js", ".jsx", ".ts", ".tsx", ".go", ".rs"}

# One capture group per recognised declaration form (JS function/class/arrow
# binding, Go func, Rust fn); exactly one group is set on a match.
SIGNATURE_LINE_RE = re.compile(
    r"^\s*(?:export\s+)?(?:(?:async\s+)?function\s+([A-Za-z_$][\w$]*)\s*\(|class\s+([A-Za-z_$][\w$]*)|"
    r"(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?(?:\([^)]*\)|[A-Za-z_$][\w$]*)\s*=>|"
    r"func\s+(?:\([^)]*\)\s*)?([A-Za-z_]\w*)\s*\(|(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_]\w*)\s*\()"
)

# Import targets in JS (``from '...'`` / ``import '...'``) and Python
# (``from x import`` / ``import x``) syntax, exposed as named groups.
IMPORT_PATH_RE = re.compile(
    r"(?:from\s+['\"](?P<jsfrom>[^'\"]+)['\"]|"
    r"import(?:\s+[^;\n'\"]+?\s+from)?\s+['\"](?P<jsimport>[^'\"]+)['\"]|"
    r"from\s+(?P<pyfrom>\.*[A-Za-z_][\w.]*|\.+)\s+import|"
    r"import\s+(?P<pyimport>[A-Za-z_][\w.]*))"
)
PY_FROM_IMPORT_LINE_RE = re.compile(r"^\s*from\s+(?P<module>\.*[A-Za-z_][\w.]*|\.+)\s+import\s+(?P<names>[^\n#;]+)")
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
@dataclass(frozen=True)
class LineRange:
    """Immutable ``start``/``end`` pair describing a span of lines."""

    start: int
    end: int

    def as_dict(self) -> dict[str, int]:
        """Return the span as a ``{"start": ..., "end": ...}`` mapping."""
        return dict(start=self.start, end=self.end)

    def identity(self) -> str:
        """Return the span rendered as ``start:end``."""
        return "{}:{}".format(self.start, self.end)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
@dataclass
class SourceSpec:
    """One requested source, as parsed from ``--source`` or a manifest entry."""

    # Path string exactly as supplied by the caller.
    path: str
    # Selection priority; the parsers clamp it to +/- 1_000_000.
    priority: int = 0
    # Requested line span, or None when no span was requested.
    lines: LineRange | None = None
    # Optional display label.
    label: str | None = None
    # Position of this spec in the caller's input.
    input_index: int = 0
    # Where the spec came from: "cli" or "manifest".
    origin: str = "cli"
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
@dataclass
class ResolvedSource:
    """A source that was opened, sanitized and sliced successfully."""

    # The request this result was produced for.
    spec: SourceSpec
    # Root joined with the cleaned relative path.
    abs_path: Path
    # Relative path after per-component redaction.
    display_path: str
    # True when any component of display_path was redacted.
    redacted_path: bool
    # Span that was asked for (the whole file when none was given).
    requested_lines: LineRange | None
    # Sanitized lines that fall inside the requested span.
    selected_lines: list[str]
    # Number of lines in the sanitized file.
    total_lines: int
    # Number of lines the sanitizer reported as redacted.
    redacted_lines: int
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
@dataclass
class SuggestCandidate:
    """A scored source suggestion together with the reason it was proposed."""

    path: str
    score: int
    reason: str
    # Optional line span for the suggestion.
    lines: LineRange | None = None
    # Optional display label.
    label: str | None = None
    # Position of the candidate in the originating input.
    input_index: int = 0
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
class PackError(ValueError):
    """Error raised by the packer for invalid input or unusable paths."""
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
class FallbackLineSanitizer:
    """Regex-only line redactor used when no sanitizer script can be loaded.

    Every ``SECRET_CONTENT_RE`` match in a line is replaced with a redaction
    marker. For ``key=value`` / ``key: value`` style matches the key is kept
    so the output stays readable.
    """

    # First key/value separator inside a matched secret.
    _SEPARATOR_RE = re.compile(r"[:=]")
    # Key names for which the ``key: [REDACTED]`` form is kept.
    _SENSITIVE_KEY_RE = re.compile(r"(?i)(api|token|secret|password|authorization)")

    def __init__(self, *, show_paths: bool = False) -> None:
        # Stored for interface parity with the external sanitizer; the
        # fallback itself never reads it.
        self.show_paths = show_paths
        # Number of lines in which at least one redaction happened.
        self.redactions = 0

    @classmethod
    def _redacted_replacement(cls, text: str) -> str:
        """Return the replacement for one matched secret.

        The key is whatever precedes the *first* ``:`` or ``=``. Splitting on
        the first ``=`` anywhere in the match (the previous behaviour) leaked
        secrets whose value contains ``=``, such as base64 padding in
        ``Authorization: Basic ...==`` or ``password: abc=def``.
        """
        separator = cls._SEPARATOR_RE.search(text)
        if separator is None:
            return "[REDACTED]"
        key = text[: separator.start()]
        if separator.group(0) == "=":
            return key + "=[REDACTED]"
        if cls._SENSITIVE_KEY_RE.search(key):
            return key + ": [REDACTED]"
        return "[REDACTED]"

    def sanitize(self, raw_line: str) -> tuple[str, bool]:
        """Redact secrets in ``raw_line``.

        Returns:
            ``(sanitized_line, redacted)`` where ``redacted`` is True when at
            least one replacement was made. ``self.redactions`` grows by one
            per redacted line, not per replacement.
        """
        line, count = SECRET_CONTENT_RE.subn(
            lambda match: self._redacted_replacement(match.group(0)),
            raw_line,
        )
        if count:
            self.redactions += 1
        return line, bool(count)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
# The sanitizer class is resolved once per process and cached here, so the
# sanitizer script is not re-imported for every file. Each sanitize_text()
# call still builds a fresh, stateful sanitizer instance from it.
_LINE_SANITIZER_FACTORY_CACHE: Any | None = None
_LINE_SANITIZER_FACTORY_LOCK = threading.Lock()


def load_line_sanitizer_factory() -> Any:
    """Return the sanitizer class, loading it the first time it is needed.

    Looks next to this script for one of the known sanitizer file names and
    imports the first one that exists, using its ``LineSanitizer`` attribute.
    When none of the files exists, ``FallbackLineSanitizer`` is used.

    Raises:
        RuntimeError: an existing sanitizer script could not be loaded.
    """
    global _LINE_SANITIZER_FACTORY_CACHE
    # Unlocked fast path; the check is repeated under the lock below.
    if _LINE_SANITIZER_FACTORY_CACHE is not None:
        return _LINE_SANITIZER_FACTORY_CACHE
    with _LINE_SANITIZER_FACTORY_LOCK:
        if _LINE_SANITIZER_FACTORY_CACHE is not None:
            return _LINE_SANITIZER_FACTORY_CACHE
        here = Path(__file__).resolve().parent
        script_names = ("sanitize_output.py", "context-guard-sanitize-output", "claude-sanitize-output")
        for script_name in script_names:
            candidate = here / script_name
            if not candidate.exists():
                continue
            try:
                module_name = f"_context_guard_pack_sanitize_{os.getpid()}"
                # SourceFileLoader is used directly because two of the
                # candidate names have no ``.py`` suffix.
                loader = importlib.machinery.SourceFileLoader(module_name, str(candidate))
                spec = importlib.util.spec_from_loader(loader.name, loader)
                if spec is None:
                    raise RuntimeError("import spec unavailable")
                module = importlib.util.module_from_spec(spec)
                loader.exec_module(module)
                _LINE_SANITIZER_FACTORY_CACHE = module.LineSanitizer
                return _LINE_SANITIZER_FACTORY_CACHE
            except Exception as exc:
                # A sanitizer that exists but cannot be loaded is an error;
                # there is no fall-through to the regex fallback.
                raise RuntimeError(f"could not load sanitizer {candidate}: {exc}") from exc
        _LINE_SANITIZER_FACTORY_CACHE = FallbackLineSanitizer
        return _LINE_SANITIZER_FACTORY_CACHE
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
def load_line_sanitizer(show_paths: bool = False) -> object:
    """Build a new sanitizer instance from the cached sanitizer class."""
    return load_line_sanitizer_factory()(show_paths=show_paths)
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
def sanitize_text(text: str, *, show_paths: bool = False) -> tuple[str, int]:
    """Sanitize ``text`` line by line with a fresh sanitizer.

    Returns:
        ``(sanitized_text, redacted_line_count)``. Line endings are kept
        because the text is split with ``keepends=True``.
    """
    sanitizer = load_line_sanitizer(show_paths)
    pieces: list[str] = []
    redacted_count = 0
    for raw in text.splitlines(keepends=True):
        clean, changed = sanitizer.sanitize(raw)  # type: ignore[attr-defined]
        pieces.append(clean)
        redacted_count += 1 if changed else 0
    return "".join(pieces), redacted_count
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def byte_len(text: str) -> int:
    """Return the UTF-8 encoded length of ``text`` in bytes.

    Characters that cannot be encoded are replaced rather than raising.
    """
    encoded = text.encode("utf-8", "replace")
    return len(encoded)
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def token_proxy(text: str) -> int:
    """Estimate a token count from the character length of ``text``.

    Empty text gives 0; any non-empty text gives at least 1.
    """
    if text:
        estimate = round(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN)
        return max(1, estimate)
    return 0
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
def sha256_text(text: str) -> str:
    """Return the hex SHA-256 digest of ``text`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8", errors="replace"))
    return hasher.hexdigest()
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
def path_hash(path: Path) -> str:
    """Return the first 12 hex characters of the SHA-256 of ``str(path)``."""
    encoded = str(path).encode("utf-8", "replace")
    full_digest = hashlib.sha256(encoded).hexdigest()
    return full_digest[:12]
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def sanitize_path_component(component: str) -> tuple[str, bool]:
    """Replace a secret-looking path component with a placeholder.

    Returns:
        ``(component, False)`` when nothing matched, otherwise
        ``(REDACTED_PATH_COMPONENT, True)``.
    """
    if SECRET_CONTENT_RE.search(component) is None:
        return component, False
    return REDACTED_PATH_COMPONENT, True
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
def display_root(root: Path) -> str:
    """Return a display label for ``root``: ``<name>#path:<hash>``.

    The name is the final path component, or ``"project"`` when that
    component is empty or looks like a secret.
    """
    base_name = root.name or "project"
    shown, was_redacted = sanitize_path_component(base_name)
    label = "project" if was_redacted else shown
    return f"{label}#path:{path_hash(root)}"
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
def display_rel_path(rel: str) -> tuple[str, bool]:
    """Normalise a relative path for display, redacting secret-like parts.

    Backslashes become forward slashes and empty components are dropped.

    Returns:
        ``(display_path, redacted)`` where ``redacted`` is True when at least
        one component was replaced.
    """
    shown: list[str] = []
    any_redacted = False
    for component in rel.replace("\\", "/").split("/"):
        if component:
            safe, did_redact = sanitize_path_component(component)
            shown.append(safe)
            if did_redact:
                any_redacted = True
    return "/".join(shown), any_redacted
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
def repo_map_path_has_sensitive_evidence(value: str) -> bool:
    """Report whether ``value`` has control characters or secret-like path text."""
    if CONTROL_CHAR_RE.search(value):
        return True
    return SECRET_PATH_COMPONENT_RE.search(value) is not None
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
def repo_map_display_rel_path(rel: str) -> tuple[str, bool]:
    """Display form of a repo-map relative path.

    A path with sensitive evidence is replaced entirely by a hashed
    placeholder; any other path gets the per-component treatment of
    ``display_rel_path``.
    """
    slashed = rel.replace("\\", "/")
    if not repo_map_path_has_sensitive_evidence(slashed):
        return display_rel_path(slashed)
    placeholder = f"redacted-path#path:{sha256_text(slashed)[:12]}"
    return placeholder, True
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
def repo_map_safe_raw_path_label(raw: str) -> str:
    """Label for a raw repo-map path, hashed when it has sensitive evidence."""
    slashed = raw.replace("\\", "/")
    if not repo_map_path_has_sensitive_evidence(slashed):
        return safe_raw_path_label(slashed)
    return f"redacted-path#path:{sha256_text(slashed)[:12]}"
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
def parse_line_range(value: object) -> LineRange | None:
    """Parse a line-range request into a ``LineRange``.

    Accepted forms:
        * ``None`` or ``""``: no range requested, returns ``None``.
        * a mapping with ``start`` and ``end`` entries.
        * a ``"start:end"`` string.

    Raises:
        PackError: ``"invalid_lines"`` for any other type, for bounds that
            are not integers, or when ``start < 1`` or ``end < start``.
    """
    if value is None or value == "":
        return None
    if isinstance(value, dict):
        bounds = (value.get("start"), value.get("end"))
    elif isinstance(value, str) and ":" in value:
        bounds = tuple(value.split(":", 1))
    else:
        raise PackError("invalid_lines")
    try:
        start, end = (int(bound) for bound in bounds)
    except (TypeError, ValueError, OverflowError) as exc:
        # OverflowError covers float infinity, which json.loads produces for
        # the ``Infinity`` literal; it used to escape as an unhandled error.
        raise PackError("invalid_lines") from exc
    if start < 1 or end < start:
        raise PackError("invalid_lines")
    return LineRange(start, end)
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Convert ``value`` to an int clamped to ``[minimum, maximum]``.

    Returns ``default`` unchanged (not clamped) when the conversion fails.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    floored = max(parsed, minimum)
    return min(floored, maximum)
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
def cap_label(value: object, default: str | None = None, limit: int = MAX_LABEL_CHARS) -> str | None:
    """Normalise a label: collapse whitespace, redact secrets, cap length.

    Returns ``default`` when ``value`` is None or nothing is left after
    cleaning. Text longer than ``limit`` is cut and given a
    `` ...[truncated]`` suffix.
    """
    if value is None:
        return default
    # split() with no arguments also trims leading and trailing whitespace.
    collapsed = " ".join(str(value).split())
    scrubbed = SECRET_CONTENT_RE.sub("[REDACTED]", collapsed)
    if not scrubbed:
        return default
    if len(scrubbed) <= limit:
        return scrubbed
    # 15 is the length of the suffix appended below.
    keep = max(0, limit - 15)
    return scrubbed[:keep].rstrip() + " ...[truncated]"
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
def read_manifest(path: Path) -> list[SourceSpec]:
    """Load source specs from a JSON manifest file.

    The manifest must be a JSON object with a ``sources`` list of objects,
    each with a ``path``. ``version`` defaults to ``VERSION`` and must equal
    it. An entry with an unparsable ``lines`` value is kept and given the
    ``LineRange(-1, -1)`` marker rather than failing the whole manifest.

    Raises:
        PackError: the file is unreadable, larger than
            ``MAX_MANIFEST_BYTES``, not valid UTF-8 JSON, or has the wrong
            shape or version.
    """
    try:
        payload = path.read_bytes()
    except OSError as exc:
        raise PackError(f"could not read manifest: {exc.strerror or exc.__class__.__name__}") from exc
    size = len(payload)
    if size > MAX_MANIFEST_BYTES:
        raise PackError(f"manifest exceeds trusted size cap: {size} > {MAX_MANIFEST_BYTES}")
    try:
        data = json.loads(payload.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise PackError(f"invalid manifest JSON: {exc}") from exc

    is_object = isinstance(data, dict)
    version = data.get("version", VERSION) if is_object else None
    if version != VERSION:
        raise PackError(f"unsupported manifest version: {version}")
    sources = data.get("sources") if is_object else None
    if not isinstance(sources, list):
        raise PackError("manifest sources must be a list")

    specs: list[SourceSpec] = []
    for entry in sources:
        if not isinstance(entry, dict):
            raise PackError("manifest sources must be objects")
        if "path" not in entry:
            raise PackError("manifest source missing path")
        try:
            span = parse_line_range(entry.get("lines"))
        except PackError:
            span = LineRange(-1, -1)
        spec = SourceSpec(
            path=str(entry.get("path", "")),
            priority=bounded_int(entry.get("priority"), 0, -1_000_000, 1_000_000),
            lines=span,
            label=cap_label(entry.get("label")),
            origin="manifest",
        )
        specs.append(spec)
    return specs
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
def parse_source_spec(raw: str) -> SourceSpec:
    """Parse one ``--source`` argument.

    Two forms are accepted: a bare path, or comma-separated ``key=value``
    pairs (``path``, ``priority``, ``lines``, ``label``). The form is chosen
    by whether the text before the first comma contains ``=``. An unparsable
    ``lines`` value becomes the ``LineRange(-1, -1)`` marker.

    Raises:
        PackError: the argument is empty, a part has no ``=``, or no path
            is given.
    """
    text = raw.strip()
    if not text:
        raise PackError("empty --source")
    fields: dict[str, str] = {}
    first_part = text.split(",", 1)[0]
    if "=" in first_part:
        for chunk in text.split(","):
            if not chunk:
                continue
            key, separator, val = chunk.partition("=")
            if not separator:
                raise PackError(f"invalid --source part: {chunk}")
            fields[key.strip()] = val.strip()
    else:
        fields["path"] = text
    if not fields.get("path"):
        raise PackError("--source missing path")
    try:
        span = parse_line_range(fields.get("lines"))
    except PackError:
        span = LineRange(-1, -1)
    return SourceSpec(
        path=fields["path"],
        priority=bounded_int(fields.get("priority"), 0, -1_000_000, 1_000_000),
        lines=span,
        label=cap_label(fields.get("label")),
        origin="cli",
    )
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
def normalize_root(raw_root: Path) -> Path:
    """Expand and resolve the pack root.

    Raises:
        PackError: the root is itself a symlink, cannot be resolved, or is
            not a directory.
    """
    candidate = raw_root.expanduser()
    try:
        is_link = candidate.is_symlink()
        resolved = None if is_link else candidate.resolve()
    except OSError as exc:
        raise PackError(f"could not resolve root: {exc.strerror or exc.__class__.__name__}") from exc
    if resolved is None:
        raise PackError("root must not be a symlink")
    if not resolved.is_dir():
        raise PackError("root must be a directory")
    return resolved
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
def omission(spec: SourceSpec, reason: str, *, path: str | None = None, redacted_path: bool = False) -> dict[str, Any]:
    """Build the receipt entry for a source that was left out of the pack.

    Args:
        spec: The request being omitted.
        reason: Short code explaining the omission.
        path: Display path to report; defaults to a sanitized label built
            from ``spec.path``.
        redacted_path: When True, records that no retrieval command can be
            offered because the path was redacted.
    """
    shown_path = safe_raw_path_label(spec.path) if path is None else path
    item: dict[str, Any] = {
        "path": shown_path,
        "status": "omitted",
        "priority": spec.priority,
        "reason": reason,
        "input_index": spec.input_index,
    }
    if spec.label:
        item["label"] = spec.label
    span = spec.lines
    # A non-positive start marks an unparsable range, which is not reported.
    if span and span.start > 0:
        item["requested_lines"] = span.as_dict()
    if redacted_path:
        item["retrieval_omitted_reason"] = "redacted_path"
    return item
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
def safe_raw_path_label(raw: str) -> str:
    """Turn an unvalidated path string into a label that is safe to display.

    Backslashes become slashes, empty and ``.`` components are dropped, and
    secret-looking components are replaced. Returns ``"path"`` when no
    component is left.
    """
    shown: list[str] = []
    for component in raw.replace("\\", "/").split("/"):
        if component == "" or component == ".":
            continue
        shown.append(sanitize_path_component(component)[0])
    joined = "/".join(shown)
    return joined if joined else "path"
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
def lexical_rel(raw_path: str) -> tuple[Path | None, str]:
    """Validate ``raw_path`` as a relative path without touching the disk.

    Returns:
        ``(cleaned_path, "")`` on success, or ``(None, "outside_root")`` for
        absolute paths, paths with a ``..`` component, and paths that reduce
        to nothing.
    """
    rejected: tuple[Path | None, str] = (None, "outside_root")
    candidate = Path(raw_path)
    if candidate.is_absolute():
        return rejected
    segments = candidate.parts
    if not segments:
        return rejected
    for segment in segments:
        if segment == ".." or segment == "":
            return rejected
    cleaned = Path(*(segment for segment in segments if segment != "."))
    if not cleaned.parts:
        return rejected
    return cleaned, ""
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
def open_dir_no_follow(path: Path | str, *, dir_fd: int | None = None) -> int:
    """Open a directory read-only and return its file descriptor.

    ``O_DIRECTORY``, ``O_NOFOLLOW`` and ``O_CLOEXEC`` are added where the
    platform defines them. When ``dir_fd`` is given, ``path`` is opened
    relative to that descriptor. The caller owns the returned descriptor.

    Raises:
        OSError: the open or the fstat fails.
        PackError: the opened object is not a directory.
    """
    flags = os.O_RDONLY
    for optional_flag in ("O_DIRECTORY", "O_NOFOLLOW", "O_CLOEXEC"):
        flags |= getattr(os, optional_flag, 0)
    fd = os.open(path, flags) if dir_fd is None else os.open(path, flags, dir_fd=dir_fd)
    try:
        # Checked again after opening because O_DIRECTORY may be unavailable.
        if not stat.S_ISDIR(os.fstat(fd).st_mode):
            raise PackError("not a directory")
    except Exception:
        os.close(fd)
        raise
    return fd
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
def file_open_flags() -> int:
    """Return the ``os.open`` flags used for reading a leaf file.

    Starts from ``O_RDONLY`` and adds ``O_NOFOLLOW``, ``O_CLOEXEC``,
    ``O_NONBLOCK`` and ``O_NOCTTY`` where the platform defines them.
    """
    flags = os.O_RDONLY
    flags |= getattr(os, "O_NOFOLLOW", 0)
    flags |= getattr(os, "O_CLOEXEC", 0)
    flags |= getattr(os, "O_NONBLOCK", 0)
    flags |= getattr(os, "O_NOCTTY", 0)
    return flags
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
def stat_leaf_no_follow(name: str, *, dir_fd: int) -> os.stat_result | None:
    """Stat ``name`` relative to ``dir_fd`` without following symlinks.

    Returns ``None`` when ``os.stat`` on this platform lacks either
    ``dir_fd`` or ``follow_symlinks`` support.
    """
    dir_fd_ok = os.stat in getattr(os, "supports_dir_fd", set())
    no_follow_ok = os.stat in getattr(os, "supports_follow_symlinks", set())
    if dir_fd_ok and no_follow_ok:
        return os.stat(name, dir_fd=dir_fd, follow_symlinks=False)
    return None
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
    """Open ``rel`` beneath ``root`` one component at a time.

    Each intermediate directory is opened relative to the previous
    directory's descriptor via ``open_dir_no_follow``; the final component
    must be a regular file.

    Returns:
        ``(text_handle, "")`` on success, otherwise ``(None, reason)`` with
        reason one of ``"outside_root"``, ``"missing"``, ``"empty_source"``
        or ``"unsafe_path"``. On success the caller owns the handle.
    """
    dir_fd: int | None = None
    try:
        dir_fd = open_dir_no_follow(root)
        components = rel.parts
        last_index = len(components) - 1
        for position, name in enumerate(components):
            if name in {"", ".", ".."}:
                return None, "outside_root"

            if position != last_index:
                # Descend into the next directory, then release the parent.
                try:
                    child_fd = open_dir_no_follow(name, dir_fd=dir_fd)
                except (FileNotFoundError, NotADirectoryError):
                    return None, "missing"
                except OSError:
                    return None, "unsafe_path"
                os.close(dir_fd)
                dir_fd = child_fd
                continue

            # Final component: stat first (when supported) so symlinks and
            # non-regular files are rejected before they are opened.
            try:
                leaf_stat = stat_leaf_no_follow(name, dir_fd=dir_fd)
            except (FileNotFoundError, NotADirectoryError):
                return None, "missing"
            except OSError:
                return None, "unsafe_path"
            if leaf_stat is not None:
                if stat.S_ISLNK(leaf_stat.st_mode):
                    return None, "unsafe_path"
                if not stat.S_ISREG(leaf_stat.st_mode):
                    return None, "empty_source"

            open_flags = file_open_flags()
            leaf_fd = -1
            try:
                leaf_fd = os.open(name, open_flags, dir_fd=dir_fd)
                # Checked again on the open descriptor.
                opened_stat = os.fstat(leaf_fd)
                if not stat.S_ISREG(opened_stat.st_mode):
                    os.close(leaf_fd)
                    leaf_fd = -1
                    return None, "empty_source"
                handle = os.fdopen(leaf_fd, "r", encoding="utf-8", errors="replace", newline="")
                # The handle owns the descriptor now; skip the cleanup below.
                leaf_fd = -1
                return handle, ""
            except (FileNotFoundError, NotADirectoryError):
                return None, "missing"
            except IsADirectoryError:
                return None, "empty_source"
            except OSError:
                return None, "unsafe_path"
            finally:
                if leaf_fd >= 0:
                    try:
                        os.close(leaf_fd)
                    except OSError:
                        pass
    except OSError:
        return None, "unsafe_path"
    finally:
        if dir_fd is not None:
            try:
                os.close(dir_fd)
            except OSError:
                pass
    # Reached only when ``rel`` has no components.
    return None, "unsafe_path"
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
def resolve_source(root: Path, spec: SourceSpec) -> tuple[ResolvedSource | None, dict[str, Any] | None]:
    """Read, sanitize and slice one requested source.

    Returns:
        ``(resolved, None)`` on success or ``(None, omission_entry)`` when
        the source has to be skipped. Exactly one element is non-None.
    """
    wanted = spec.lines
    if wanted is not None and wanted.start < 1:
        return None, omission(spec, "invalid_lines")
    rel, reason = lexical_rel(spec.path)
    if rel is None:
        return None, omission(spec, reason)
    display, redacted_path = display_rel_path(rel.as_posix())

    def skipped(why: str) -> tuple[None, dict[str, Any]]:
        # Every later omission reports the same display path.
        return None, omission(spec, why, path=display, redacted_path=redacted_path)

    handle, reason = open_regular_under_root(root, rel)
    if handle is None:
        return skipped(reason)
    try:
        with handle:
            raw_text = handle.read()
    except OSError:
        return skipped("unsafe_path")

    # The whole file is sanitized before slicing.
    sanitized, redacted_lines = sanitize_text(raw_text)
    all_lines = sanitized.splitlines(True)
    if not all_lines:
        return skipped("empty_source")
    total_lines = len(all_lines)
    requested = spec.lines or LineRange(1, total_lines)
    if requested.start > total_lines:
        return skipped("empty_source")
    stop = min(requested.end, total_lines)
    selected = all_lines[requested.start - 1:stop]
    if not selected:
        return skipped("empty_source")
    resolved = ResolvedSource(
        spec=spec,
        abs_path=root / rel,
        display_path=display,
        redacted_path=redacted_path,
        requested_lines=requested,
        selected_lines=selected,
        total_lines=total_lines,
        redacted_lines=redacted_lines,
    )
    return resolved, None
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
def retrieval_cli(root_arg: str, display_path: str, lines: LineRange) -> str:
    """Return the shell command that re-fetches the given slice.

    ``root_arg`` and ``display_path`` are shell-quoted.
    """
    words = [
        "context-guard-pack",
        "slice",
        "--root",
        shlex.quote(root_arg),
        "--path",
        shlex.quote(display_path),
        "--lines",
        f"{lines.start}:{lines.end}",
        "--json",
    ]
    return " ".join(words)
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
def safe_root_arg_for_retrieval(root_arg: str) -> str | None:
    """Return ``root_arg`` if it is safe to echo in a command, else ``None``.

    The whole string is checked for control characters and secret-like
    text, then each path component is checked on its own.
    """
    text = str(root_arg)
    whole_string_checks = (CONTROL_CHAR_RE, SECRET_CONTENT_RE, SECRET_PATH_COMPONENT_RE)
    if any(pattern.search(text) for pattern in whole_string_checks):
        return None
    components = [part for part in text.replace("\\", "/").split("/") if part]
    if any(sanitize_path_component(part)[1] for part in components):
        return None
    return text
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
def safe_repo_map_root_arg_for_retrieval(root_arg: str) -> str | None:
    """Repo-map variant of ``safe_root_arg_for_retrieval``.

    Applies the repo-map sensitivity check first, then the general one.
    """
    text = str(root_arg)
    if not repo_map_path_has_sensitive_evidence(text):
        return safe_root_arg_for_retrieval(text)
    return None
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
def retrieval_for(root_arg: str, display_path: str, lines: LineRange, *, redacted_path: bool) -> tuple[str | None, str | None]:
    """Return a retrieval command or the reason none can be given.

    Returns:
        ``(command, None)`` when a command is safe to show, otherwise
        ``(None, reason)`` with reason ``"redacted_path"`` or
        ``"unsafe_root_path"``.
    """
    if redacted_path:
        return None, "redacted_path"
    safe_root = safe_root_arg_for_retrieval(root_arg)
    if safe_root is not None:
        return retrieval_cli(safe_root, display_path, lines), None
    return None, "unsafe_root_path"
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
def render_block(source: ResolvedSource, lines: list[str], *, root_arg: str, status: str, included: LineRange) -> str:
    """Render one source as a Markdown section with a fenced text block.

    The header lists the source path, priority, status, included and
    requested spans, and either a retrieval command or the reason it was
    left out. The fenced body always ends with a newline before the
    closing fence.
    """
    heading = source.spec.label or source.display_path
    requested = source.requested_lines or LineRange(1, source.total_lines)
    retrieval, retrieval_omitted_reason = retrieval_for(
        root_arg, source.display_path, included, redacted_path=source.redacted_path
    )
    header = [
        f"## {heading}",
        f"Source: `{source.display_path}`",
        f"Priority: {source.spec.priority}",
        f"Status: {status}",
        f"Included lines: {included.start}:{included.end}",
        f"Requested lines: {requested.start}:{requested.end}",
    ]
    if retrieval:
        header.append(f"Retrieval: `{retrieval}`")
    elif retrieval_omitted_reason:
        header.append(f"Retrieval omitted: {retrieval_omitted_reason}")

    body = "".join(lines)
    if lines and not lines[-1].endswith("\n"):
        body += "\n"
    return "\n".join(header) + "\n\n```text\n" + body + "```\n\n"
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
def source_metadata(source: ResolvedSource, *, status: str, lines: list[str], included: LineRange, root_arg: str) -> dict[str, Any]:
    """Describe an included (or partially included) source for the pack result.

    The entry records the display path, status, priority, input index, the
    requested and included line ranges, and the byte length of ``lines``.
    A label, a retrieval command (or its omission reason) and, for
    ``status == "partial"``, a ``"budget_exhausted"`` reason are added when
    applicable.
    """
    wanted = source.requested_lines or LineRange(1, source.total_lines)
    entry: dict[str, Any] = {
        "path": source.display_path,
        "status": status,
        "priority": source.spec.priority,
        "input_index": source.spec.input_index,
        "requested_lines": wanted.as_dict(),
        "included_lines": included.as_dict(),
        "bytes": byte_len("".join(lines)),
    }

    label = source.spec.label
    if label:
        entry["label"] = label

    cli, cli_omitted_reason = retrieval_for(
        root_arg, source.display_path, included, redacted_path=source.redacted_path
    )
    if cli:
        entry["retrieval_cli"] = cli
    elif cli_omitted_reason:
        entry["retrieval_omitted_reason"] = cli_omitted_reason

    if status == "partial":
        entry["reason"] = "budget_exhausted"
    return entry
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
def budget_omission(source: ResolvedSource, *, root_arg: str) -> dict[str, Any]:
    """Build the omission record for a source dropped because the budget ran out.

    Starts from ``omission(..., "budget_exhausted", ...)`` and adds the requested
    line range, the total line count, and a retrieval command for the full
    requested range. When a command is available any pre-existing
    ``retrieval_omitted_reason`` key is removed; otherwise the reason is stored.
    """
    wanted = source.requested_lines or LineRange(1, source.total_lines)
    record = omission(
        source.spec,
        "budget_exhausted",
        path=source.display_path,
        redacted_path=source.redacted_path,
    )
    record["requested_lines"] = wanted.as_dict()
    record["total_lines"] = source.total_lines

    cli, cli_omitted_reason = retrieval_for(
        root_arg, source.display_path, wanted, redacted_path=source.redacted_path
    )
    if cli:
        record["retrieval_cli"] = cli
        record.pop("retrieval_omitted_reason", None)
    elif cli_omitted_reason:
        record["retrieval_omitted_reason"] = cli_omitted_reason
    return record
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
def fit_partial_lines(source: ResolvedSource, remaining: int, *, root_arg: str) -> tuple[list[str], str | None, LineRange | None]:
    """Pick the longest leading run of selected lines whose block fits the budget.

    Lines are tried one more at a time; each candidate prefix is rendered as a
    ``"partial"`` block and measured in bytes. The search stops at the first
    prefix that exceeds ``remaining``.

    Returns ``(lines, rendered_block, included_range)``, or ``([], None, None)``
    when ``remaining`` is not positive or not even one line fits.
    """
    if remaining <= 0:
        return [], None, None

    first_line = source.requested_lines.start if source.requested_lines else 1
    fitted = 0
    for count in range(1, len(source.selected_lines) + 1):
        span = LineRange(first_line, first_line + count - 1)
        rendered = render_block(
            source,
            list(source.selected_lines[:count]),
            root_arg=root_arg,
            status="partial",
            included=span,
        )
        if byte_len(rendered) > remaining:
            break
        fitted = count

    if fitted == 0:
        return [], None, None

    chosen = list(source.selected_lines[:fitted])
    span = LineRange(first_line, first_line + fitted - 1)
    block = render_block(source, chosen, root_arg=root_arg, status="partial", included=span)
    return chosen, block, span
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
def metadata_size(data: dict[str, Any]) -> int:
    """Return the UTF-8 byte size of ``data`` as pretty-printed JSON plus one.

    The serialisation settings (``indent=2``, ``sort_keys=True``, non-ASCII kept
    as-is) match how receipts are written; the extra byte accounts for the
    trailing newline appended on write.
    """
    rendered = json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True)
    encoded = rendered.encode("utf-8", errors="replace")
    return len(encoded) + 1
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
def artifact_failure(error: str, *, bytes_count: int = 0, capped: bool = False) -> dict[str, Any]:
    """Return the ``artifact`` record describing a receipt that was not stored.

    ``error`` is a short machine-readable reason; ``bytes_count`` and ``capped``
    report how far sizing got before the failure.
    """
    failure: dict[str, Any] = {"stored": False, "path": None}
    failure["bytes"] = bytes_count
    failure["capped"] = capped
    failure["error"] = error
    failure["cap_bytes"] = MAX_RECEIPT_BYTES
    return failure
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
def ensure_private_pack_dir(root: Path) -> tuple[Path | None, int | None, str | None]:
    """Create/verify the receipt directory by walking from a no-follow root fd.

    Each of ``.context-guard`` and ``packs`` is opened relative to the
    previous directory descriptor via ``open_dir_no_follow``; a missing
    component is created with mode ``0o700`` and the open is retried.

    Returns:
        ``(root / PACK_DIR, dir_fd, None)`` on success. The caller owns
        ``dir_fd`` and must close it. On failure ``(None, None, reason)`` where
        ``reason`` is ``"artifact_dir_unavailable"`` (could not create a
        component) or ``"unsafe_artifact_dir"`` (any other open failure).
    """
    current_fd: int | None = None
    try:
        current_fd = open_dir_no_follow(root)
        for part in (".context-guard", "packs"):
            while True:
                try:
                    next_fd = open_dir_no_follow(part, dir_fd=current_fd)
                    break
                except FileNotFoundError:
                    try:
                        os.mkdir(part, 0o700, dir_fd=current_fd)
                    except FileExistsError:
                        # Lost a creation race with another process; retry the open.
                        continue
                    except (OSError, NotImplementedError):
                        return None, None, "artifact_dir_unavailable"
                except (OSError, NotImplementedError):
                    # Covers NotADirectoryError as well (it is an OSError subclass).
                    return None, None, "unsafe_artifact_dir"
            # Take ownership of the child descriptor *before* touching the parent
            # so the ``finally`` clause closes the right fd if anything below
            # raises; previously the child leaked and the parent was closed twice.
            parent_fd, current_fd = current_fd, next_fd
            try:
                os.fchmod(current_fd, 0o700)
            except (AttributeError, OSError):
                # Best effort: fchmod is missing or refused on some platforms.
                pass
            os.close(parent_fd)
        dir_fd = current_fd
        current_fd = None  # Ownership passes to the caller.
        return root / PACK_DIR, dir_fd, None
    except (OSError, NotImplementedError):
        # NotImplementedError is handled here too, matching the inner handlers.
        return None, None, "unsafe_artifact_dir"
    finally:
        if current_fd is not None:
            try:
                os.close(current_fd)
            except OSError:
                pass
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
def atomic_write_ops_supported() -> bool:
    """Report whether ``os.open``, ``os.rename`` and ``os.unlink`` accept ``dir_fd``."""
    required = (os.open, os.rename, os.unlink)
    return all(operation in os.supports_dir_fd for operation in required)
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
def fsync_dir_fd(dir_fd: int) -> None:
    """Flush the directory referred to by ``dir_fd`` to disk via ``os.fsync``.

    Any ``OSError`` raised by ``os.fsync`` propagates to the caller.
    """
    os.fsync(dir_fd)
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
def validate_existing_output_target_at(dir_fd: int, filename: str, option_name: str) -> None:
    """Ensure an existing ``filename`` under ``dir_fd`` is a writable regular file.

    A missing file is accepted silently. The target is opened write-only with
    ``O_NOFOLLOW``, ``O_CLOEXEC`` and ``O_NONBLOCK`` where the platform defines
    them, then checked with ``fstat``.

    Raises:
        PackError: if the target is a directory or another non-regular file
            (``unsafe_path``), or if opening it fails for any other reason
            (the OS error text is included).
    """
    open_flags = os.O_WRONLY
    for optional_flag in ("O_NOFOLLOW", "O_CLOEXEC", "O_NONBLOCK"):
        # Flags the platform lacks contribute 0, i.e. are simply not set.
        open_flags |= getattr(os, optional_flag, 0)

    target_fd = -1
    try:
        target_fd = os.open(filename, open_flags, dir_fd=dir_fd)
        info = os.fstat(target_fd)
        if not stat.S_ISREG(info.st_mode):
            raise PackError(f"invalid {option_name}: unsafe_path")
    except FileNotFoundError:
        return
    except IsADirectoryError as exc:
        raise PackError(f"invalid {option_name}: unsafe_path") from exc
    except OSError as exc:
        raise PackError(f"invalid {option_name}: {exc.strerror or exc.__class__.__name__}") from exc
    finally:
        if target_fd >= 0:
            try:
                os.close(target_fd)
            except OSError:
                pass
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
def write_text_atomic_at(dir_fd: int, filename: str, content: str, *, mode: int, option_name: str) -> None:
    """Atomically write ``content`` to ``filename`` inside the directory ``dir_fd``.

    The text is written to an exclusively created temporary file in the same
    directory, fsynced, and renamed over the target. The directory is fsynced
    before and after the rename, and the final mode is re-applied best-effort.
    The temporary file is removed if the rename did not happen.

    Raises:
        PackError: if ``filename`` is not a plain name, if the platform lacks
            the required ``dir_fd`` operations, or if the existing target fails
            ``validate_existing_output_target_at``.
        OSError: from the underlying open/write/fsync/rename calls.
    """
    if filename in {"", ".", ".."} or "/" in filename:
        raise PackError(f"invalid {option_name}: unsafe_path")
    if not atomic_write_ops_supported():
        raise PackError(f"invalid {option_name}: atomic_write_unsupported")
    validate_existing_output_target_at(dir_fd, filename, option_name)

    # The temp name only needs to be unlikely to collide; O_EXCL rejects clashes.
    seed = f"{filename}:{os.getpid()}:{time.time_ns()}".encode("utf-8", "replace")
    digest = hashlib.sha256(seed).hexdigest()[:16]
    temp_name = f".context-guard-pack-{digest}.tmp"

    open_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
    for optional_flag in ("O_NOFOLLOW", "O_CLOEXEC"):
        open_flags |= getattr(os, optional_flag, 0)

    raw_fd = -1
    temp_pending = False
    try:
        raw_fd = os.open(temp_name, open_flags, mode, dir_fd=dir_fd)
        temp_pending = True
        with os.fdopen(raw_fd, "w", encoding="utf-8", newline="") as handle:
            raw_fd = -1  # The file object now owns the descriptor.
            handle.write(content)
            handle.flush()
            os.fsync(handle.fileno())
        fsync_dir_fd(dir_fd)
        os.rename(temp_name, filename, src_dir_fd=dir_fd, dst_dir_fd=dir_fd)
        temp_pending = False
        try:
            os.chmod(filename, mode, dir_fd=dir_fd, follow_symlinks=False)
        except (OSError, TypeError, NotImplementedError):
            # Best effort: not every platform supports chmod with these options.
            pass
        fsync_dir_fd(dir_fd)
    finally:
        if raw_fd >= 0:
            try:
                os.close(raw_fd)
            except OSError:
                pass
        if temp_pending:
            try:
                os.unlink(temp_name, dir_fd=dir_fd)
            except OSError:
                pass
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
def write_private_json_at(dir_fd: int, filename: str, data: dict[str, Any]) -> None:
    """Write ``data`` as pretty-printed JSON to ``filename`` with mode ``0o600``.

    Raises:
        PackError: ``"unsafe_artifact_path"`` if ``filename`` is empty, ``.``,
            ``..`` or contains ``/``; also whatever ``write_text_atomic_at``
            raises.
    """
    if filename in {"", ".", ".."} or "/" in filename:
        raise PackError("unsafe_artifact_path")
    rendered = json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True)
    write_text_atomic_at(dir_fd, filename, rendered + "\n", mode=0o600, option_name="artifact receipt")
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
def finalize_receipt_size(receipt: dict[str, Any]) -> int:
    """Store the receipt's own serialised size in ``receipt["artifact"]["bytes"]``.

    Writing the size can change the size (more digits), so the value is
    re-measured up to four times until it is stable. If it never stabilises the
    last measurement is stored and the size is measured once more for the
    return value.
    """
    artifact = receipt.setdefault("artifact", {})
    current = metadata_size(receipt)
    attempts_left = 4
    while attempts_left > 0:
        attempts_left -= 1
        artifact["bytes"] = current
        measured = metadata_size(receipt)
        if measured == current:
            return current
        current = measured
    artifact["bytes"] = current
    return metadata_size(receipt)
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
def shrink_receipt_for_write(data: dict[str, Any]) -> tuple[dict[str, Any], bool]:
    """Return a deep copy of ``data`` reduced towards ``MAX_RECEIPT_BYTES``.

    The copy is returned untouched (``capped=False``) when it already fits.
    Otherwise it is marked as capped and trimmed in stages, stopping as soon as
    it fits: first omitted sources (previews dropped, labels and reasons
    shortened), then included sources (previews dropped, labels shortened), and
    finally the ``pack`` body itself. The result may still exceed the cap; the
    caller is expected to re-check.
    """
    receipt = copy.deepcopy(data)

    def fits() -> bool:
        return metadata_size(receipt) <= MAX_RECEIPT_BYTES

    if fits():
        return receipt, False

    receipt.setdefault("artifact", {})["capped"] = True
    receipt.setdefault("artifact", {})["cap_bytes"] = MAX_RECEIPT_BYTES

    for entry in receipt.get("omitted_sources", []):
        if not isinstance(entry, dict):
            continue
        entry.pop("preview", None)
        if "label" in entry:
            entry["label"] = cap_label(entry.get("label"), limit=80)
        if "reason" in entry:
            entry["reason"] = cap_label(entry.get("reason"), default=str(entry.get("reason")), limit=MAX_REASON_CHARS)
    if fits():
        return receipt, True

    for entry in receipt.get("included_sources", []):
        if not isinstance(entry, dict):
            continue
        entry.pop("preview", None)
        if "label" in entry:
            entry["label"] = cap_label(entry.get("label"), limit=80)
    if fits():
        return receipt, True

    # The stdout payload remains authoritative for the full pack body. Receipts may omit it to stay readable.
    receipt["pack_omitted_from_receipt"] = True
    receipt.pop("pack", None)
    return receipt, True
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
def store_receipt(root: Path, result: dict[str, Any]) -> dict[str, Any]:
    """Persist ``result`` as a private JSON receipt and return its artifact record.

    The receipt is shrunk to the size cap, annotated with its storage path and
    final size, and written as ``<pack_id>.json`` in the private pack directory.
    Every failure is reported through ``artifact_failure`` rather than raised
    (for the exception types handled below); the directory descriptor is always
    closed.
    """
    out_dir, dir_fd, dir_error = ensure_private_pack_dir(root)
    if out_dir is None or dir_fd is None:
        return artifact_failure(dir_error or "unsafe_artifact_dir")

    size = 0
    capped = False
    try:
        receipt, capped = shrink_receipt_for_write(result)
        size = metadata_size(receipt)
        if size > MAX_RECEIPT_BYTES:
            return artifact_failure("receipt_metadata_too_large", bytes_count=size, capped=True)

        pack_id = str(result["pack_id"])
        filename = f"{pack_id}.json"
        artifact_meta = receipt.setdefault("artifact", {})
        artifact_meta["stored"] = True
        artifact_meta["path"] = f"{PACK_DIR}/{pack_id}.json"
        artifact_meta["capped"] = capped

        # Recording the size changes the size, so settle it before the final check.
        size = finalize_receipt_size(receipt)
        if size > MAX_RECEIPT_BYTES:
            return artifact_failure("receipt_metadata_too_large", bytes_count=size, capped=True)
        write_private_json_at(dir_fd, filename, receipt)
    except (OSError, PackError, NotImplementedError):
        return artifact_failure("artifact_write_failed", bytes_count=size, capped=capped)
    finally:
        try:
            os.close(dir_fd)
        except OSError:
            pass

    stored: dict[str, Any] = {"stored": True}
    stored["path"] = f"{PACK_DIR}/{pack_id}.json"
    stored["bytes"] = size
    stored["capped"] = capped
    stored["cap_bytes"] = MAX_RECEIPT_BYTES
    return stored
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
def build_pack(root: Path, specs: list[SourceSpec], *, budget_bytes: int, root_arg: str, store_artifact: bool) -> dict[str, Any]:
    """Assemble a context pack from ``specs`` within ``budget_bytes``.

    Works in three phases:

    1. Resolve every spec, recording invalid line ranges, duplicates and
       unresolvable sources as omissions.
    2. Add sources in priority order (highest first, then input order, then
       path), including each one whole, partially, or not at all depending on
       the bytes still available.
    3. Derive a ``pack_id`` from a canonical description of the inputs and the
       pack content, build the result payload, and optionally store a receipt.
    """
    seen_identities: set[tuple[str, str]] = set()
    candidates: list[ResolvedSource] = []
    dropped: list[dict[str, Any]] = []
    canonical_sources: list[dict[str, Any]] = []

    def record(path: Any, priority: Any, lines: Any, status: Any) -> None:
        canonical_sources.append({"path": path, "priority": priority, "lines": lines, "status": status})

    # Phase 1: resolve specs.
    for spec in specs:
        rel, _reason = lexical_rel(spec.path)
        if spec.lines is not None and spec.lines.start < 1:
            invalid = omission(spec, "invalid_lines")
            dropped.append(invalid)
            record(invalid.get("path"), spec.priority, "invalid", "invalid_lines")
            continue

        if rel is None:
            identity_lines = "invalid"
        elif spec.lines is not None and spec.lines.start > 0:
            identity_lines = spec.lines.identity()
        else:
            identity_lines = "all"

        if rel is not None:
            # Only lexically valid paths take part in duplicate detection.
            identity = (rel.as_posix(), identity_lines)
            if identity in seen_identities:
                shown, is_redacted = display_rel_path(rel.as_posix())
                dropped.append(omission(spec, "duplicate_source", path=shown, redacted_path=is_redacted))
                record(shown, spec.priority, identity_lines, "duplicate_source")
                continue
            seen_identities.add(identity)

        source, unresolved = resolve_source(root, spec)
        if unresolved is not None:
            dropped.append(unresolved)
            record(unresolved.get("path"), spec.priority, identity_lines, unresolved.get("reason"))
            continue
        assert source is not None
        candidates.append(source)
        record(source.display_path, spec.priority, identity_lines, "candidate")

    # Phase 2: fill the budget in priority order.
    candidates.sort(key=lambda item: (-item.spec.priority, item.spec.input_index, item.display_path))
    banner = "# Context Pack\n\nGenerated by context-guard-pack. Token counts are estimated proxies; byte counts are observed.\n\n"
    chunks: list[str] = []
    kept: list[dict[str, Any]] = []
    used_bytes = 0
    banner_bytes = byte_len(banner)
    if banner_bytes <= budget_bytes:
        chunks.append(banner)
        used_bytes += banner_bytes

    for source in candidates:
        first_line = source.requested_lines.start if source.requested_lines else 1
        whole_range = LineRange(first_line, first_line + len(source.selected_lines) - 1)
        whole_block = render_block(source, source.selected_lines, root_arg=root_arg, status="included", included=whole_range)
        whole_bytes = byte_len(whole_block)
        room = budget_bytes - used_bytes
        if whole_bytes <= room:
            chunks.append(whole_block)
            used_bytes += whole_bytes
            kept.append(source_metadata(source, status="included", lines=source.selected_lines, included=whole_range, root_arg=root_arg))
            continue

        some_lines, some_block, some_range = fit_partial_lines(source, room, root_arg=root_arg)
        if some_block is None or some_range is None:
            dropped.append(budget_omission(source, root_arg=root_arg))
        else:
            chunks.append(some_block)
            used_bytes += byte_len(some_block)
            kept.append(source_metadata(source, status="partial", lines=some_lines, included=some_range, root_arg=root_arg))

    # Phase 3: identity and result payload.
    pack = "".join(chunks)
    pack_bytes = used_bytes
    redacted_lines = sum(source.redacted_lines for source in candidates)
    partial_count = sum(1 for entry in kept if entry.get("status") == "partial")
    dropped_sorted = sorted(
        dropped,
        key=lambda entry: (entry.get("input_index", 0), str(entry.get("path", "")), str(entry.get("reason", ""))),
    )
    canonical = {
        "version": VERSION,
        "root": display_root(root),
        "budget_bytes": budget_bytes,
        "sources": canonical_sources,
        "pack_sha256": sha256_text(pack),
        "omission_summary": sorted({str(entry.get("reason")) for entry in dropped_sorted}),
    }
    canonical_json = json.dumps(canonical, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    pack_id = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()[:20]

    result: dict[str, Any] = {
        "tool": TOOL_NAME,
        "version": VERSION,
        "pack_id": pack_id,
        "root": display_root(root),
        "budget_bytes": budget_bytes,
        "pack_bytes": pack_bytes,
        "pack": pack,
        "token_proxy": {"measurement": "estimated", "method": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}", "pack": token_proxy(pack)},
        "sources": {"total": len(specs), "included": len(kept) - partial_count, "partial": partial_count, "omitted": len(dropped_sorted)},
        "included_sources": kept,
        "omitted_sources": dropped_sorted,
        "redaction": {"redacted_lines": redacted_lines, "redacted_before_pack": True},
        "artifact": {"stored": False, "path": None, "bytes": 0, "capped": False, "cap_bytes": MAX_RECEIPT_BYTES},
        "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    if store_artifact:
        result["artifact"] = store_receipt(root, result)
    return result
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
def parse_all_sources(args: argparse.Namespace) -> list[SourceSpec]:
    """Collect source specs from ``args.manifest`` and ``args.source``.

    Manifest entries come first, followed by the sources given directly; each
    spec's ``input_index`` is then set to its position in the combined list.
    """
    specs: list[SourceSpec] = []
    if args.manifest:
        specs.extend(read_manifest(Path(args.manifest)))
    specs.extend(parse_source_spec(raw) for raw in (args.source or []))
    for position, spec in enumerate(specs):
        spec.input_index = position
    return specs
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
def slice_source(root: Path, *, raw_path: str, lines: LineRange) -> tuple[dict[str, Any], int]:
    """Return the requested line slice of one file as a payload plus exit code.

    On success the payload has ``status == "ok"`` with the joined content and
    the exit code is ``0``. When the source cannot be resolved the payload has
    ``status == "error"`` with the omission reason and path, and the exit code
    is ``1``.
    """
    source, problem = resolve_source(root, SourceSpec(path=raw_path, lines=lines))
    if problem is not None:
        failure = {"tool": TOOL_NAME, "status": "error", "reason": problem.get("reason"), "path": problem.get("path")}
        return failure, 1
    assert source is not None

    content = "".join(source.selected_lines)
    query = {
        "type": "lines",
        "start": lines.start,
        # Report the end actually available, not the one asked for.
        "end": min(lines.end, source.total_lines),
        "returned_lines": len(source.selected_lines),
    }
    success = {
        "tool": TOOL_NAME,
        "version": VERSION,
        "status": "ok",
        "path": source.display_path,
        "query": query,
        "content": content,
        "bytes": byte_len(content),
        "redaction": {"redacted_lines": source.redacted_lines, "redacted_before_pack": True},
    }
    return success, 0
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
def suggest_tokens(text: str) -> set[str]:
    """Extract the set of lowercase search tokens from ``text``.

    Anything matched by ``SECRET_CONTENT_RE`` is blanked out first. A token
    starts with a letter, digit or underscore and continues with at least one
    more of those characters, ``.`` or ``-``.
    """
    scrubbed = SECRET_CONTENT_RE.sub(" ", text.lower())
    # The pattern itself requires two or more characters, so no extra length
    # filter is needed.
    return set(re.findall(r"[a-z0-9_][a-z0-9_.-]{1,}", scrubbed))
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
def suggest_score_path(path: str, query_terms: set[str]) -> int:
    """Score ``path`` at 120 points per query term found in it, ignoring case.

    Terms are matched as plain substrings of the lowercased path; the terms
    themselves are used as given.
    """
    haystack = path.lower()
    return sum(120 for term in query_terms if term in haystack)
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
def suggest_reason(*parts: str) -> str:
    """Join the non-empty ``parts`` with ``"; "`` and cap the result's length.

    Falls back to ``"local heuristic"`` when ``cap_label`` yields nothing.
    """
    joined = "; ".join(filter(None, parts))
    capped = cap_label(joined, default="local heuristic", limit=MAX_REASON_CHARS)
    return capped or "local heuristic"
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
def split_suggest_files(values: list[str] | None) -> list[str]:
    """Flatten comma-separated option values into a list of trimmed names.

    Each value is converted to ``str`` and split on commas; surrounding
    whitespace is stripped and empty pieces are discarded. ``None`` yields an
    empty list.
    """
    return [
        piece.strip()
        for value in (values or [])
        for piece in str(value).split(",")
        if piece.strip()
    ]
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
def line_window(line_number: int, total_lines: int | None, context_lines: int) -> LineRange:
    """Return the range of ``context_lines`` lines either side of ``line_number``.

    The start is clamped to line 1. When ``total_lines`` is known the end is
    clamped to it (treating a non-positive total as 1); otherwise the end is
    left unbounded.
    """
    first = max(1, line_number - context_lines)
    last = max(first, line_number + context_lines)
    if total_lines is not None:
        last = min(last, max(1, total_lines))
    return LineRange(first, last)
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
def merge_line_window(existing: LineRange | None, line_number: int, context_lines: int) -> LineRange:
    """Extend ``existing`` so it also covers the window around ``line_number``.

    With no existing range the new window is returned as-is. The window's end
    is not clamped to any file length.
    """
    addition = line_window(line_number, None, context_lines)
    if existing is not None:
        return LineRange(min(existing.start, addition.start), max(existing.end, addition.end))
    return addition
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
def add_suggest_candidate(
    candidates: list[SuggestCandidate],
    *,
    path: str,
    score: int,
    reason: str,
    lines: LineRange | None = None,
    label: str | None = None,
) -> None:
    """Append a new ``SuggestCandidate`` to ``candidates`` in place.

    The reason and label are length-capped, and the candidate's
    ``input_index`` is its position in the list at the time of insertion.
    """
    capped_reason = suggest_reason(reason)
    capped_label = cap_label(label)
    position = len(candidates)
    entry = SuggestCandidate(
        path=path,
        score=score,
        reason=capped_reason,
        lines=lines,
        label=capped_label,
        input_index=position,
    )
    candidates.append(entry)
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
def run_git_diff(root: Path, diff_ref: str) -> str:
    """Run ``git diff`` in ``root`` and return its sanitized, length-capped output.

    ``diff_ref`` selects what is diffed: one of the staged aliases adds
    ``--cached``; one of the worktree aliases diffs the working tree; anything
    else is passed to git as a revision argument.

    Raises:
        PackError: if ``diff_ref`` is blank or starts with ``-``, if git cannot
            be run or times out, or if git exits non-zero (the first line of
            its sanitized output is included).
    """
    ref = diff_ref.strip()
    if not ref:
        raise PackError("empty --diff")

    command = ["git", "-C", str(root), "diff", "--no-ext-diff", "--no-textconv", "--unified=3"]
    if ref in {"staged", "--staged", "cached", "--cached"}:
        command.append("--cached")
    elif ref not in {"worktree", "unstaged", "working-tree"}:
        # Never let a user-supplied value be parsed as a git option.
        if ref.startswith("-"):
            raise PackError("invalid --diff: revision must not start with '-'")
        command.append(ref)

    try:
        proc = subprocess.run(command, text=True, errors="replace", capture_output=True, timeout=10, check=False)
    except (OSError, UnicodeError, subprocess.TimeoutExpired) as exc:
        raise PackError(f"could not read diff: {exc.__class__.__name__}") from exc

    if proc.returncode != 0:
        raw_detail = proc.stderr or proc.stdout or "git diff failed"
        detail_lines = sanitize_text(raw_detail)[0].strip().splitlines()
        first_line = detail_lines[0] if detail_lines else "git diff failed"
        raise PackError(f"could not read diff: {cap_label(first_line, default='git diff failed', limit=160)}")

    capped_output = proc.stdout[:MAX_SUGGEST_INPUT_BYTES]
    return sanitize_text(capped_output)[0]
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
def collect_diff_candidates(root: Path, diff_ref: str, query_terms: set[str], context_lines: int) -> list[SuggestCandidate]:
    """Turn every hunk of a git diff into a suggestion candidate.

    Each ``@@`` hunk yields one candidate for the file named on the preceding
    ``diff --git`` line, covering the hunk's new-side line range widened by
    ``context_lines``. Candidates score 7000 plus any path-match bonus.
    """
    diff_text = run_git_diff(root, diff_ref)
    found: list[SuggestCandidate] = []
    file_header_re = re.compile(r"^diff --git a/(.+?) b/(.+)$")
    hunk_header_re = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@")
    active_path: str | None = None

    for line in diff_text.splitlines():
        if line.startswith("diff --git "):
            # A new file section; an unparsable header disables its hunks.
            header = file_header_re.match(line)
            active_path = None
            if header:
                old_side, new_side = header.groups()
                active_path = new_side if new_side != "/dev/null" else old_side
            continue
        if active_path is None:
            continue

        hunk = hunk_header_re.match(line)
        if not hunk:
            continue
        new_start = int(hunk.group(1))
        # An omitted count means one line; a zero count is treated as one too.
        new_count = int(hunk.group(2) or "1")
        hunk_end = max(new_start, new_start + max(1, new_count) - 1)
        window_start = max(1, new_start - context_lines)
        window = LineRange(window_start, max(window_start, hunk_end + context_lines))
        add_suggest_candidate(
            found,
            path=active_path,
            score=7_000 + suggest_score_path(active_path, query_terms),
            reason="changed diff hunk",
            lines=window,
            label=f"diff:{safe_raw_path_label(active_path)}",
        )
    return found
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
# Matches a relative file path with a known source/doc extension, optionally
# followed by ``:<line>``. Groups: ``path`` (may keep a leading ``./``) and
# ``line`` (digits, or None).
OUTPUT_PATH_RE = re.compile(
    # Do not start in the middle of a longer path-like token.
    r"(?<![A-Za-z0-9_./-])"
    r"(?P<path>(?:\.\/)?(?:[A-Za-z0-9_.-]+/)*[A-Za-z0-9_.-]+\."
    r"(?:py|js|jsx|ts|tsx|mjs|cjs|md|json|yml|yaml|toml|sh|css|html|txt|rb|go|rs|java|kt|swift|c|cc|cpp|h|hpp))"
    # The alternation above is ordered, so without this boundary a shorter
    # extension wins: "package.json" matched as "package.js", "main.cpp" as
    # "main.c", "module.pyc" as "module.py". Requiring that no identifier
    # character follows forces the full extension to match.
    r"(?![A-Za-z0-9_])"
    r"(?::(?P<line>\d+))?"
)
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
def read_text_input_under_root(root: Path, raw_path: str) -> tuple[str | None, dict[str, Any] | None]:
    """Read and sanitize a text file under ``root`` for suggestion scanning.

    Returns:
        ``(text, None)`` on success, where ``text`` is sanitized and limited to
        at most ``MAX_SUGGEST_INPUT_BYTES`` bytes of UTF-8; or
        ``(None, omission)`` where ``omission`` is a dict with ``path``,
        ``status == "omitted"`` and a ``reason`` when the path is lexically
        invalid, redacted, cannot be opened as a regular file, or cannot be
        read.
    """
    rel, reason = lexical_rel(raw_path)
    display = safe_raw_path_label(raw_path)
    if rel is None:
        return None, {"path": display, "status": "omitted", "reason": reason}
    display, redacted = display_rel_path(rel.as_posix())
    if redacted:
        return None, {"path": display, "status": "omitted", "reason": "redacted_path", "retrieval_omitted_reason": "redacted_path"}
    handle, reason = open_regular_under_root(root, rel)
    if handle is None:
        return None, {"path": display, "status": "omitted", "reason": reason}
    try:
        with handle:
            # read() counts characters here, so this is only a coarse first cap.
            text = handle.read(MAX_SUGGEST_INPUT_BYTES + 1)
    except (OSError, UnicodeError):
        return None, {"path": display, "status": "omitted", "reason": "unsafe_path"}
    encoded = text.encode("utf-8", errors="replace")
    if len(encoded) > MAX_SUGGEST_INPUT_BYTES:
        # Truncate by bytes, not characters: slicing the string would let
        # multi-byte text exceed the byte cap. ``errors="ignore"`` drops a
        # character split by the cut.
        text = encoded[:MAX_SUGGEST_INPUT_BYTES].decode("utf-8", errors="ignore")
    sanitized, _redacted = sanitize_text(text)
    return sanitized, None
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
def collect_output_candidates(
    root: Path,
    raw_paths: list[str] | None,
    query_terms: set[str],
    context_lines: int,
    *,
    origin: str,
) -> tuple[list[SuggestCandidate], list[dict[str, Any]]]:
    """Suggest sources from file paths mentioned in captured output files.

    Each input file is read under ``root`` and scanned with ``OUTPUT_PATH_RE``.
    Per input file, every referenced path becomes one candidate (score 5000
    plus any path-match bonus); ``path:line`` references are merged into a
    single line window, while bare paths request the whole file.

    Returns:
        ``(candidates, omitted)`` where ``omitted`` lists inputs that could not
        be read, each tagged with ``origin``.
    """
    found: list[SuggestCandidate] = []
    skipped: list[dict[str, Any]] = []
    for raw in raw_paths or []:
        text, problem = read_text_input_under_root(root, raw)
        if problem is not None:
            problem["origin"] = origin
            skipped.append(problem)
            continue
        assert text is not None

        windows: dict[str, LineRange | None] = {}
        for hit in OUTPUT_PATH_RE.finditer(text):
            referenced = hit.group("path")
            if referenced.startswith("./"):
                referenced = referenced[2:]
            line_digits = hit.group("line")
            if not line_digits:
                # A bare path never overrides a window already collected.
                windows.setdefault(referenced, None)
                continue
            try:
                line_number = int(line_digits)
            except ValueError:
                line_number = 1
            windows[referenced] = merge_line_window(windows.get(referenced), line_number, context_lines)

        for referenced in sorted(windows):
            add_suggest_candidate(
                found,
                path=referenced,
                score=5_000 + suggest_score_path(referenced, query_terms),
                reason=f"{origin} referenced path",
                lines=windows[referenced],
                label=f"{origin}:{safe_raw_path_label(referenced)}",
            )
    return found, skipped
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
def git_ls_files(root: Path) -> list[str]:
    """List up to ``MAX_QUERY_SCAN_FILES`` candidate file paths under ``root``.

    Uses ``git ls-files -z`` when it succeeds. Otherwise falls back to walking
    the directory tree, skipping a fixed set of tool/build directories and
    anything starting with ``.pytest``. Paths are relative to ``root`` in POSIX
    form in the fallback, and as reported by git otherwise.
    """
    try:
        proc = subprocess.run(
            ["git", "-C", str(root), "ls-files", "-z"],
            text=False,
            capture_output=True,
            timeout=10,
            check=False,
        )
    except (OSError, subprocess.TimeoutExpired):
        proc = None
    if proc is not None and proc.returncode == 0:
        byte_cap = MAX_QUERY_SCAN_FILES * 512
        pieces = proc.stdout[:byte_cap].split(b"\0")
        if len(proc.stdout) > byte_cap:
            # The cap cut the listing short, so the final fragment is either
            # empty or a partial path. Drop it rather than report a file name
            # git never listed.
            pieces.pop()
        return [piece.decode("utf-8", "replace") for piece in pieces if piece][:MAX_QUERY_SCAN_FILES]
    out: list[str] = []
    skip_dirs = {".git", ".omx", ".context-guard", "node_modules", "dist", "build", "__pycache__"}
    for current, dirs, files in os.walk(root):
        # Prune in place so os.walk does not descend into skipped directories.
        dirs[:] = [name for name in dirs if name not in skip_dirs and not name.startswith(".pytest")]
        current_path = Path(current)
        for name in files:
            out.append((current_path / name).relative_to(root).as_posix())
            if len(out) >= MAX_QUERY_SCAN_FILES:
                return out
    return out
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
def collect_query_candidates(root: Path, query_terms: set[str], context_lines: int) -> list[SuggestCandidate]:
    """Suggest files whose path or content mentions any of ``query_terms``.

    Files come from ``git_ls_files``; lexically invalid or redacted paths and
    files that cannot be opened or read are skipped. Each file scores 3000 plus
    120 per term found in its path and 250 per term hit per line. Scanning a
    file stops once ``MAX_QUERY_SCAN_BYTES_PER_FILE`` is exceeded, or after
    ``SUGGEST_WHOLE_FILE_MAX_LINES`` lines when nothing has matched so far.
    A content match yields a window around the first matching line; a
    path-only match requests the whole file.
    """
    if not query_terms:
        return []

    found: list[SuggestCandidate] = []
    for listed in git_ls_files(root):
        rel, rel_problem = lexical_rel(listed)
        if rel is None or rel_problem:
            continue
        display, is_redacted = display_rel_path(rel.as_posix())
        if is_redacted:
            continue
        path_score = suggest_score_path(display, query_terms)
        handle, _open_reason = open_regular_under_root(root, rel)
        if handle is None:
            continue

        first_hit_line: int | None = None
        content_score = 0
        try:
            with handle:
                consumed = 0
                for number, raw_line in enumerate(handle, start=1):
                    consumed += byte_len(raw_line)
                    if consumed > MAX_QUERY_SCAN_BYTES_PER_FILE:
                        break
                    nothing_matched = content_score == 0 and path_score == 0
                    if number > SUGGEST_WHOLE_FILE_MAX_LINES and nothing_matched:
                        break
                    folded = raw_line.lower()
                    hits = sum(1 for term in query_terms if term in folded)
                    if not hits:
                        continue
                    content_score += 250 * hits
                    if first_hit_line is None:
                        first_hit_line = number
        except (OSError, UnicodeError):
            # Unreadable or undecodable files are skipped silently.
            continue

        if not (path_score or content_score):
            continue
        if first_hit_line is None:
            window = None
            why = "query matched file path"
        else:
            window = line_window(first_hit_line, None, context_lines)
            why = "query matched file content"
        add_suggest_candidate(
            found,
            path=display,
            score=3_000 + path_score + content_score,
            reason=why,
            lines=window,
            label=f"query:{display}",
        )
    return found
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
def source_selected_range(source: ResolvedSource) -> LineRange:
    """Return the line span covered by ``source.selected_lines``.

    The span begins at the start of the requested range (line 1 when no range
    was requested) and always covers at least one line, even when no lines
    were selected.
    """
    if source.requested_lines:
        first_line = source.requested_lines.start
    else:
        first_line = 1
    covered = max(len(source.selected_lines), 1)
    return LineRange(first_line, first_line + covered - 1)
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
def resolved_block_bytes(source: ResolvedSource, *, root_arg: str) -> int:
    """Return the byte length of ``source`` rendered as an "included" block."""
    span = source_selected_range(source)
    rendered = render_block(
        source,
        source.selected_lines,
        root_arg=root_arg,
        status="included",
        included=span,
    )
    return byte_len(rendered)
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
def manifest_source_for_candidate(source: ResolvedSource, *, priority: int, label: str | None) -> dict[str, Any]:
    """Build the manifest entry for a resolved suggest candidate.

    ``path`` and ``priority`` are always present. ``label`` is added only when
    truthy, and ``lines`` only when the source was resolved with an explicit
    line range.
    """
    entry: dict[str, Any] = {
        "path": source.display_path,
        "priority": priority,
    }
    if label:
        entry["label"] = label
    has_explicit_range = source.requested_lines is not None
    if has_explicit_range:
        entry["lines"] = source_selected_range(source).as_dict()
    return entry
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
def suggested_source_payload(source: ResolvedSource, candidate: SuggestCandidate, *, root_arg: str) -> dict[str, Any]:
    """Describe one selected suggest source for the JSON report.

    The payload carries the display path, the candidate score (as both
    ``priority`` and ``score``), the suggest reason, the covered line span and
    the byte size of the selected lines. A label is included when the
    candidate has one, followed by either a retrieval command or the reason
    it was omitted.
    """
    span = source_selected_range(source)
    result: dict[str, Any] = {
        "path": source.display_path,
        "priority": candidate.score,
        "score": candidate.score,
        "reason": candidate.reason,
        "lines": span.as_dict(),
        "bytes": byte_len("".join(source.selected_lines)),
    }
    if candidate.label:
        result["label"] = candidate.label
    command, why_omitted = retrieval_for(
        root_arg,
        source.display_path,
        span,
        redacted_path=source.redacted_path,
    )
    if command:
        result["retrieval_cli"] = command
    elif why_omitted:
        result["retrieval_omitted_reason"] = why_omitted
    return result
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
def normalize_suggest_source(root: Path, candidate: SuggestCandidate) -> tuple[ResolvedSource | None, dict[str, Any] | None]:
    """Resolve a suggest candidate into a source or an omission record.

    Returns ``(source, None)`` on success and ``(None, omission)`` otherwise.
    Sources with a redacted path are omitted. A whole-file candidate longer
    than ``SUGGEST_WHOLE_FILE_MAX_LINES`` is re-resolved with a range capped
    to that many leading lines.
    """

    def spec_with(lines: LineRange | None) -> SourceSpec:
        # Every spec built here differs only in the requested line range.
        return SourceSpec(
            path=candidate.path,
            priority=candidate.score,
            lines=lines,
            label=candidate.label,
            input_index=candidate.input_index,
            origin="suggest",
        )

    spec = spec_with(candidate.lines)
    source, omitted_item = resolve_source(root, spec)
    if omitted_item is not None:
        # Fall back to the suggest reason when the resolver gave none.
        omitted_item["reason"] = omitted_item.get("reason") or candidate.reason
        omitted_item["suggest_reason"] = candidate.reason
        return None, omitted_item
    assert source is not None
    if source.redacted_path:
        return None, omission(spec, "redacted_path", path=source.display_path, redacted_path=True)

    whole_file_too_long = spec.lines is None and source.total_lines > SUGGEST_WHOLE_FILE_MAX_LINES
    if not whole_file_too_long:
        return source, None

    last_line = min(SUGGEST_WHOLE_FILE_MAX_LINES, source.total_lines)
    source, omitted_item = resolve_source(root, spec_with(LineRange(1, last_line)))
    if omitted_item is not None:
        omitted_item["suggest_reason"] = candidate.reason
        return None, omitted_item
    assert source is not None
    return source, None
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
def write_manifest_under_root(root: Path, raw_path: str, manifest: dict[str, Any]) -> str:
    """Serialize ``manifest`` as sorted, indented JSON and write it under ``root``.

    Returns whatever ``write_text_under_root`` returns for the written path.
    """
    serialized = json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True)
    return write_text_under_root(root, raw_path, f"{serialized}\n", "--manifest-out")
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
def validate_output_path_under_root(root: Path, raw_path: str, option_name: str) -> str:
    """Check that ``raw_path`` is a usable output location under ``root``.

    The parent directories are walked one component at a time with
    ``open_dir_no_follow``. An existing target must open for writing and be a
    regular file. When the target does not exist, a temporary file is created
    (and removed again) in the parent directory to confirm it can be created.

    Args:
        root: Directory the output path is resolved against.
        raw_path: User-supplied relative output path.
        option_name: CLI option name used in error messages.

    Returns:
        The display form of the path as produced by ``display_rel_path``.

    Raises:
        PackError: If the path is lexically invalid, redacted, missing a
            parent directory, not a regular file, or cannot be opened/created.
    """
    rel, reason = lexical_rel(raw_path)
    if rel is None:
        raise PackError(f"invalid {option_name}: {reason}")
    display, redacted = display_rel_path(rel.as_posix())
    if redacted:
        raise PackError(f"invalid {option_name}: redacted_path")
    parent_parts = rel.parts[:-1]
    filename = rel.parts[-1]
    current_fd: int | None = None
    file_fd = -1
    try:
        # Descend into the parent directory, holding only one directory fd at a time.
        current_fd = open_dir_no_follow(root)
        for part in parent_parts:
            next_fd = open_dir_no_follow(part, dir_fd=current_fd)
            os.close(current_fd)
            current_fd = next_fd
        # Optional flags are added only where the platform defines them.
        flags = os.O_WRONLY
        if hasattr(os, "O_NOFOLLOW"):
            flags |= os.O_NOFOLLOW
        if hasattr(os, "O_CLOEXEC"):
            flags |= os.O_CLOEXEC
        if hasattr(os, "O_NONBLOCK"):
            # NOTE(review): presumably keeps the open from blocking on special files — confirm.
            flags |= os.O_NONBLOCK
        try:
            file_fd = os.open(filename, flags, dir_fd=current_fd)
            st = os.fstat(file_fd)
            if not stat.S_ISREG(st.st_mode):
                raise PackError(f"invalid {option_name}: unsafe_path")
        except FileNotFoundError:
            # Target does not exist yet: probe the directory by creating a throwaway file.
            temp_fd = -1
            temp_name = f".context-guard-pack-preflight-{os.getpid()}-{hashlib.sha256(raw_path.encode('utf-8', 'replace')).hexdigest()[:10]}"
            try:
                create_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
                if hasattr(os, "O_NOFOLLOW"):
                    create_flags |= os.O_NOFOLLOW
                if hasattr(os, "O_CLOEXEC"):
                    create_flags |= os.O_CLOEXEC
                if hasattr(os, "O_NONBLOCK"):
                    create_flags |= os.O_NONBLOCK
                temp_fd = os.open(temp_name, create_flags, 0o600, dir_fd=current_fd)
            except OSError as exc:
                raise PackError(f"invalid {option_name}: {exc.strerror or exc.__class__.__name__}") from exc
            finally:
                # NOTE(review): cleanup nesting reconstructed — the probe file is
                # closed and unlinked only when this call actually created it; confirm.
                if temp_fd >= 0:
                    try:
                        os.close(temp_fd)
                    except OSError:
                        pass
                    try:
                        os.unlink(temp_name, dir_fd=current_fd)
                    except OSError:
                        pass
        except IsADirectoryError as exc:
            raise PackError(f"invalid {option_name}: unsafe_path") from exc
        except OSError as exc:
            raise PackError(f"invalid {option_name}: {exc.strerror or exc.__class__.__name__}") from exc
    except PackError:
        raise
    except FileNotFoundError as exc:
        # Reached when the root or a parent directory component is missing.
        raise PackError(f"invalid {option_name}: missing") from exc
    except OSError as exc:
        raise PackError(f"invalid {option_name}: {exc.strerror or exc.__class__.__name__}") from exc
    finally:
        # Best-effort release of whichever descriptors are still open.
        if file_fd >= 0:
            try:
                os.close(file_fd)
            except OSError:
                pass
        if current_fd is not None:
            try:
                os.close(current_fd)
            except OSError:
                pass
    return display
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
def output_rel_for_collision_check(raw_path: str, option_name: str) -> Path:
    """Return the lexically normalized relative path for an output option.

    Raises:
        PackError: If ``raw_path`` is lexically invalid or maps to a redacted path.
    """
    rel, reason = lexical_rel(raw_path)
    if rel is None:
        raise PackError(f"invalid {option_name}: {reason}")
    _shown, is_redacted = display_rel_path(rel.as_posix())
    if not is_redacted:
        return rel
    raise PackError(f"invalid {option_name}: redacted_path")
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
def existing_output_identity_under_root(root: Path, rel: Path) -> tuple[int, int] | None:
    """Return ``(st_dev, st_ino)`` for an existing regular file at ``rel``.

    Parent directories are walked with ``open_dir_no_follow`` and the final
    component is stat'ed without following symlinks. ``None`` is returned when
    the target is not a regular file or any OS-level step fails.
    """
    dir_fd: int | None = None
    try:
        dir_fd = open_dir_no_follow(root)
        for component in rel.parts[:-1]:
            child_fd = open_dir_no_follow(component, dir_fd=dir_fd)
            os.close(dir_fd)
            dir_fd = child_fd
        info = os.stat(rel.parts[-1], dir_fd=dir_fd, follow_symlinks=False)
        if stat.S_ISREG(info.st_mode):
            return int(info.st_dev), int(info.st_ino)
        return None
    except (OSError, NotImplementedError):
        # FileNotFoundError is an OSError, so a missing target lands here too.
        return None
    finally:
        if dir_fd is not None:
            try:
                os.close(dir_fd)
            except OSError:
                pass
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
def reject_matching_output_targets(
    root: Path,
    *,
    first_rel: Path,
    second_rel: Path,
    second_option: str,
    reason: str,
) -> None:
    """Reject two output options that would write to the same file.

    Two targets collide when their relative paths are equal (also when
    compared case-folded) or when both already exist as the same regular file
    (same device and inode).

    Raises:
        PackError: Naming ``second_option`` and ``reason`` when the targets collide.
    """
    identity_one = existing_output_identity_under_root(root, first_rel)
    identity_two = existing_output_identity_under_root(root, second_rel)
    folded_one = first_rel.as_posix().casefold()
    folded_two = second_rel.as_posix().casefold()
    lexical_clash = first_rel == second_rel or folded_one == folded_two
    on_disk_clash = identity_one is not None and identity_one == identity_two
    if not (lexical_clash or on_disk_clash):
        return
    raise PackError(f"invalid {second_option}: {reason}")
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
def write_text_under_root(root: Path, raw_path: str, content: str, option_name: str) -> str:
    """Write ``content`` to ``raw_path`` beneath ``root``.

    Parent directories are walked with ``open_dir_no_follow`` and the write is
    delegated to ``write_text_atomic_at`` with mode ``0o600``.

    Returns:
        The display form of the written path.

    Raises:
        PackError: If the path is invalid or redacted, a directory is
            missing, or the OS reports an error.
    """
    rel, reason = lexical_rel(raw_path)
    if rel is None:
        raise PackError(f"invalid {option_name}: {reason}")
    display, redacted = display_rel_path(rel.as_posix())
    if redacted:
        raise PackError(f"invalid {option_name}: redacted_path")

    directories = rel.parts[:-1]
    leaf = rel.parts[-1]
    dir_fd: int | None = None
    try:
        dir_fd = open_dir_no_follow(root)
        for component in directories:
            child_fd = open_dir_no_follow(component, dir_fd=dir_fd)
            os.close(dir_fd)
            dir_fd = child_fd
        write_text_atomic_at(dir_fd, leaf, content, mode=0o600, option_name=option_name)
    except PackError:
        raise
    except FileNotFoundError as exc:
        raise PackError(f"invalid {option_name}: missing") from exc
    except OSError as exc:
        detail = exc.strerror or exc.__class__.__name__
        raise PackError(f"invalid {option_name}: {detail}") from exc
    finally:
        if dir_fd is not None:
            try:
                os.close(dir_fd)
            except OSError:
                pass
    return display
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
def manifest_to_source_specs(manifest: dict[str, Any]) -> list[SourceSpec]:
    """Convert a parsed manifest into ``SourceSpec`` objects.

    An unparseable ``lines`` value does not abort the conversion; it becomes
    the ``LineRange(-1, -1)`` placeholder.

    Raises:
        PackError: For an unsupported version, a non-list ``sources`` value,
            a non-object source entry, or an entry without ``path``.
    """
    version = manifest.get("version", VERSION)
    if version != VERSION:
        raise PackError(f"unsupported manifest version: {version}")
    sources = manifest.get("sources")
    if not isinstance(sources, list):
        raise PackError("manifest sources must be a list")

    def to_spec(position: int, entry: object) -> SourceSpec:
        if not isinstance(entry, dict):
            raise PackError("manifest sources must be objects")
        if "path" not in entry:
            raise PackError("manifest source missing path")
        try:
            requested = parse_line_range(entry.get("lines"))
        except PackError:
            requested = LineRange(-1, -1)
        return SourceSpec(
            path=str(entry.get("path", "")),
            priority=bounded_int(entry.get("priority"), 0, -1_000_000, 1_000_000),
            lines=requested,
            label=cap_label(entry.get("label")),
            input_index=position,
            origin="auto",
        )

    return [to_spec(position, entry) for position, entry in enumerate(sources)]
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
def build_suggest_manifest(sources: list[dict[str, Any]]) -> dict[str, Any]:
    """Wrap suggest sources in a versioned manifest.

    Each entry keeps ``path`` and ``priority``, plus ``label`` and ``lines``
    when the input item has those keys; all other keys are dropped.
    """

    def trimmed(item: dict[str, Any]) -> dict[str, Any]:
        entry: dict[str, Any] = {"path": item["path"], "priority": item["priority"]}
        for optional_key in ("label", "lines"):
            if optional_key in item:
                entry[optional_key] = item[optional_key]
        return entry

    return {"version": VERSION, "sources": [trimmed(item) for item in sources]}
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
def suggest_build_hint(root_arg: str, manifest_path: str | None, budget: int) -> tuple[str | None, str | None]:
    """Return a shell-quoted ``context-guard-pack build`` command hint.

    Returns ``(command, None)``, or ``(None, "unsafe_root_path")`` when
    ``safe_root_arg_for_retrieval`` rejects ``root_arg``. The command is
    prefixed with ``cd <root> &&`` unless the safe root is ``"."`` or empty.
    """
    safe_root = safe_root_arg_for_retrieval(root_arg)
    if safe_root is None:
        return None, "unsafe_root_path"
    argv = [
        "context-guard-pack",
        "build",
        "--root",
        ".",
        "--manifest",
        manifest_path or "<manifest.json>",
        "--budget-bytes",
        str(budget),
        "--json",
    ]
    command = " ".join(map(shlex.quote, argv))
    if safe_root not in {".", ""}:
        return f"cd {shlex.quote(safe_root)} && {command}", None
    return command, None
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tuple[dict[str, Any], int]:
    """Rank candidate sources for a context pack and report the selection.

    Candidates are gathered from explicit files, a diff, command/test output
    and query matches, sorted by descending score, de-duplicated, and then
    selected while they fit in the byte budget, up to ``top`` sources.

    Args:
        root: Root directory sources are resolved against.
        args: Parsed CLI arguments (``query``, ``files``, ``diff``, ``output``,
            ``test_output``, ``context_lines``, ``top``, ``budget_bytes``,
            ``manifest_out`` are read here).
        root_arg: The root as given on the command line, used for hints.

    Returns:
        ``(payload, 0)`` where ``payload`` is the suggest report.

    Raises:
        PackError: If no input signal was provided (others may come from helpers).
    """
    query_text, _query_redactions = sanitize_text(args.query or "")
    # Collapse all whitespace runs in the sanitized query to single spaces.
    query = " ".join(query_text.split())
    query_terms = suggest_tokens(query)
    context_lines = bounded_int(args.context_lines, DEFAULT_SUGGEST_CONTEXT_LINES, 0, MAX_SUGGEST_CONTEXT_LINES)
    top = bounded_int(args.top, DEFAULT_SUGGEST_TOP, 1, MAX_SUGGEST_TOP)
    budget = bounded_int(args.budget_bytes, DEFAULT_BUDGET_BYTES, MIN_BUDGET_BYTES, MAX_BUDGET_BYTES)
    candidates: list[SuggestCandidate] = []
    omitted: list[dict[str, Any]] = []
    file_inputs = split_suggest_files(args.files)
    has_signal = bool(query or file_inputs or args.diff or args.output or args.test_output)
    if not has_signal:
        raise PackError("provide --query, --files, --diff, --output, or --test-output")

    # Explicit files get the highest base score (9_000).
    for raw_path in file_inputs:
        add_suggest_candidate(
            candidates,
            path=raw_path,
            score=9_000 + suggest_score_path(raw_path, query_terms),
            reason="explicit file request",
            label=f"file:{safe_raw_path_label(raw_path)}",
        )
    if args.diff:
        candidates.extend(collect_diff_candidates(root, args.diff, query_terms, context_lines))
    output_candidates, output_omitted = collect_output_candidates(root, args.output, query_terms, context_lines, origin="output")
    test_candidates, test_omitted = collect_output_candidates(root, args.test_output, query_terms, context_lines, origin="test-output")
    candidates.extend(output_candidates)
    candidates.extend(test_candidates)
    omitted.extend(output_omitted)
    omitted.extend(test_omitted)
    candidates.extend(collect_query_candidates(root, query_terms, context_lines))

    # Highest score first; ties broken by input order, path, then line identity.
    candidates.sort(key=lambda item: (-item.score, item.input_index, item.path, item.lines.identity() if item.lines else "0:0"))
    # `seen` de-duplicates on the requested (path, lines); `final_seen` on the resolved ones.
    seen: set[tuple[str, str]] = set()
    final_seen: set[tuple[str, str]] = set()
    selected: list[dict[str, Any]] = []
    manifest_seed: list[dict[str, Any]] = []
    # The running total starts at the size of the fixed pack header text.
    current_bytes = byte_len("# Context Pack\n\nGenerated by context-guard-pack. Token counts are estimated proxies; byte counts are observed.\n\n")
    for candidate in candidates:
        rel, reason = lexical_rel(candidate.path)
        identity_path = rel.as_posix() if rel is not None else safe_raw_path_label(candidate.path)
        identity_lines = candidate.lines.identity() if candidate.lines else "all"
        identity = (identity_path, identity_lines)
        if rel is not None and identity in seen:
            display, redacted = display_rel_path(rel.as_posix())
            duplicate_item = {
                "path": display,
                "status": "omitted",
                "reason": "duplicate_source",
                "suggest_reason": candidate.reason,
                "priority": candidate.score,
                "retrieval_omitted_reason": "redacted_path" if redacted else None,
            }
            # Drop None-valued keys before recording the omission.
            omitted.append({key: value for key, value in duplicate_item.items() if value is not None})
            continue
        if rel is not None:
            seen.add(identity)
        source, omitted_item = normalize_suggest_source(root, candidate)
        if omitted_item is not None:
            omitted_item["priority"] = candidate.score
            omitted_item["suggest_reason"] = candidate.reason
            omitted.append({key: value for key, value in omitted_item.items() if value is not None})
            continue
        assert source is not None
        final_identity = (source.display_path, source_selected_range(source).identity() if source.requested_lines is not None else "all")
        if final_identity in final_seen:
            omitted.append({
                "path": source.display_path,
                "status": "omitted",
                "reason": "duplicate_source",
                "suggest_reason": candidate.reason,
                "priority": candidate.score,
            })
            continue
        final_seen.add(final_identity)
        source_bytes = resolved_block_bytes(source, root_arg=root_arg)
        remaining = budget - current_bytes
        if source_bytes > remaining:
            # Only the first selection may be trimmed to fit; later oversize sources are omitted.
            if not selected and remaining > 0:
                partial_lines, _partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg)
                if partial_range is not None and partial_lines:
                    partial_spec = SourceSpec(
                        path=candidate.path,
                        priority=candidate.score,
                        lines=partial_range,
                        label=candidate.label,
                        input_index=candidate.input_index,
                        origin="suggest",
                    )
                    source, omitted_item = resolve_source(root, partial_spec)
                    if omitted_item is not None:
                        omitted_item["priority"] = candidate.score
                        omitted_item["suggest_reason"] = candidate.reason
                        omitted.append(omitted_item)
                        continue
                    assert source is not None
                    source_bytes = resolved_block_bytes(source, root_arg=root_arg)
                else:
                    omitted.append({"path": source.display_path, "status": "omitted", "reason": "budget_exhausted", "priority": candidate.score})
                    continue
            else:
                omitted.append({"path": source.display_path, "status": "omitted", "reason": "budget_exhausted", "priority": candidate.score})
                continue
        payload = suggested_source_payload(source, candidate, root_arg=root_arg)
        selected.append(payload)
        manifest_seed.append(manifest_source_for_candidate(source, priority=candidate.score, label=candidate.label))
        current_bytes += source_bytes
        if len(selected) >= top:
            break

    manifest = build_suggest_manifest(manifest_seed)
    # With nothing selected the header bytes are not reported either.
    estimated_pack_bytes = current_bytes if selected else 0
    manifest_path: str | None = None
    if args.manifest_out:
        manifest_path = write_manifest_under_root(root, args.manifest_out, manifest)
    build_hint, build_hint_omitted_reason = suggest_build_hint(root_arg, manifest_path, budget)
    payload: dict[str, Any] = {
        "tool": TOOL_NAME,
        "schema_version": SUGGEST_SCHEMA_VERSION,
        "version": VERSION,
        "mode": "suggest",
        "root": display_root(root),
        "query": query,
        "budget_bytes": budget,
        "estimated_pack_bytes": estimated_pack_bytes,
        "token_proxy": {
            "measurement": "estimated",
            "method": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}",
            "estimated_pack": estimated_pack_bytes // TOKEN_PROXY_CHARS_PER_TOKEN,
        },
        "sources": selected,
        # Omissions are ordered by path, reason, then priority.
        "omitted_sources": sorted(omitted, key=lambda item: (str(item.get("path", "")), str(item.get("reason", "")), int(item.get("priority", 0) or 0))),
        "manifest": manifest,
        "manifest_path": manifest_path,
        "build_hint": build_hint,
        "caveats": [
            "Deterministic local heuristics only; no model, network, embedding, or provider-cost estimate is used.",
            "Byte and token values are pack-size proxies, not billing claims.",
        ],
    }
    if build_hint_omitted_reason:
        payload["build_hint_omitted_reason"] = build_hint_omitted_reason
    return payload, 0
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
def line_range_identity(value: object) -> str:
    """Return a comparable string for a line-range value.

    ``None`` maps to ``"all"``, a dict to ``"<start>:<end>"`` (missing keys
    render as ``None``), and anything else to ``str(value)``.
    """
    if value is None:
        return "all"
    if not isinstance(value, dict):
        return str(value)
    start, end = value.get("start"), value.get("end")
    return f"{start}:{end}"
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
def copy_explain_fields(item: dict[str, Any], fields: tuple[str, ...]) -> dict[str, Any]:
    """Deep-copy the named ``fields`` of ``item`` that are present and not ``None``."""
    return {
        name: copy.deepcopy(item[name])
        for name in fields
        if item.get(name) is not None
    }
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
def build_source_matches_exact(suggest_item: dict[str, Any], build_item: dict[str, Any]) -> bool:
    """Tell whether a build source is the same source as a suggest entry.

    Path and priority must be equal. The suggest line identity must then equal
    the build item's requested or included line identity, or be ``"all"``.
    """
    for key in ("path", "priority"):
        if build_item.get(key) != suggest_item.get(key):
            return False
    wanted = line_range_identity(suggest_item.get("lines"))
    acceptable = {
        line_range_identity(build_item.get("requested_lines")),
        line_range_identity(build_item.get("included_lines")),
        "all",
    }
    return wanted in acceptable
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
def find_exact_build_source_for_explain(
    suggest_item: dict[str, Any],
    build_sources: list[dict[str, Any]],
    used_indexes: set[int],
) -> dict[str, Any] | None:
    """Claim the first unused build source that exactly matches ``suggest_item``.

    The index of the returned item is added to ``used_indexes``; ``None`` is
    returned when nothing matches.
    """
    for position, build_item in enumerate(build_sources):
        if position not in used_indexes and build_source_matches_exact(suggest_item, build_item):
            used_indexes.add(position)
            return build_item
    return None
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
def find_fallback_build_source_for_explain(
    suggest_item: dict[str, Any],
    build_sources: list[dict[str, Any]],
    used_indexes: set[int],
) -> dict[str, Any] | None:
    """Claim the first unused build source that shares ``suggest_item``'s path.

    The index of the returned item is added to ``used_indexes``; ``None`` is
    returned when no unused source has the same path.
    """
    wanted_path = suggest_item.get("path")
    position = next(
        (
            index
            for index, build_item in enumerate(build_sources)
            if index not in used_indexes and build_item.get("path") == wanted_path
        ),
        None,
    )
    if position is None:
        return None
    used_indexes.add(position)
    return build_sources[position]
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
def explain_omission_key(item: dict[str, Any]) -> tuple[str, str, str, str, str]:
    """Return the identity tuple for an explain omission record.

    The tuple holds phase, path, reason and suggest reason as strings, then
    the JSON form of ``requested_lines`` (or ``lines`` when that key is absent).
    """
    line_spec = item.get("requested_lines", item.get("lines", ""))
    phase, path, reason, suggest_reason = (
        str(item.get(name, ""))
        for name in ("phase", "path", "reason", "suggest_reason")
    )
    encoded_lines = json.dumps(line_spec, ensure_ascii=False, sort_keys=True)
    return (phase, path, reason, suggest_reason, encoded_lines)
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
def sanitize_explain_text(value: str, *, limit: int = MAX_LABEL_CHARS) -> str:
    """Sanitize ``value`` and cap it to ``limit`` via ``cap_label``.

    Always returns a string; a falsy ``cap_label`` result becomes ``""``.
    """
    cleaned, _was_redacted = sanitize_text(str(value))
    capped = cap_label(cleaned, default="", limit=limit)
    return capped if capped else ""
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
def is_repo_map_text_path(path: str) -> bool:
    """Tell whether ``path`` is treated as a text file for the repo map.

    A few well-known extensionless file names are always accepted (compared
    case-insensitively); every other path is judged by its lowercased suffix
    against ``REPO_MAP_TEXT_EXTENSIONS``.
    """
    candidate = Path(path)
    if candidate.name.lower() in {"readme", "license", "dockerfile", "makefile"}:
        return True
    return candidate.suffix.lower() in REPO_MAP_TEXT_EXTENSIONS
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
def read_repo_map_text(root: Path, rel_path: str) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
    """Read one repo file into a sanitized repo-map record.

    Returns ``(record, None)`` on success or ``(None, omission)`` when the
    path is invalid, not a supported text type, or cannot be opened or read.
    At most ``MAX_REPO_MAP_BYTES_PER_FILE`` bytes of text are kept. Secret-risk
    counts are taken from the capped text before it is sanitized.
    """
    rel, reason = lexical_rel(rel_path)
    if rel is None:
        return None, {"path": repo_map_safe_raw_path_label(rel_path), "reason": reason}
    display, redacted_path = repo_map_display_rel_path(rel.as_posix())
    if not is_repo_map_text_path(display):
        return None, {"path": display, "reason": "unsupported_file_type"}

    handle, open_reason = open_regular_under_root(root, rel)
    if handle is None:
        return None, {
            "path": display,
            "reason": open_reason,
            "retrieval_omitted_reason": "redacted_path" if redacted_path else None,
        }
    try:
        with handle:
            # Read one unit past the cap so an over-long file can be detected.
            text = handle.read(MAX_REPO_MAP_BYTES_PER_FILE + 1)
    except (OSError, UnicodeError):
        return None, {
            "path": display,
            "reason": "unsafe_path",
            "retrieval_omitted_reason": "redacted_path" if redacted_path else None,
        }

    capped = byte_len(text) > MAX_REPO_MAP_BYTES_PER_FILE
    if capped:
        encoded = text.encode("utf-8", errors="replace")
        # A multi-byte character split by the byte cut is dropped, not replaced.
        text = encoded[:MAX_REPO_MAP_BYTES_PER_FILE].decode("utf-8", errors="ignore")
    risk_counts = secret_risk_counts(text)
    sanitized_text, redacted_lines = sanitize_text(text)

    line_count = len(sanitized_text.splitlines())
    if line_count == 0:
        line_count = 1 if sanitized_text else 0
    record = {
        "path": display,
        "raw_path": rel.as_posix(),
        "redacted_path": redacted_path,
        "text": sanitized_text,
        "bytes": byte_len(sanitized_text),
        "bytes_capped": capped,
        "line_count": line_count,
        "redacted_lines": redacted_lines,
        "secret_risk_counts": risk_counts,
    }
    return record, None
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
def repo_map_records(root: Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]:
    """Collect repo-map records for the files listed by ``git_ls_files``.

    Only the first ``MAX_REPO_MAP_FILES`` paths are read. Unsupported file
    types are skipped silently; other failures are reported as omissions.

    Returns:
        ``(records, omitted, caps)`` where ``caps`` reports the limits in
        force and which of them were hit.
    """
    tracked = git_ls_files(root)
    records: list[dict[str, Any]] = []
    omitted: list[dict[str, Any]] = []
    for rel_path in tracked[:MAX_REPO_MAP_FILES]:
        record, omission_item = read_repo_map_text(root, rel_path)
        if record is not None:
            records.append(record)
            continue
        if omission_item is None or omission_item.get("reason") == "unsupported_file_type":
            continue
        omitted.append({key: value for key, value in omission_item.items() if value is not None})

    capped_files = sum(1 for record in records if record.get("bytes_capped"))
    caps = {
        "max_files": MAX_REPO_MAP_FILES,
        "files_capped": len(tracked) > MAX_REPO_MAP_FILES,
        "max_bytes_per_file": MAX_REPO_MAP_BYTES_PER_FILE,
        "bytes_per_file_capped_count": capped_files,
        "max_tree_entries": MAX_REPO_MAP_TREE_ENTRIES,
        "max_signature_entries": MAX_REPO_MAP_SIGNATURE_ENTRIES,
        "max_graph_rank_entries": MAX_REPO_MAP_GRAPH_RANK_ENTRIES,
        "max_retrieval_hints": MAX_REPO_MAP_RETRIEVAL_HINTS,
        "max_secret_risk_files": MAX_REPO_MAP_SECRET_RISK_FILES,
    }
    return records, omitted, caps
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
def secret_risk_counts(text: str) -> dict[str, int]:
    """Count matches of each ``SECRET_RISK_PATTERNS`` entry in ``text``.

    Only patterns with at least one match appear in the result.
    """
    tallies = (
        (name, len(pattern.findall(text)))
        for name, pattern in SECRET_RISK_PATTERNS
    )
    return {name: found for name, found in tallies if found}
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
def build_secret_scan(records: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate per-file secret-risk counts into a scan summary.

    Files are ordered by descending total count, then path, and the list is
    truncated to ``MAX_REPO_MAP_SECRET_RISK_FILES``. The number of truncated
    files is reported separately.
    """
    totals: dict[str, int] = {}
    risky_files: list[dict[str, Any]] = []
    for record in records:
        raw_counts = record.get("secret_risk_counts")
        counts = dict(raw_counts) if isinstance(raw_counts, dict) else {}
        if not counts:
            continue
        for pattern_name, found in counts.items():
            totals[pattern_name] = totals.get(pattern_name, 0) + found
        risky_files.append({
            "path": record["path"],
            "counts": counts,
            "redacted_path": bool(record.get("redacted_path")),
        })
    risky_files.sort(key=lambda entry: (-sum(entry["counts"].values()), entry["path"]))
    overflow = len(risky_files) - MAX_REPO_MAP_SECRET_RISK_FILES
    return {
        "risk_counts": dict(sorted(totals.items())),
        "files_with_risks": risky_files[:MAX_REPO_MAP_SECRET_RISK_FILES],
        "files_omitted_by_cap": max(0, overflow),
        "caveat": "Counts are local best-effort secret-pattern risk signals; raw matched values are never emitted.",
    }
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
def build_token_tree(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Build the size-ranked list of file and directory entries.

    Each file contributes its bytes to every ancestor directory. Entries are
    sorted by descending bytes, then path, and truncated to
    ``MAX_REPO_MAP_TREE_ENTRIES``.
    """
    per_directory: dict[str, dict[str, int]] = {}
    files: list[dict[str, Any]] = []
    for record in records:
        path = str(record["path"])
        size = int(record.get("bytes", 0) or 0)
        files.append({
            "kind": "file",
            "path": path,
            "bytes": size,
            "token_proxy": token_proxy(str(record.get("text", ""))),
            "line_count": int(record.get("line_count", 0) or 0),
            "bytes_capped": bool(record.get("bytes_capped")),
        })
        # Credit every ancestor directory; a top-level file has none.
        ancestor = ""
        for segment in path.split("/")[:-1]:
            ancestor = segment if not ancestor else f"{ancestor}/{segment}"
            totals = per_directory.setdefault(ancestor, {"bytes": 0, "file_count": 0})
            totals["bytes"] += size
            totals["file_count"] += 1

    directories: list[dict[str, Any]] = []
    for dir_path, totals in per_directory.items():
        directories.append({
            "kind": "directory",
            "path": dir_path,
            "bytes": totals["bytes"],
            "token_proxy": max(0, round(totals["bytes"] / TOKEN_PROXY_CHARS_PER_TOKEN)),
            "file_count": totals["file_count"],
        })

    ranked = sorted(
        directories + files,
        key=lambda entry: (-int(entry.get("bytes", 0) or 0), str(entry.get("path", ""))),
    )
    return ranked[:MAX_REPO_MAP_TREE_ENTRIES]
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
def signature_range(line_number: int, total_lines: int) -> LineRange:
    """Return a range from ``line_number`` spanning at most 25 lines.

    Both bounds are clamped to at least 1, and the end is limited to
    ``total_lines``.
    """
    first = max(1, line_number)
    last_allowed = max(1, total_lines)
    return LineRange(first, min(last_allowed, first + 24))
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
def signature_entry(record: dict[str, Any], *, kind: str, name: str, raw_signature: str, line_number: int) -> dict[str, Any]:
    """Build one signature entry for ``record``.

    The name is sanitized and capped at 80 characters, the signature at 180,
    and ``lines`` is the range given by ``signature_range``.
    """
    total = int(record.get("line_count", 0) or 1)
    span = signature_range(line_number, total)
    path = record["path"]
    clean_name = sanitize_explain_text(name, limit=80)
    clean_signature = sanitize_explain_text(raw_signature, limit=180)
    return {
        "path": path,
        "kind": kind,
        "name": clean_name,
        "signature": clean_signature,
        "line": line_number,
        "lines": span.as_dict(),
    }
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
def python_signatures(record: dict[str, Any], text: str) -> list[dict[str, Any]]:
    """Extract class, method and function signatures from Python ``text``.

    Only module-level classes and functions, plus the direct methods of those
    classes, are reported. Text that ``ast.parse`` rejects yields an empty list.
    """
    try:
        module = ast.parse(text)
    except (SyntaxError, ValueError, RecursionError):
        return []
    source_lines = text.splitlines()
    found: list[dict[str, Any]] = []
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    def record_node(node: Any, kind: str, keyword: str) -> None:
        # Use the real source line when it exists, else a synthetic "<keyword> <name>".
        if 0 < node.lineno <= len(source_lines):
            raw = source_lines[node.lineno - 1].strip()
        else:
            raw = f"{keyword} {node.name}"
        found.append(signature_entry(record, kind=kind, name=node.name, raw_signature=raw, line_number=node.lineno))

    for node in module.body:
        if isinstance(node, ast.ClassDef):
            record_node(node, "class", "class")
            for member in node.body:
                if isinstance(member, function_types):
                    record_node(member, "method", "def")
        elif isinstance(node, function_types):
            record_node(node, "function", "def")
    return found
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
def regex_signatures(record: dict[str, Any], text: str) -> list[dict[str, Any]]:
    """Collect signature rows from ``text`` with line-oriented regex matching.

    For ``.md``/``.mdx`` paths, ATX headings (one to six ``#``) become
    ``heading`` rows. For other paths, lines matching ``SIGNATURE_LINE_RE``
    become ``class`` or ``function`` rows, named after the first non-empty
    capture group (or ``"signature"`` when every group is empty).
    """
    extension = Path(str(record.get("path", ""))).suffix.lower()
    is_markdown = extension in {".md", ".mdx"}
    found: list[dict[str, Any]] = []
    for line_no, raw in enumerate(text.splitlines(), start=1):
        stripped = raw.strip()
        if is_markdown:
            # NOTE(review): indentation was lost in the recovered source; this
            # assumes Markdown files contribute headings only — confirm.
            heading = re.match(r"^(#{1,6})\s+(.+)$", stripped)
            if heading:
                found.append(
                    signature_entry(record, kind="heading", name=heading.group(2), raw_signature=stripped, line_number=line_no)
                )
            continue
        match = SIGNATURE_LINE_RE.match(raw)
        if not match:
            continue
        name = next((group for group in match.groups() if group), "signature")
        # A line is a class when "class <name>" appears in it; otherwise a function.
        is_class = re.search(r"\bclass\s+" + re.escape(name), raw)
        kind = "class" if is_class else "function"
        found.append(signature_entry(record, kind=kind, name=name, raw_signature=stripped, line_number=line_no))
    return found
|
|
2045
|
-
|
|
2046
|
-
|
|
2047
|
-
def extract_signatures(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Build the signature index for all ``records``.

    ``.py`` records go through ``python_signatures`` first; when that yields
    nothing, and for every other suffix, ``regex_signatures`` is used. The
    result is sorted by path, line and name, then truncated to
    ``MAX_REPO_MAP_SIGNATURE_ENTRIES``.
    """
    collected: list[dict[str, Any]] = []
    for record in records:
        text = str(record.get("text", ""))
        is_python = Path(str(record.get("path", ""))).suffix.lower() == ".py"
        entries = python_signatures(record, text) if is_python else []
        if not entries:
            # Non-Python file, or the AST pass produced no rows.
            entries = regex_signatures(record, text)
        collected.extend(entries)

    def sort_key(item: dict[str, Any]) -> tuple[str, int, str]:
        return (str(item.get("path", "")), int(item.get("line", 0) or 0), str(item.get("name", "")))

    collected.sort(key=sort_key)
    return collected[:MAX_REPO_MAP_SIGNATURE_ENTRIES]
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
def normalize_repo_map_candidate(path: str) -> str:
    """Normalize a candidate path to a slash-separated, root-relative form.

    Backslashes become forward slashes, the path is collapsed with
    ``posixpath.normpath``, and leading slashes are dropped. A path that
    collapses to ``"."`` is returned as the empty string.
    """
    collapsed = posixpath.normpath(path.replace("\\", "/"))
    return "" if collapsed == "." else collapsed.lstrip("/")
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
def resolve_import_target(raw_target: str, source_path: str, known_paths: set[str]) -> str | None:
    """Map an import string to a path in ``known_paths``, or ``None``.

    Targets starting with ``./`` or ``../`` are joined onto the directory of
    ``source_path``. Other dot-prefixed targets are treated as dotted relative
    modules: each leading dot beyond the first climbs one directory and the
    remaining dots become slashes. Targets without a leading dot have their
    dots turned into slashes. The first candidate (bare relative base, then a
    fixed list of suffixes) whose normalized form is in ``known_paths`` wins.
    """
    target = raw_target.strip()
    if not target:
        return None
    probe_suffixes = (".py", ".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.js")
    source_dir = Path(source_path).parent.as_posix()
    if not target.startswith("."):
        module_path = target.replace(".", "/")
        candidates = [f"{module_path}{suffix}" for suffix in probe_suffixes]
    else:
        if target.startswith(("./", "../")):
            base = normalize_repo_map_candidate(posixpath.join(source_dir, target))
        else:
            dot_count = len(target) - len(target.lstrip("."))
            remainder = target[dot_count:].replace(".", "/")
            base_dir = source_dir
            for _ in range(max(0, dot_count - 1)):
                base_dir = posixpath.dirname(base_dir)
            joined = posixpath.join(base_dir, remainder) if remainder else base_dir
            base = normalize_repo_map_candidate(joined)
        # Relative targets also try the bare base path before any suffix.
        candidates = [base] + [f"{base}{suffix}" for suffix in probe_suffixes]
    for candidate in candidates:
        normalized = normalize_repo_map_candidate(candidate)
        if normalized in known_paths:
            return normalized
    return None
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
def python_from_import_targets(module_name: str, imported_names: str) -> list[str]:
    """Expand a ``from <module> import <names>`` statement into import targets.

    When ``module_name`` contains anything besides dots, only the module is
    returned. When it is dots only (or empty), each imported identifier is
    appended to the module prefix as an extra target; ``as`` aliases and
    parentheses are stripped, and non-identifiers such as ``*`` are skipped.
    """
    if module_name.strip("."):
        return [module_name]
    without_parens = imported_names.replace("(", " ").replace(")", " ")
    bare_names = (chunk.strip().split(" as ", 1)[0].strip() for chunk in without_parens.split(","))
    extras = [f"{module_name}{name}" for name in bare_names if re.fullmatch(r"[A-Za-z_]\w*", name)]
    return [module_name] + extras
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
def collect_import_edges(records: list[dict[str, Any]]) -> list[dict[str, str]]:
    """Scan record text line by line and return de-duplicated import edges.

    A line matching ``PY_FROM_IMPORT_LINE_RE`` is expanded with
    ``python_from_import_targets``; any other line contributes every
    ``IMPORT_PATH_RE`` match (first non-empty named group). Targets that do not
    resolve to a known record path, or that resolve to the importing file
    itself, are dropped. Collection stops once ``MAX_REPO_MAP_FILES`` edges
    have been gathered.
    """
    known_paths = {str(record.get("path", "")) for record in records}
    edges: list[dict[str, str]] = []
    recorded: set[tuple[str, str]] = set()
    for record in records:
        origin = str(record.get("path", ""))
        for line in str(record.get("text", "")).splitlines():
            from_import = PY_FROM_IMPORT_LINE_RE.match(line)
            if from_import:
                raw_targets = python_from_import_targets(from_import.group("module"), from_import.group("names"))
            else:
                raw_targets = [
                    next(filter(None, found.groupdict().values()), "")
                    for found in IMPORT_PATH_RE.finditer(line)
                ]
            for raw_target in raw_targets:
                resolved = resolve_import_target(raw_target, origin, known_paths)
                if resolved is None or resolved == origin:
                    continue
                pair = (origin, resolved)
                if pair in recorded:
                    continue
                recorded.add(pair)
                edges.append({"from": origin, "to": resolved})
                if len(edges) >= MAX_REPO_MAP_FILES:
                    return edges
    return edges
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
def repo_map_seed_paths(args: argparse.Namespace, suggest_payload: dict[str, Any], build_payload: dict[str, Any]) -> set[str]:
    """Gather the paths that count as seeds for graph ranking.

    Seeds come from three places: the explicit ``files`` argument (skipping
    entries that ``lexical_rel`` rejects or whose display path is redacted),
    the ``sources`` of the suggest payload, and the ``included_sources`` of
    the build payload.
    """
    seeds: set[str] = set()
    for raw in split_suggest_files(getattr(args, "files", None)):
        rel, _reason = lexical_rel(raw)
        if rel is None:
            continue
        display, redacted = repo_map_display_rel_path(rel.as_posix())
        if not redacted:
            seeds.add(display)
    for container, key in ((suggest_payload, "sources"), (build_payload, "included_sources")):
        seeds.update(
            entry["path"]
            for entry in container.get(key, [])
            if isinstance(entry, dict) and isinstance(entry.get("path"), str)
        )
    return seeds
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
def build_graph_rank(
    records: list[dict[str, Any]],
    signatures: list[dict[str, Any]],
    edges: list[dict[str, str]],
    *,
    query_terms: set[str],
    seed_paths: set[str],
    secret_scan: dict[str, Any],
) -> list[dict[str, Any]]:
    """Score each record and return the ranked, capped explain list.

    The score is the sum of these components:
      * ``seed``: 1000 when the path is a seed path.
      * ``query_path``: the ``suggest_score_path`` result for the path.
      * ``query_content``: 25 per query-term occurrence in the lower-cased
        text, capped at 500.
      * ``signature``: 80 when the path has at least one signature row.
      * ``graph_degree``: 25 per import edge touching the path.
      * ``secret_risk_penalty``: -25 when the secret scan lists the path.

    Records with a non-positive score are dropped. The rest are sorted by
    descending score, then path, and truncated to
    ``MAX_REPO_MAP_GRAPH_RANK_ENTRIES``.
    """
    paths_with_signatures = {str(entry.get("path", "")) for entry in signatures}
    risky_paths = {
        str(hit.get("path", ""))
        for hit in secret_scan.get("files_with_risks", [])
        if isinstance(hit, dict)
    }
    edge_counts: dict[str, int] = {}
    for edge in edges:
        for endpoint in (edge["from"], edge["to"]):
            edge_counts[endpoint] = edge_counts.get(endpoint, 0) + 1

    scored: list[dict[str, Any]] = []
    for record in records:
        path = str(record.get("path", ""))
        lowered = str(record.get("text", "")).lower()
        path_score = suggest_score_path(path, query_terms)
        term_hits = sum(lowered.count(term) for term in query_terms)
        components = {
            "seed": 1000 if path in seed_paths else 0,
            "query_path": path_score,
            "query_content": min(500, 25 * term_hits),
            "signature": 80 if path in paths_with_signatures else 0,
            "graph_degree": 25 * edge_counts.get(path, 0),
            "secret_risk_penalty": -25 if path in risky_paths else 0,
        }
        score = sum(components.values())
        if score > 0:
            scored.append({
                "path": path,
                "score": score,
                "components": components,
                "explain_only": True,
                "line_count": int(record.get("line_count", 0) or 0),
            })
    scored.sort(key=lambda entry: (-int(entry["score"]), str(entry["path"])))
    return scored[:MAX_REPO_MAP_GRAPH_RANK_ENTRIES]
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
def repo_map_retrieval_for(root_arg: str, display_path: str, lines: LineRange, *, redacted_path: bool) -> tuple[str | None, str | None]:
    """Return ``(retrieval_command, omitted_reason)`` for one path and range.

    Exactly one element is non-``None``: the ``retrieval_cli`` string, or the
    reason ``"redacted_path"`` / ``"unsafe_root_path"`` when no command is
    produced.
    """
    if not redacted_path:
        safe_root = safe_repo_map_root_arg_for_retrieval(root_arg)
        if safe_root is not None:
            return retrieval_cli(safe_root, display_path, lines), None
        return None, "unsafe_root_path"
    return None, "redacted_path"
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
def repo_map_retrieval(
    record_by_path: dict[str, dict[str, Any]],
    signatures: list[dict[str, Any]],
    graph_rank: list[dict[str, Any]],
    *,
    root_arg: str,
) -> list[dict[str, Any]]:
    """Produce retrieval hints for signature rows, then for ranked files.

    Signature rows are emitted first, using their own line window. Ranked
    files follow with a window of lines 1 to ``min(line_count, 80)``. Hints
    are de-duplicated on (path, range identity, source) and the list is capped
    at ``MAX_REPO_MAP_RETRIEVAL_HINTS``.
    """
    hints: list[dict[str, Any]] = []
    emitted: set[tuple[str, str, str]] = set()

    def remember(path: str, line_range: LineRange, source: str, name: str | None = None) -> None:
        record = record_by_path.get(path)
        if record is None:
            return
        retrieval, reason = repo_map_retrieval_for(
            root_arg, path, line_range, redacted_path=bool(record.get("redacted_path"))
        )
        key = (path, line_range.identity(), source)
        if key in emitted:
            return
        emitted.add(key)
        hint: dict[str, Any] = {"path": path, "source": source, "lines": line_range.as_dict()}
        if retrieval:
            hint["slice_cli"] = retrieval
            # A symbol hint is only offered alongside a usable slice command.
            if name and Path(path).suffix.lower() in SYMBOL_HINT_EXTENSIONS:
                hint["symbol_cli"] = shlex.join(["context-guard-read-symbol", "--json", path, name])
        elif reason:
            hint["retrieval_omitted_reason"] = reason
        hints.append(hint)

    for signature in signatures:
        span = signature.get("lines")
        if isinstance(span, dict):
            try:
                window = LineRange(int(span.get("start")), int(span.get("end")))
            except (TypeError, ValueError):
                # Missing or non-numeric bounds: skip this signature.
                continue
            remember(str(signature.get("path", "")), window, "signature", str(signature.get("name", "")) or None)
        if len(hints) >= MAX_REPO_MAP_RETRIEVAL_HINTS:
            return hints[:MAX_REPO_MAP_RETRIEVAL_HINTS]

    for ranked in graph_rank:
        path = str(ranked.get("path", ""))
        record = record_by_path.get(path)
        if record is None:
            continue
        line_total = int(record.get("line_count", 0) or 1)
        remember(path, LineRange(1, min(line_total, 80)), "graph_rank")
        if len(hints) >= MAX_REPO_MAP_RETRIEVAL_HINTS:
            break
    return hints[:MAX_REPO_MAP_RETRIEVAL_HINTS]
|
|
2249
|
-
|
|
2250
|
-
|
|
2251
|
-
def build_repo_map_payload(
    root: Path,
    args: argparse.Namespace,
    suggest_payload: dict[str, Any],
    build_payload: dict[str, Any],
    *,
    root_arg: str,
) -> dict[str, Any]:
    """Assemble the ``repo_map`` section of the explain payload.

    Collects records with ``repo_map_records``, then derives the signature
    index, secret scan, import edges, graph rank, retrieval hints and token
    tree from them. Returns a dict with ``summary``, ``caps``, ``token_tree``,
    ``secret_scan``, ``signature_index``, ``graph``, ``graph_rank``,
    ``retrieval``, ``omitted_files`` and ``safety`` sections.
    """
    records, omitted, caps = repo_map_records(root)
    record_by_path = {str(record["path"]): record for record in records}
    signatures = extract_signatures(records)
    secret_scan = build_secret_scan(records)
    edges = collect_import_edges(records)
    query_terms = suggest_tokens(str(suggest_payload.get("query", "")))
    graph_rank = build_graph_rank(
        records,
        signatures,
        edges,
        query_terms=query_terms,
        seed_paths=repo_map_seed_paths(args, suggest_payload, build_payload),
        secret_scan=secret_scan,
    )
    retrieval = repo_map_retrieval(record_by_path, signatures, graph_rank, root_arg=root_arg)
    tree = build_token_tree(records)
    total_bytes = sum(int(record.get("bytes", 0) or 0) for record in records)
    # Derive the summary proxy from the scanned byte total. The tree holds
    # directory entries that already aggregate every file beneath them, plus
    # file entries, and is capped. Summing ``token_proxy`` over it would count
    # the same bytes once per nesting level and disagree with ``tree_bytes``.
    tree_token_proxy = max(0, round(total_bytes / TOKEN_PROXY_CHARS_PER_TOKEN))
    return {
        "schema_version": REPO_MAP_SCHEMA_VERSION,
        "summary": {
            "files_scanned": len(records),
            "files_capped": bool(caps["files_capped"]),
            "bytes_per_file_capped_count": int(caps["bytes_per_file_capped_count"]),
            "tree_bytes": total_bytes,
            "tree_token_proxy": tree_token_proxy,
            "signature_files": len({str(item.get("path", "")) for item in signatures}),
            "signature_count": len(signatures),
            "secret_risk_files": len(secret_scan.get("files_with_risks", [])),
            "graph_edges": len(edges),
        },
        "caps": caps,
        "token_tree": tree,
        "secret_scan": secret_scan,
        "signature_index": signatures,
        "graph": {
            "edges": edges[:MAX_REPO_MAP_GRAPH_RANK_ENTRIES],
            "edges_omitted_by_cap": max(0, len(edges) - MAX_REPO_MAP_GRAPH_RANK_ENTRIES),
        },
        "graph_rank": graph_rank,
        "retrieval": retrieval,
        "omitted_files": omitted[:MAX_REPO_MAP_TREE_ENTRIES],
        "safety": {
            "deterministic_local_only": True,
            "no_network": True,
            "no_model_or_embedding": True,
            "explain_only": True,
            "redacted_before_output": True,
            "tree_sitter": {"status": "unavailable_without_optional_dependency", "fallback": "python_ast_and_regex_signatures"},
            "caveats": [
                "Repo-map bytes are local sampled UTF-8 bytes and estimated chars_div_4 token proxies, not provider-token or savings claims.",
                "Graph ranking is deterministic explain metadata only; it does not change pack selection in this stage.",
            ],
        },
    }
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
def build_auto_explain_payload(
    args: argparse.Namespace,
    suggest_payload: dict[str, Any],
    build_payload: dict[str, Any],
    payload: dict[str, Any],
    *,
    root: Path | None = None,
    root_arg: str = ".",
) -> dict[str, Any]:
    """Build the ``explain`` metadata attached to an ``auto`` payload.

    The result pairs each suggested source with its build outcome
    (``selection``), merges and de-duplicates omissions from both phases
    (``omissions``), and adds ``summary``, ``inputs``, ``budget`` and
    ``safety`` sections. When ``root`` is given, a ``repo_map`` section from
    ``build_repo_map_payload`` is included as well.
    """

    def dict_items(container: dict[str, Any], key: str) -> list[dict[str, Any]]:
        return [entry for entry in container.get(key, []) if isinstance(entry, dict)]

    def dict_section(container: dict[str, Any], key: str) -> dict[str, Any]:
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    built = dict_items(build_payload, "included_sources")
    claimed: set[int] = set()
    suggested = dict_items(suggest_payload, "sources")

    # Pass 1: reserve exact matches for every suggestion before any fallback
    # lookup runs in pass 2.
    exact: dict[int, dict[str, Any]] = {}
    for position, candidate in enumerate(suggested):
        hit = find_exact_build_source_for_explain(candidate, built, claimed)
        if hit is not None:
            exact[position] = hit

    selection_fields = (
        "path", "score", "priority", "reason", "label", "lines", "bytes", "retrieval_cli", "retrieval_omitted_reason",
    )
    selection: list[dict[str, Any]] = []
    for position, candidate in enumerate(suggested):
        row = copy_explain_fields(candidate, selection_fields)
        matched = exact.get(position)
        if matched is None:
            matched = find_fallback_build_source_for_explain(candidate, built, claimed)
        if matched is None:
            row["build_status"] = "not_built"
        else:
            row["build_status"] = matched.get("status", "included")
            for key in ("requested_lines", "included_lines"):
                if key in matched:
                    row[key] = copy.deepcopy(matched[key])
            if "bytes" in matched:
                row["build_bytes"] = matched["bytes"]
        selection.append(row)

    omission_fields = (
        "path",
        "status",
        "reason",
        "suggest_reason",
        "priority",
        "label",
        "requested_lines",
        "included_lines",
        "lines",
        "total_lines",
        "retrieval_cli",
        "retrieval_omitted_reason",
        "input_index",
    )
    omissions: list[dict[str, Any]] = []
    seen_omissions: set[tuple[str, str, str, str, str]] = set()
    for phase, phase_payload in (("suggest", suggest_payload), ("build", build_payload)):
        for omitted in dict_items(phase_payload, "omitted_sources"):
            row = copy_explain_fields(omitted, omission_fields)
            row["phase"] = phase
            identity = explain_omission_key(row)
            if identity not in seen_omissions:
                seen_omissions.add(identity)
                omissions.append(row)
    omissions.sort(key=explain_omission_key)

    build_counts = dict_section(build_payload, "sources")
    auto_counts = dict_section(payload, "sources")
    artifact = dict_section(build_payload, "artifact")
    pack_bytes = int(payload.get("pack_bytes", build_payload.get("pack_bytes", 0)) or 0)
    budget_bytes = int(payload.get("budget_bytes", build_payload.get("budget_bytes", 0)) or 0)
    budget_omitted_count = sum(1 for row in omissions if row.get("reason") == "budget_exhausted")
    explicit_files = split_suggest_files(args.files)
    query = str(suggest_payload.get("query", ""))
    diff_label = cap_label(args.diff) if getattr(args, "diff", None) else None

    summary = {
        "suggested": int(auto_counts.get("suggested", len(selection)) or 0),
        "included": int(auto_counts.get("included", build_counts.get("included", 0)) or 0),
        "partial": int(auto_counts.get("partial", build_counts.get("partial", 0)) or 0),
        "omitted": int(auto_counts.get("omitted", build_counts.get("omitted", 0)) or 0),
        "suggest_omitted": len(dict_items(suggest_payload, "omitted_sources")),
        "explain_omissions": len(omissions),
        "pack_bytes": pack_bytes,
        "budget_bytes": budget_bytes,
        "manifest_written": bool(payload.get("manifest_path")),
        "pack_written": bool(payload.get("pack_path")),
        "artifact_stored": bool(artifact.get("stored")),
        "artifact_capped": bool(artifact.get("capped")),
    }
    inputs = {
        "query": query,
        "query_present": bool(query),
        "diff": diff_label,
        "diff_present": bool(diff_label),
        "explicit_file_count": len(explicit_files),
        "output_count": len(args.output or []),
        "test_output_count": len(args.test_output or []),
        "top": bounded_int(args.top, DEFAULT_SUGGEST_TOP, 1, MAX_SUGGEST_TOP),
        "context_lines": bounded_int(args.context_lines, DEFAULT_SUGGEST_CONTEXT_LINES, 0, MAX_SUGGEST_CONTEXT_LINES),
        "no_artifact": bool(args.no_artifact),
        "manifest_path": payload.get("manifest_path"),
        "pack_path": payload.get("pack_path"),
    }
    budget = {
        "pack_bytes": pack_bytes,
        "budget_bytes": budget_bytes,
        "remaining_bytes": budget_bytes - pack_bytes,
        "partial_count": int(build_counts.get("partial", 0) or 0),
        "budget_omitted_count": budget_omitted_count,
        "token_proxy": copy.deepcopy(payload.get("token_proxy", {})),
        "measurement": "observed_bytes_estimated_tokens",
        "caveat": "Byte counts are observed pack bytes; token counts are estimated chars_div_4 proxies, not provider-token savings.",
    }
    safety = {
        "redaction": copy.deepcopy(build_payload.get("redaction", {})),
        "caveats": copy.deepcopy(payload.get("caveats", [])),
        "deterministic_local_only": True,
        "raw_output_embedded": False,
        "raw_test_output_embedded": False,
    }
    explain: dict[str, Any] = {
        "schema_version": AUTO_EXPLAIN_SCHEMA_VERSION,
        "summary": summary,
        "inputs": inputs,
        "selection": selection,
        "omissions": omissions,
        "budget": budget,
        "safety": safety,
    }
    if root is not None:
        explain["repo_map"] = build_repo_map_payload(root, args, suggest_payload, build_payload, root_arg=root_arg)
    return explain
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
def auto_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tuple[dict[str, Any], int]:
    """Run suggest and build in one step and return ``(payload, return_code)``.

    Order of operations:
      1. Reject ``--manifest-out`` / ``--pack-out`` pointing at the same
         target, and validate each output path under ``root``.
      2. Run ``suggest_pack`` on a copy of ``args`` with ``manifest_out``
         cleared, then ``build_pack`` with ``store_artifact=False``.
      3. Unless ``--no-artifact`` is set, reject output targets that match the
         receipt path for this pack id.
      4. Write the pack, then the manifest, then store the receipt.
      5. Assemble the payload, adding ``explain`` when ``--explain`` is set.

    The return code is the one reported by ``suggest_pack``.
    """
    manifest_rel = output_rel_for_collision_check(args.manifest_out, "--manifest-out") if args.manifest_out else None
    pack_rel = output_rel_for_collision_check(args.pack_out, "--pack-out") if args.pack_out else None
    if manifest_rel is not None and pack_rel is not None:
        reject_matching_output_targets(
            root,
            first_rel=manifest_rel,
            second_rel=pack_rel,
            second_option="--pack-out",
            reason="same_as_manifest_out",
        )
    if args.manifest_out:
        validate_output_path_under_root(root, args.manifest_out, "--manifest-out")
    if args.pack_out:
        validate_output_path_under_root(root, args.pack_out, "--pack-out")

    # The suggest phase must not write the manifest itself; that happens below.
    suggest_args = copy.copy(args)
    suggest_args.manifest_out = None
    suggest_payload, rc = suggest_pack(root, suggest_args, root_arg=root_arg)
    manifest = suggest_payload["manifest"]
    specs = manifest_to_source_specs(manifest)
    budget = bounded_int(args.budget_bytes, DEFAULT_BUDGET_BYTES, MIN_BUDGET_BYTES, MAX_BUDGET_BYTES)
    build_payload = build_pack(root, specs, budget_bytes=budget, root_arg=root_arg, store_artifact=False)

    if not args.no_artifact:
        receipt_rel = Path(PACK_DIR) / f"{build_payload['pack_id']}.json"
        for output_rel, option in ((manifest_rel, "--manifest-out"), (pack_rel, "--pack-out")):
            if output_rel is not None:
                reject_matching_output_targets(
                    root,
                    first_rel=receipt_rel,
                    second_rel=output_rel,
                    second_option=option,
                    reason="same_as_artifact_receipt",
                )

    manifest_path: str | None = None
    pack_path: str | None = None
    if args.pack_out:
        pack_path = write_text_under_root(root, args.pack_out, str(build_payload["pack"]), "--pack-out")
    if args.manifest_out:
        manifest_path = write_manifest_under_root(root, args.manifest_out, manifest)
    if not args.no_artifact:
        build_payload["artifact"] = store_receipt(root, build_payload)

    build_hint, build_hint_omitted_reason = suggest_build_hint(root_arg, manifest_path, budget)
    suggest_payload["manifest_path"] = manifest_path
    suggest_payload["build_hint"] = build_hint
    # Drop any stale reason from the suggest phase before setting the current one.
    suggest_payload.pop("build_hint_omitted_reason", None)
    if build_hint_omitted_reason:
        suggest_payload["build_hint_omitted_reason"] = build_hint_omitted_reason

    built_counts = build_payload.get("sources", {})
    payload: dict[str, Any] = {
        "tool": TOOL_NAME,
        "schema_version": AUTO_SCHEMA_VERSION,
        "version": VERSION,
        "mode": "auto",
        "root": display_root(root),
        "query": suggest_payload.get("query", ""),
        "budget_bytes": budget,
        "manifest": manifest,
        "manifest_path": manifest_path,
        "pack_path": pack_path,
        "suggest": suggest_payload,
        "build": build_payload,
        "sources": {
            "suggested": len(suggest_payload.get("sources", [])),
            "included": built_counts.get("included", 0),
            "partial": built_counts.get("partial", 0),
            "omitted": built_counts.get("omitted", 0),
        },
        "pack_bytes": build_payload.get("pack_bytes", 0),
        "token_proxy": build_payload.get("token_proxy", {}),
        "caveats": [
            "Deterministic local heuristics only; no model, network, embedding, or provider-cost estimate is used.",
            "Byte and token values are pack-size proxies, not billing claims.",
        ],
    }
    if build_hint_omitted_reason:
        payload["build_hint_omitted_reason"] = build_hint_omitted_reason
    if args.explain:
        payload["explain"] = build_auto_explain_payload(
            args, suggest_payload, build_payload, payload, root=root, root_arg=root_arg
        )
    return payload, rc
|
|
2540
|
-
|
|
2541
|
-
|
|
2542
|
-
def print_suggest_text(payload: dict[str, Any]) -> None:
    """Print the human-readable summary of a ``suggest`` payload to stdout.

    Emits a header line, one line per source (with ``:start:end`` when the
    source has a ``lines`` dict), the manifest path when present, and either
    the build hint or the reason it was omitted.
    """
    sources = payload["sources"]
    print(
        f"context-guard-pack suggest: {len(sources)} source(s), "
        f"estimated {payload['estimated_pack_bytes']}/{payload['budget_bytes']} bytes"
    )
    for source in sources:
        span = source.get("lines")
        span_text = f":{span['start']}:{span['end']}" if isinstance(span, dict) else ""
        print(f"- {source['path']}{span_text} priority={source['priority']} reason={source['reason']}")
    manifest_path = payload.get("manifest_path")
    if manifest_path:
        print(f"manifest: {manifest_path}")
    build_hint = payload.get("build_hint")
    omitted_reason = payload.get("build_hint_omitted_reason")
    if build_hint:
        print(f"build: {build_hint}")
    elif omitted_reason:
        print(f"build hint omitted: {omitted_reason}")
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
def print_auto_text(payload: dict[str, Any]) -> None:
    """Print the human-readable summary of an ``auto`` payload to stdout.

    Always prints a header line. When ``payload["explain"]`` is a dict, it
    also prints an explain summary, up to five selection rows, and a tally of
    omission reasons. The manifest path is printed when present. If a pack
    path is present it is printed; otherwise a blank line and the pack text
    itself are written.
    """
    print(
        f"context-guard-pack auto: {payload['sources']['suggested']} suggested source(s), "
        f"pack {payload['pack_bytes']}/{payload['budget_bytes']} bytes"
    )
    explain = payload.get("explain")
    if isinstance(explain, dict):

        def section(key: str, expected: type) -> Any:
            # Tolerate missing or wrongly-typed sections by substituting an empty one.
            value = explain.get(key)
            return value if isinstance(value, expected) else expected()

        summary = section("summary", dict)
        budget = section("budget", dict)
        shown_pack = budget.get("pack_bytes", payload.get("pack_bytes", 0))
        shown_budget = budget.get("budget_bytes", payload.get("budget_bytes", 0))
        print(
            "explain: "
            f"selected={summary.get('suggested', 0)} "
            f"included={summary.get('included', 0)} "
            f"partial={summary.get('partial', 0)} "
            f"omitted={summary.get('omitted', 0)} "
            f"budget={shown_pack}/{shown_budget} "
            "heuristic=local"
        )
        for row in section("selection", list)[:5]:
            if not isinstance(row, dict):
                continue
            span = row.get("included_lines") or row.get("lines")
            span_text = f":{span.get('start')}:{span.get('end')}" if isinstance(span, dict) else ""
            print(
                f"- {row.get('path')}{span_text} "
                f"status={row.get('build_status', 'unknown')} "
                f"score={row.get('score', row.get('priority', 0))} "
                f"reason={row.get('reason', 'local heuristic')}"
            )
        omissions = section("omissions", list)
        if omissions:
            tally: dict[str, int] = {}
            for row in omissions:
                if isinstance(row, dict):
                    label = str(row.get("reason", "unknown"))
                    tally[label] = tally.get(label, 0) + 1
            tally_text = ", ".join(f"{label}={count}" for label, count in sorted(tally.items()))
            print(f"omitted reasons: {tally_text}")
    manifest_path = payload.get("manifest_path")
    if manifest_path:
        print(f"manifest: {manifest_path}")
    pack_path = payload.get("pack_path")
    if pack_path:
        print(f"pack: {pack_path}")
    else:
        # No pack file was written, so emit the pack body itself.
        print()
        sys.stdout.write(str(payload["build"]["pack"]))
|
|
2608
|
-
|
|
2609
|
-
|
|
2610
|
-
def build_parser() -> argparse.ArgumentParser:
    """Create the CLI parser with ``build``, ``slice``, ``suggest`` and ``auto``.

    ``suggest`` and ``auto`` share the same leading options (root through
    ``--manifest-out``); ``auto`` additionally accepts ``--pack-out``,
    ``--no-artifact`` and ``--explain``. A subcommand is required.
    """
    root_help = "project root; must not be a symlink"
    json_help = "emit JSON payload"
    no_artifact_help = "do not write .context-guard/packs receipt"

    def add_suggest_options(command: argparse.ArgumentParser) -> None:
        # Options common to "suggest" and "auto", registered in help order.
        command.add_argument("--root", default=".", help=root_help)
        command.add_argument("--query", default="", help="task or question to match against local files")
        command.add_argument("--diff", help="git diff range, or staged/worktree, to seed changed-file ranges")
        command.add_argument("--files", "--file", dest="files", action="append", help="explicit relative file path(s), comma-separated or repeated")
        command.add_argument("--output", action="append", help="relative path to sanitized command output text under root")
        command.add_argument("--test-output", action="append", help="relative path to sanitized test output text under root")
        command.add_argument("--budget-bytes", type=int, default=DEFAULT_BUDGET_BYTES)
        command.add_argument("--top", type=int, default=DEFAULT_SUGGEST_TOP, help="maximum suggested sources")
        command.add_argument("--context-lines", type=int, default=DEFAULT_SUGGEST_CONTEXT_LINES, help="line context around diff/output hits")
        command.add_argument("--manifest-out", help="write the suggested build manifest to this relative path under root")

    parser = argparse.ArgumentParser(description="Build budgeted local context packs with exact retrieval hints.")
    sub = parser.add_subparsers(dest="command", required=True)

    build = sub.add_parser("build", help="assemble a prioritized context pack")
    build.add_argument("--root", default=".", help=root_help)
    build.add_argument("--manifest", help="JSON manifest with version/sources")
    build.add_argument("--source", action="append", help="source spec: path=REL[,priority=N][,lines=A:B][,label=TEXT]")
    build.add_argument("--budget-bytes", type=int, default=DEFAULT_BUDGET_BYTES)
    build.add_argument("--json", action="store_true", help=json_help)
    build.add_argument("--no-artifact", action="store_true", help=no_artifact_help)

    slice_cmd = sub.add_parser("slice", help="retrieve an exact sanitized file slice")
    slice_cmd.add_argument("--root", default=".", help=root_help)
    slice_cmd.add_argument("--path", required=True, help="relative file path under root")
    slice_cmd.add_argument("--lines", required=True, help="inclusive 1-indexed START:END")
    slice_cmd.add_argument("--json", action="store_true", help=json_help)

    suggest = sub.add_parser("suggest", help="suggest a build-compatible context pack manifest from local signals")
    add_suggest_options(suggest)
    suggest.add_argument("--json", action="store_true", help=json_help)

    auto = sub.add_parser("auto", help="suggest a context pack manifest and build the budgeted pack in one local step")
    add_suggest_options(auto)
    auto.add_argument("--pack-out", help="write the built Markdown pack to this relative path under root")
    auto.add_argument("--json", action="store_true", help=json_help)
    auto.add_argument("--no-artifact", action="store_true", help=no_artifact_help)
    auto.add_argument("--explain", action="store_true", help="include deterministic local selection/build explanation metadata")
    return parser
|
|
2653
|
-
|
|
2654
|
-
|
|
2655
|
-
def main(argv: list[str] | None = None) -> int:
    """Run the context-guard-pack command line and return its exit status.

    ``argv`` is handed to the parser produced by ``build_parser()``; ``None``
    lets argparse fall back to the process arguments. After the project root
    is normalized, the selected sub-command (``build``, ``slice``,
    ``suggest`` or ``auto``) is executed and its result is written to stdout
    either as JSON (``--json``) or as text.

    Returns:
        ``0`` after a ``build``; the status code handed back by the
        ``slice``/``suggest``/``auto`` helper otherwise; ``2`` when a
        ``PackError`` is raised anywhere during dispatch (the message is
        printed to stderr).
    """

    def emit_json(document) -> None:
        # Every sub-command renders its JSON payload the same way: sorted
        # keys, two-space indent, non-ASCII preserved, trailing newline.
        json.dump(document, sys.stdout, ensure_ascii=False, indent=2, sort_keys=True)
        sys.stdout.write("\n")

    args = build_parser().parse_args(argv)
    try:
        root = normalize_root(Path(args.root))
        command = args.command

        if command == "build":
            specs = parse_all_sources(args)
            if not specs:
                raise PackError("provide --manifest or --source")
            budget = bounded_int(args.budget_bytes, DEFAULT_BUDGET_BYTES, MIN_BUDGET_BYTES, MAX_BUDGET_BYTES)
            built = build_pack(
                root,
                specs,
                budget_bytes=budget,
                root_arg=str(args.root),
                store_artifact=not args.no_artifact,
            )
            if args.json:
                emit_json(built)
            else:
                sys.stdout.write(str(built["pack"]))
                # NOTE(review): the summary line is emitted for text output
                # only — confirm this nesting against the upstream file, as
                # the indentation was not recoverable from this view.
                counts = built["sources"]
                print(
                    f"[context-guard-pack] pack_id={built['pack_id']} bytes={built['pack_bytes']}/{built['budget_bytes']} "
                    f"included={counts['included']} partial={counts['partial']} omitted={counts['omitted']}",
                    file=sys.stderr,
                )
            return 0

        if command == "slice":
            line_range = parse_line_range(args.lines)
            if line_range is None:
                raise PackError("invalid_lines")
            payload, rc = slice_source(root, raw_path=args.path, lines=line_range)
            if args.json:
                emit_json(payload)
            elif rc == 0:
                sys.stdout.write(str(payload.get("content", "")))
            else:
                print(f"context-guard-pack: {payload.get('reason')}", file=sys.stderr)
            return rc

        # "suggest" and "auto" share one shape: run the helper, then render
        # the payload as JSON or through the command's own text printer.
        runners = {
            "suggest": (suggest_pack, print_suggest_text),
            "auto": (auto_pack, print_auto_text),
        }
        if command in runners:
            run, render_text = runners[command]
            payload, rc = run(root, args, root_arg=str(args.root))
            if args.json:
                emit_json(payload)
            else:
                render_text(payload)
            return rc

        raise PackError("unknown command")
    except PackError as exc:
        print(f"context-guard-pack: {exc}", file=sys.stderr)
        return 2
|
|
2710
|
-
|
|
2711
|
-
|
|
2712
|
-
if __name__ == "__main__":
    # Script entry point: the integer returned by main() becomes the
    # process exit status.
    sys.exit(main())
|