@ictechgy/context-guard 0.4.11 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/README.ko.md +19 -12
- package/README.md +11 -11
- package/package.json +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/bin/context-guard +42 -46
- package/plugins/context-guard/bin/context-guard-audit +3 -3
- package/plugins/context-guard/bin/context-guard-bench +136 -16
- package/plugins/context-guard/bin/context-guard-cache-score +29 -2
- package/plugins/context-guard/bin/context-guard-compress +89 -27
- package/plugins/context-guard/bin/context-guard-filter +88 -18
- package/plugins/context-guard/bin/context-guard-pack +28 -2
- package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +169 -6
- package/plugins/context-guard/bin/context-guard-setup +21 -5
- package/plugins/context-guard/bin/context-guard-tool-prune +48 -10
- package/plugins/context-guard/bin/context-guard-trim-output +109 -52
- package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
- package/plugins/context-guard/lib/context_guard_commands.py +4 -1
|
@@ -49,9 +49,17 @@ PRIVATE_KEY_END_RE = re.compile(
|
|
|
49
49
|
AUTH_HEADER_RE = re.compile(
|
|
50
50
|
r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
|
|
51
51
|
)
|
|
52
|
+
COOKIE_HEADER_RE = re.compile(
|
|
53
|
+
r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Set-)?Cookie\s*:\s*).+$"
|
|
54
|
+
)
|
|
55
|
+
SESSION_SECRET_KEY = (
|
|
56
|
+
r"(?:session(?:[_-]?(?:id|token))?|sessionid|sid|jsessionid|"
|
|
57
|
+
r"csrf(?:[_-]?token)?|xsrf(?:[_-]?token)?)"
|
|
58
|
+
)
|
|
52
59
|
SECRET_KEY = (
|
|
53
60
|
r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
|
|
54
61
|
r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
|
|
62
|
+
rf"|{SESSION_SECRET_KEY}"
|
|
55
63
|
r"|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
|
|
56
64
|
r"GOOGLE_APPLICATION_CREDENTIALS|AZURE_CLIENT_SECRET"
|
|
57
65
|
)
|
|
@@ -61,11 +69,48 @@ INLINE_QUOTED_SECRET_ASSIGNMENT_RE = re.compile(
|
|
|
61
69
|
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
62
70
|
rf"(?P<quote>[\"'])(?P<value>(?:\\.|(?!(?P=quote)).)*)(?P=quote)(?P<tail>[^\s,;}}\]]*)"
|
|
63
71
|
)
|
|
72
|
+
CODE_IDENTIFIER = r"[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*"
|
|
73
|
+
CALL_ARGUMENT_CHUNK = r"(?:[^()\"'\n;]+|\"(?:\\.|[^\"\\])*\"|'(?:\\.|[^'\\])*'|\([^()]*\))*"
|
|
74
|
+
INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE = re.compile(
|
|
75
|
+
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
76
|
+
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
77
|
+
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
78
|
+
rf"(?P<value>(?![\"']){CODE_IDENTIFIER}\({CALL_ARGUMENT_CHUNK}\))"
|
|
79
|
+
)
|
|
80
|
+
SECRET_IDENTIFIER_PART = (
|
|
81
|
+
r"(?:[A-Za-z_$][A-Za-z0-9_$]*(?:api_?key|apikey|token|secret|password|passwd|pwd|"
|
|
82
|
+
r"private_?key|access_?key|client_?secret|sessionid|session_id|session_token|"
|
|
83
|
+
r"csrf_token|xsrf_token)[A-Za-z0-9_$]*|session|sid|csrf|xsrf)"
|
|
84
|
+
)
|
|
85
|
+
FALLBACK_SECRET_OPERAND = rf"(?:[A-Za-z_$][A-Za-z0-9_$]*\.)*{SECRET_IDENTIFIER_PART}"
|
|
86
|
+
INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE = re.compile(
|
|
87
|
+
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
88
|
+
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
89
|
+
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
90
|
+
rf"(?P<value>(?![\"']|\[REDACTED\])"
|
|
91
|
+
rf"[^;\n]*?(?:\bor\b|\|\||\?\?|\belse\b|\?[^:\n;]*:)\s*"
|
|
92
|
+
rf"(?:[\"'](?:\\.|[^\"'\\])*[\"']|{FALLBACK_SECRET_OPERAND})[^;\n]*)"
|
|
93
|
+
)
|
|
94
|
+
INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE = re.compile(
|
|
95
|
+
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
96
|
+
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
97
|
+
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
98
|
+
rf"(?P<value>(?![\"']|\[REDACTED\])"
|
|
99
|
+
rf"[^\s,;}}\]]*(?:\([^;\n]*?\)|\{{[^;\n]*?\}}|\[[^;\n]*?\])[^\s,;}}\]]*)"
|
|
100
|
+
)
|
|
64
101
|
INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE = re.compile(
|
|
65
102
|
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
66
103
|
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
67
104
|
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
68
|
-
rf"(?P<value>[^\s,;}}\]]+)"
|
|
105
|
+
rf"(?P<value>(?![\"']|\[REDACTED\])[^\s,;}}\]]+)"
|
|
106
|
+
)
|
|
107
|
+
UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE = re.compile(
|
|
108
|
+
rf"(?i)(?:^|[\s;{{\[,])"
|
|
109
|
+
rf"(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
110
|
+
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*(?P<value>(?![\"']).*)$"
|
|
111
|
+
)
|
|
112
|
+
CONTINUATION_OPERATOR_RE = re.compile(
|
|
113
|
+
r"(?i)(?:\\|\|\||&&|\?\?|[+*/%&|^?,]|\?|:|\bor\b|\band\b|\belse\b)\s*(?://.*|#.*)?$"
|
|
69
114
|
)
|
|
70
115
|
URL_LIKE_RE = re.compile(r"\b[A-Za-z][A-Za-z0-9+.-]*://[^\s]+")
|
|
71
116
|
URL_SECRET_PARAM_RE = re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s?&#;]+")
|
|
@@ -80,6 +125,43 @@ SAFE_UNQUOTED_VALUES = {
|
|
|
80
125
|
"undefined",
|
|
81
126
|
}
|
|
82
127
|
IDENTIFIER_CHAIN_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)+$")
|
|
128
|
+
SAFE_ENV_LOOKUP_CALL_RE = re.compile(r"^(?:os\.getenv|os\.environ\.get)\(\s*[\"'][A-Za-z0-9_.-]{1,80}[\"']\s*\)$")
|
|
129
|
+
SAFE_RE_COMPILE_CALL_RE = re.compile(r"^re\.compile\([^;\n]*\)$")
|
|
130
|
+
SAFE_CODE_EXPRESSION_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\(\s*(?:{CODE_IDENTIFIER}(?:\s*,\s*{CODE_IDENTIFIER})*)?\s*\)$")
|
|
131
|
+
GETTER_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\.get\(\s*[\"'](?P<key>[A-Za-z0-9_.-]{{1,80}})[\"']\s*\)$")
|
|
132
|
+
CAMEL_ACRONYM_BOUNDARY_RE = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])")
|
|
133
|
+
CAMEL_WORD_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
|
|
134
|
+
SAFE_GETTER_KEY_NAMES = {
|
|
135
|
+
"access_key",
|
|
136
|
+
"access_token",
|
|
137
|
+
"api_key",
|
|
138
|
+
"apikey",
|
|
139
|
+
"auth",
|
|
140
|
+
"authorization",
|
|
141
|
+
"aws_access_key_id",
|
|
142
|
+
"aws_secret_access_key",
|
|
143
|
+
"aws_session_token",
|
|
144
|
+
"azure_client_secret",
|
|
145
|
+
"client_id",
|
|
146
|
+
"client_secret",
|
|
147
|
+
"cookie",
|
|
148
|
+
"credential",
|
|
149
|
+
"credentials",
|
|
150
|
+
"csrf",
|
|
151
|
+
"google_application_credentials",
|
|
152
|
+
"jwt",
|
|
153
|
+
"password",
|
|
154
|
+
"passwd",
|
|
155
|
+
"private_key",
|
|
156
|
+
"pwd",
|
|
157
|
+
"refresh_token",
|
|
158
|
+
"secret",
|
|
159
|
+
"session",
|
|
160
|
+
"session_id",
|
|
161
|
+
"sessionid",
|
|
162
|
+
"sid",
|
|
163
|
+
"token",
|
|
164
|
+
}
|
|
83
165
|
INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
|
|
84
166
|
(re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
|
|
85
167
|
(re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
|
|
@@ -171,20 +253,33 @@ def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
|
|
|
171
253
|
return body[:keep] + marker + newline, True
|
|
172
254
|
|
|
173
255
|
|
|
256
|
+
def normalize_getter_key(key: str) -> str:
    """Return *key* folded to a lowercase, underscore-separated form.

    Underscores are inserted at the camel-case boundaries matched by
    ``CAMEL_ACRONYM_BOUNDARY_RE`` and ``CAMEL_WORD_BOUNDARY_RE``; runs of
    ``_``, ``.`` and ``-`` are then collapsed to a single underscore, edge
    underscores are stripped, and the result is lowercased.
    """
    # Split camelCase / acronym boundaries first so they become separators too.
    for boundary in (CAMEL_ACRONYM_BOUNDARY_RE, CAMEL_WORD_BOUNDARY_RE):
        key = boundary.sub("_", key)
    # Unify every separator style into single underscores.
    collapsed = re.sub(r"_+", "_", re.sub(r"[_.-]+", "_", key))
    return collapsed.strip("_").lower()
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def is_safe_getter_key(key: str) -> bool:
    """Return True when the normalized form of *key* is in ``SAFE_GETTER_KEY_NAMES``."""
    canonical = normalize_getter_key(key)
    return canonical in SAFE_GETTER_KEY_NAMES
|
|
265
|
+
|
|
266
|
+
|
|
174
267
|
def should_redact_unquoted_secret_value(line: str, match: re.Match[str]) -> bool:
|
|
175
268
|
value = match.group("value").strip()
|
|
269
|
+
prefix = match.group("prefix")
|
|
176
270
|
if not value:
|
|
177
271
|
return False
|
|
178
272
|
if value.lower() in SAFE_UNQUOTED_VALUES:
|
|
179
273
|
return False
|
|
180
274
|
if IDENTIFIER_CHAIN_RE.match(value):
|
|
181
275
|
return False
|
|
182
|
-
|
|
183
|
-
if end < len(line) and line[end] in "([{":
|
|
184
|
-
# Likely a function call or expression (`api_key = os.getenv(...)`);
|
|
185
|
-
# preserve it so Claude can still reason about code flow.
|
|
276
|
+
if SAFE_ENV_LOOKUP_CALL_RE.match(value) or SAFE_RE_COMPILE_CALL_RE.match(value):
|
|
186
277
|
return False
|
|
187
|
-
|
|
278
|
+
getter_match = GETTER_CALL_RE.match(value)
|
|
279
|
+
if re.search(r"\s[:=]\s*$", prefix) and (
|
|
280
|
+
SAFE_CODE_EXPRESSION_CALL_RE.match(value)
|
|
281
|
+
or (getter_match is not None and is_safe_getter_key(getter_match.group("key")))
|
|
282
|
+
):
|
|
188
283
|
return False
|
|
189
284
|
return True
|
|
190
285
|
|
|
@@ -218,6 +313,9 @@ def redact_secret_assignments(line: str) -> tuple[str, bool]:
|
|
|
218
313
|
return f"{match.group('lead')}{match.group('prefix')}[REDACTED]"
|
|
219
314
|
|
|
220
315
|
line = INLINE_QUOTED_SECRET_ASSIGNMENT_RE.sub(quoted_repl, line)
|
|
316
|
+
line = INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
|
|
317
|
+
line = INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
|
|
318
|
+
line = INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
|
|
221
319
|
line = INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
|
|
222
320
|
return line, redacted
|
|
223
321
|
|
|
@@ -257,6 +355,54 @@ def detect_multiline_secret_assignment(line: str) -> str | None:
|
|
|
257
355
|
return None
|
|
258
356
|
|
|
259
357
|
|
|
358
|
+
def expression_bracket_delta(text: str) -> int:
    """Return the net bracket balance of *text*, ignoring quoted content.

    Each ``(``, ``[`` or ``{`` outside a quoted string counts +1 and each
    ``)``, ``]`` or ``}`` counts -1.  Single- and double-quoted strings are
    skipped; inside a string a backslash escapes the following character.
    An unterminated string swallows the rest of *text*.
    """
    weights = {"(": 1, "[": 1, "{": 1, ")": -1, "]": -1, "}": -1}
    balance = 0
    open_quote = None  # quote character of the string currently being skipped
    skip_next = False  # True right after a backslash inside a string
    for ch in text:
        if open_quote is None:
            if ch == "'" or ch == '"':
                open_quote = ch
            else:
                balance += weights.get(ch, 0)
            continue
        # Inside a quoted string: only track escapes and the closing quote.
        if skip_next:
            skip_next = False
        elif ch == "\\":
            skip_next = True
        elif ch == open_quote:
            open_quote = None
    return balance
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def ends_with_continuation_operator(text: str) -> bool:
    """Return True when *text*, right-stripped, matches ``CONTINUATION_OPERATOR_RE``."""
    trimmed = text.rstrip()
    return CONTINUATION_OPERATOR_RE.search(trimmed) is not None
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def detect_multiline_secret_expression(line: str) -> int | None:
    """Return the open-bracket depth if *line* starts a multi-line secret expression.

    Returns ``None`` when *line* does not match
    ``UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE`` or when the assigned value is
    complete on this line.  Returns ``0`` when the value is empty or ends with
    a continuation operator with no brackets left open, and the positive
    bracket depth when brackets remain unclosed.
    """
    found = UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE.search(line)
    if found is None:
        return None
    expression = found.group("value").strip()
    if not expression:
        # Nothing after the assignment operator on this line.
        return 0
    depth = expression_bracket_delta(expression)
    if depth > 0:
        return depth
    # Balanced (or over-closed) brackets: only a trailing operator continues the value.
    return 0 if ends_with_continuation_operator(expression) else None
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def update_multiline_secret_expression_state(line: str, depth: int) -> int | None:
    """Return the bracket depth after *line*, or ``None`` once the expression ends.

    *depth* is the number of brackets still open before *line*.  The result is
    the new depth (never negative) while brackets remain open or *line* ends
    with a continuation operator; otherwise ``None``.
    """
    remaining = depth + expression_bracket_delta(line)
    if remaining > 0:
        return remaining
    # All brackets closed: keep going only if the line ends with an operator.
    return 0 if ends_with_continuation_operator(line) else None
|
|
404
|
+
|
|
405
|
+
|
|
260
406
|
def private_key_state_after_line(line: str) -> bool | None:
|
|
261
407
|
"""Return updated private-key state for a line, or None when no marker appears."""
|
|
262
408
|
if PRIVATE_KEY_BEGIN_RE.search(line):
|
|
@@ -277,6 +423,7 @@ class LineSanitizer:
|
|
|
277
423
|
self.show_paths = show_paths
|
|
278
424
|
self.in_private_key_block = False
|
|
279
425
|
self.multiline_secret_quote: str | None = None
|
|
426
|
+
self.multiline_secret_expression_depth: int | None = None
|
|
280
427
|
self.redactions = 0
|
|
281
428
|
|
|
282
429
|
def sanitize(self, raw_line: str) -> tuple[str, bool]:
|
|
@@ -309,6 +456,12 @@ class LineSanitizer:
|
|
|
309
456
|
self.in_private_key_block = False
|
|
310
457
|
return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
|
|
311
458
|
|
|
459
|
+
if self.multiline_secret_expression_depth is not None:
|
|
460
|
+
self.multiline_secret_expression_depth = update_multiline_secret_expression_state(
|
|
461
|
+
line, self.multiline_secret_expression_depth
|
|
462
|
+
)
|
|
463
|
+
return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
|
|
464
|
+
|
|
312
465
|
multiline_quote = detect_multiline_secret_assignment(line)
|
|
313
466
|
if multiline_quote is not None:
|
|
314
467
|
self.multiline_secret_quote = multiline_quote
|
|
@@ -323,11 +476,21 @@ class LineSanitizer:
|
|
|
323
476
|
self.in_private_key_block = True
|
|
324
477
|
return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
|
|
325
478
|
|
|
479
|
+
expression_depth = detect_multiline_secret_expression(line)
|
|
480
|
+
if expression_depth is not None:
|
|
481
|
+
self.multiline_secret_expression_depth = expression_depth
|
|
482
|
+
return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
|
|
483
|
+
|
|
326
484
|
new_line, count = AUTH_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
|
|
327
485
|
if count:
|
|
328
486
|
redacted = True
|
|
329
487
|
line = new_line
|
|
330
488
|
|
|
489
|
+
new_line, count = COOKIE_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
|
|
490
|
+
if count:
|
|
491
|
+
redacted = True
|
|
492
|
+
line = new_line
|
|
493
|
+
|
|
331
494
|
line, assignment_redacted = redact_secret_assignments(line)
|
|
332
495
|
if assignment_redacted:
|
|
333
496
|
redacted = True
|
|
@@ -2210,6 +2210,25 @@ def backup_existing(path: Path) -> Path | None:
|
|
|
2210
2210
|
return backup
|
|
2211
2211
|
|
|
2212
2212
|
|
|
2213
|
+
def rollback_restore_guidance(settings_path: Path, backup_path: Path | None, original_existed: bool) -> str:
    """Return human-readable restore guidance for a rollback record.

    Three messages are possible: one for when *backup_path* is set, one for
    when there is no backup but the target existed before setup, and one for
    when the target did not exist before setup.  Each message embeds
    *settings_path* (and *backup_path* when present).
    """
    if backup_path is None and not original_existed:
        # Setup created the target; there is nothing to restore, only to clean up.
        return (
            "The target did not exist before setup. If cleanup is required, verify the target and every parent "
            "without following symlinks and remove only the verified regular file; do not use generic shell "
            f"delete commands on this mutable target: {settings_path}."
        )
    if backup_path is None:
        # The target pre-existed but no backup location was recorded.
        return (
            "No backup path was recorded; inspect the target with no-follow file operations before any "
            f"manual recovery. Do not use generic shell copy/delete commands on this mutable target: {settings_path}."
        )
    return (
        "Restore only with a no-follow, symlink-safe copy that opens the backup and target parent "
        "without following links, then atomically replaces the target; do not use generic shell "
        f"copy/delete commands on this mutable target. Backup: {backup_path}. Target: {settings_path}."
    )
|
|
2230
|
+
|
|
2231
|
+
|
|
2213
2232
|
def write_rollback_record(
|
|
2214
2233
|
*,
|
|
2215
2234
|
root: Path,
|
|
@@ -2237,11 +2256,8 @@ def write_rollback_record(
|
|
|
2237
2256
|
"target_path": str(settings_path),
|
|
2238
2257
|
"backup_path": str(backup_path) if backup_path else None,
|
|
2239
2258
|
"original_existed": original_existed,
|
|
2240
|
-
"restore": (
|
|
2241
|
-
|
|
2242
|
-
if backup_path
|
|
2243
|
-
else f"rm -f {shlex.quote(str(settings_path))}"
|
|
2244
|
-
),
|
|
2259
|
+
"restore": rollback_restore_guidance(settings_path, backup_path, original_existed),
|
|
2260
|
+
"restore_requires_no_follow": True,
|
|
2245
2261
|
}
|
|
2246
2262
|
atomic_write(rollback_path, json.dumps(record, indent=2, sort_keys=True) + "\n", 0o600)
|
|
2247
2263
|
return rollback_id, rollback_path
|
|
@@ -87,6 +87,8 @@ class Candidate:
|
|
|
87
87
|
index: int
|
|
88
88
|
score: float = 0.0
|
|
89
89
|
rank: int = 0
|
|
90
|
+
schema_bytes: int = 0
|
|
91
|
+
parameter_terms: frozenset[str] | None = None
|
|
90
92
|
|
|
91
93
|
|
|
92
94
|
def fail(message: str) -> NoReturn:
|
|
@@ -276,7 +278,15 @@ def tool_schema_from_dict(raw: dict[str, Any], *, fallback_name: str | None = No
|
|
|
276
278
|
schema["description"] = description
|
|
277
279
|
if server and "server" not in schema:
|
|
278
280
|
schema["server"] = server
|
|
279
|
-
return Candidate(
|
|
281
|
+
return Candidate(
|
|
282
|
+
name=name,
|
|
283
|
+
server=cap_text(server, MAX_LABEL_CHARS) if server else None,
|
|
284
|
+
description=description,
|
|
285
|
+
schema=schema,
|
|
286
|
+
index=index,
|
|
287
|
+
schema_bytes=byte_len_json(schema),
|
|
288
|
+
parameter_terms=frozenset(terms(" ".join(collect_parameter_text(schema)))),
|
|
289
|
+
)
|
|
280
290
|
|
|
281
291
|
|
|
282
292
|
def normalize_catalog(raw: Any) -> list[Candidate]:
|
|
@@ -362,7 +372,11 @@ def score_candidate(candidate: Candidate, query_terms: set[str]) -> float:
|
|
|
362
372
|
return 0.0
|
|
363
373
|
name_terms = terms(candidate.name)
|
|
364
374
|
desc_terms = terms(candidate.description)
|
|
365
|
-
parameter_terms =
|
|
375
|
+
parameter_terms = (
|
|
376
|
+
set(candidate.parameter_terms)
|
|
377
|
+
if candidate.parameter_terms is not None
|
|
378
|
+
else terms(" ".join(collect_parameter_text(candidate.schema)))
|
|
379
|
+
)
|
|
366
380
|
score = 0.0
|
|
367
381
|
score += 4.0 * len(query_terms & name_terms)
|
|
368
382
|
score += 1.5 * len(query_terms & desc_terms)
|
|
@@ -379,14 +393,38 @@ def rank_candidates(candidates: list[Candidate], query: str) -> list[Candidate]:
|
|
|
379
393
|
query_terms = terms(query)
|
|
380
394
|
scored: list[Candidate] = []
|
|
381
395
|
for cand in candidates:
|
|
382
|
-
scored.append(Candidate(
|
|
396
|
+
scored.append(Candidate(
|
|
397
|
+
cand.name,
|
|
398
|
+
cand.server,
|
|
399
|
+
cand.description,
|
|
400
|
+
cand.schema,
|
|
401
|
+
cand.index,
|
|
402
|
+
score_candidate(cand, query_terms),
|
|
403
|
+
0,
|
|
404
|
+
schema_bytes=cand.schema_bytes,
|
|
405
|
+
parameter_terms=cand.parameter_terms,
|
|
406
|
+
))
|
|
383
407
|
scored.sort(key=lambda item: (-item.score, item.index))
|
|
384
408
|
ranked: list[Candidate] = []
|
|
385
409
|
for rank, cand in enumerate(scored, start=1):
|
|
386
|
-
ranked.append(Candidate(
|
|
410
|
+
ranked.append(Candidate(
|
|
411
|
+
cand.name,
|
|
412
|
+
cand.server,
|
|
413
|
+
cand.description,
|
|
414
|
+
cand.schema,
|
|
415
|
+
cand.index,
|
|
416
|
+
cand.score,
|
|
417
|
+
rank,
|
|
418
|
+
schema_bytes=cand.schema_bytes,
|
|
419
|
+
parameter_terms=cand.parameter_terms,
|
|
420
|
+
))
|
|
387
421
|
return ranked
|
|
388
422
|
|
|
389
423
|
|
|
424
|
+
def candidate_schema_bytes(cand: Candidate) -> int:
    """Return the schema size for *cand*, using the stored value when it is positive.

    Falls back to ``byte_len_json(cand.schema)`` when ``cand.schema_bytes`` is
    zero or negative.
    """
    cached = cand.schema_bytes
    if cached > 0:
        return cached
    return byte_len_json(cand.schema)
|
|
426
|
+
|
|
427
|
+
|
|
390
428
|
def normalized_link_target(parent: Path, raw_target: str) -> Path:
|
|
391
429
|
target = Path(raw_target)
|
|
392
430
|
if not target.is_absolute():
|
|
@@ -707,7 +745,7 @@ def build_payload(receipt_id: str, ranked: list[Candidate], query: str, redactio
|
|
|
707
745
|
"description": cand.description,
|
|
708
746
|
"score": cand.score,
|
|
709
747
|
"rank": cand.rank,
|
|
710
|
-
"schema_bytes":
|
|
748
|
+
"schema_bytes": candidate_schema_bytes(cand),
|
|
711
749
|
"schema": cand.schema,
|
|
712
750
|
}
|
|
713
751
|
for cand in ranked
|
|
@@ -739,7 +777,7 @@ def retrieval_command(receipt_id: str, *, store_dir: str, tool_name: str | None
|
|
|
739
777
|
|
|
740
778
|
|
|
741
779
|
def selected_tool_record(cand: Candidate, receipt_id: str, budget_left: int, *, store_dir: str) -> tuple[dict[str, Any], int]:
|
|
742
|
-
schema_size =
|
|
780
|
+
schema_size = candidate_schema_bytes(cand)
|
|
743
781
|
record: dict[str, Any] = {
|
|
744
782
|
"name": cand.name,
|
|
745
783
|
"server": cand.server,
|
|
@@ -765,7 +803,7 @@ def deferred_tool_record(cand: Candidate, receipt_id: str, *, store_dir: str) ->
|
|
|
765
803
|
"score": cand.score,
|
|
766
804
|
"rank": cand.rank,
|
|
767
805
|
"description": cand.description,
|
|
768
|
-
"schema_bytes":
|
|
806
|
+
"schema_bytes": candidate_schema_bytes(cand),
|
|
769
807
|
"reason": "deferred_after_core_top",
|
|
770
808
|
"retrieval": retrieval_command(receipt_id, store_dir=store_dir, tool_name=cand.name),
|
|
771
809
|
}
|
|
@@ -1008,9 +1046,9 @@ def defer_report(args: argparse.Namespace) -> str:
|
|
|
1008
1046
|
store_dir=args.store_dir,
|
|
1009
1047
|
namespace_top=namespace_top,
|
|
1010
1048
|
)
|
|
1011
|
-
all_schema_bytes = sum(
|
|
1012
|
-
listed_deferred_schema_bytes = sum(
|
|
1013
|
-
total_deferred_schema_bytes = sum(
|
|
1049
|
+
all_schema_bytes = sum(candidate_schema_bytes(cand) for cand in ranked)
|
|
1050
|
+
listed_deferred_schema_bytes = sum(candidate_schema_bytes(cand) for cand in deferred_candidates)
|
|
1051
|
+
total_deferred_schema_bytes = sum(candidate_schema_bytes(cand) for cand in ranked[core_top:])
|
|
1014
1052
|
tool_stub_report_bytes = byte_len_json(core_tools) + byte_len_json(deferred_tools)
|
|
1015
1053
|
all_schema_tokens = proxy_tokens(all_schema_bytes)
|
|
1016
1054
|
inline_core_schema_tokens = proxy_tokens(core_schema_bytes)
|
|
@@ -20,6 +20,7 @@ import signal
|
|
|
20
20
|
import stat
|
|
21
21
|
import subprocess
|
|
22
22
|
import sys
|
|
23
|
+
import tempfile
|
|
23
24
|
import threading
|
|
24
25
|
import time
|
|
25
26
|
import types
|
|
@@ -398,23 +399,75 @@ def store_sanitized_artifact_receipt(
|
|
|
398
399
|
return receipt
|
|
399
400
|
|
|
400
401
|
|
|
401
|
-
|
|
402
|
-
*,
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
402
|
+
class SanitizedArtifactCapture:
    """Buffer sanitized output lines in a temporary file, up to a byte cap.

    Lines are UTF-8 encoded and appended to a lazily created
    ``tempfile.TemporaryFile``.  When adding a line would exceed ``max_bytes``
    the capture is marked as overflowed and the buffer is discarded.  The
    first ``OSError`` hit while creating, writing, reading or closing the file
    is recorded in ``error``.  Usable as a context manager; leaving the
    ``with`` block closes the buffer.
    """

    def __init__(self, *, enabled: bool, max_bytes: int) -> None:
        self.enabled = enabled      # when False, add() is a no-op
        self.max_bytes = max_bytes  # cap on total encoded bytes captured
        self.bytes = 0              # encoded bytes successfully written so far
        self.overflow = False       # set once a line would exceed max_bytes
        self.error: str | None = None  # "<ExceptionClass>: <message>" of the first OSError
        self._file: BinaryIO | None = None  # backing temp file, created on first write

    def _ensure_file(self) -> BinaryIO | None:
        """Return the backing file, creating it on first use; None if creation fails."""
        if self._file is None:
            try:
                self._file = tempfile.TemporaryFile("w+b")
            except OSError as exc:
                # Leave _file as None so the caller sees the failure.
                self._record_error(exc)
        return self._file

    def _record_error(self, exc: OSError) -> None:
        """Remember *exc* as the capture error unless one is already recorded."""
        if self.error is not None:
            return
        self.error = f"{exc.__class__.__name__}: {exc}"

    def add(self, sanitized_line: str) -> None:
        """Append *sanitized_line* to the buffer unless disabled, overflowed or failed."""
        if self.overflow or self.error or not self.enabled:
            return
        payload = sanitized_line.encode("utf-8", errors="replace")
        size = len(payload)
        if self.bytes + size > self.max_bytes:
            # Over the cap: flag it and drop everything captured so far.
            self.overflow = True
            self.close()
            return
        sink = self._ensure_file()
        if sink is None:
            return
        try:
            sink.write(payload)
        except OSError as exc:
            self._record_error(exc)
            self.close()
        else:
            self.bytes += size

    def text(self) -> str:
        """Return everything captured so far as text, or "" when nothing is buffered."""
        handle = self._file
        if handle is None:
            return ""
        try:
            handle.flush()
            handle.seek(0)
            raw = handle.read()
        except OSError as exc:
            self._record_error(exc)
            self.close()
            return ""
        return raw.decode("utf-8", errors="replace")

    def close(self) -> None:
        """Close and forget the backing file, if any; safe to call repeatedly."""
        handle, self._file = self._file, None
        if handle is None:
            return
        try:
            handle.close()
        except OSError as exc:
            self._record_error(exc)

    def __enter__(self) -> "SanitizedArtifactCapture":
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()
|
|
418
471
|
|
|
419
472
|
|
|
420
473
|
def unique_keep_order(lines: Iterable[str]) -> list[str]:
|
|
@@ -1512,11 +1565,10 @@ def main() -> int:
|
|
|
1512
1565
|
runner_summary = RunnerFailureSummary(args.runner_summary_items, show_paths=args.show_paths)
|
|
1513
1566
|
duplicate_tracker = DuplicateLineTracker()
|
|
1514
1567
|
redacted_lines = 0
|
|
1515
|
-
|
|
1516
|
-
artifact_capture_bytes = 0
|
|
1517
|
-
artifact_capture_overflow = False
|
|
1568
|
+
artifact_capture = SanitizedArtifactCapture(enabled=args.artifact_receipt, max_bytes=args.artifact_max_bytes)
|
|
1518
1569
|
|
|
1519
1570
|
if proc.stdout is None:
|
|
1571
|
+
artifact_capture.close()
|
|
1520
1572
|
print("trim_command_output.py: subprocess produced no stdout pipe", file=sys.stderr)
|
|
1521
1573
|
return 1
|
|
1522
1574
|
command_stream = TimedCommandStream(
|
|
@@ -1532,14 +1584,7 @@ def main() -> int:
|
|
|
1532
1584
|
visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
|
|
1533
1585
|
if redacted:
|
|
1534
1586
|
redacted_lines += 1
|
|
1535
|
-
|
|
1536
|
-
capture_enabled=args.artifact_receipt,
|
|
1537
|
-
sanitized_line=visible_source,
|
|
1538
|
-
artifact_lines=artifact_lines,
|
|
1539
|
-
capture_bytes=artifact_capture_bytes,
|
|
1540
|
-
capture_overflow=artifact_capture_overflow,
|
|
1541
|
-
max_bytes=args.artifact_max_bytes,
|
|
1542
|
-
)
|
|
1587
|
+
artifact_capture.add(visible_source)
|
|
1543
1588
|
visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
|
|
1544
1589
|
any_line_capped = any_line_capped or line_capped
|
|
1545
1590
|
visible_chars += len(visible_line)
|
|
@@ -1562,14 +1607,7 @@ def main() -> int:
|
|
|
1562
1607
|
visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
|
|
1563
1608
|
if redacted:
|
|
1564
1609
|
redacted_lines += 1
|
|
1565
|
-
|
|
1566
|
-
capture_enabled=args.artifact_receipt,
|
|
1567
|
-
sanitized_line=visible_source,
|
|
1568
|
-
artifact_lines=artifact_lines,
|
|
1569
|
-
capture_bytes=artifact_capture_bytes,
|
|
1570
|
-
capture_overflow=artifact_capture_overflow,
|
|
1571
|
-
max_bytes=args.artifact_max_bytes,
|
|
1572
|
-
)
|
|
1610
|
+
artifact_capture.add(visible_source)
|
|
1573
1611
|
visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
|
|
1574
1612
|
any_line_capped = any_line_capped or line_capped
|
|
1575
1613
|
visible_chars += len(visible_line)
|
|
@@ -1602,32 +1640,49 @@ def main() -> int:
|
|
|
1602
1640
|
duplicate_line_groups=duplicate_tracker.as_list(),
|
|
1603
1641
|
)
|
|
1604
1642
|
if args.artifact_receipt:
|
|
1605
|
-
if
|
|
1643
|
+
if artifact_capture.overflow:
|
|
1606
1644
|
payload["artifact_receipt"] = {
|
|
1607
1645
|
"stored": False,
|
|
1608
1646
|
"error": "sanitized_output_exceeds_artifact_max_bytes",
|
|
1609
1647
|
"max_bytes": args.artifact_max_bytes,
|
|
1610
1648
|
"exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
|
|
1611
1649
|
}
|
|
1650
|
+
elif artifact_capture.error:
|
|
1651
|
+
payload["artifact_receipt"] = {
|
|
1652
|
+
"stored": False,
|
|
1653
|
+
"error": "artifact_receipt_capture_unavailable",
|
|
1654
|
+
"reason": artifact_capture.error,
|
|
1655
|
+
"exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
|
|
1656
|
+
}
|
|
1612
1657
|
else:
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
sanitized_text="".join(artifact_lines),
|
|
1616
|
-
command=command,
|
|
1617
|
-
args=args,
|
|
1618
|
-
line_sanitizer=line_sanitizer,
|
|
1619
|
-
redacted_lines=redacted_lines,
|
|
1620
|
-
)
|
|
1621
|
-
except UnsafeAdjacentModuleError as exc:
|
|
1622
|
-
print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
|
|
1623
|
-
return 2
|
|
1624
|
-
except Exception as exc:
|
|
1658
|
+
sanitized_artifact_text = artifact_capture.text()
|
|
1659
|
+
if artifact_capture.error:
|
|
1625
1660
|
payload["artifact_receipt"] = {
|
|
1626
1661
|
"stored": False,
|
|
1627
|
-
"error": "
|
|
1628
|
-
"reason":
|
|
1629
|
-
"exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
|
|
1662
|
+
"error": "artifact_receipt_capture_unavailable",
|
|
1663
|
+
"reason": artifact_capture.error,
|
|
1664
|
+
"exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
|
|
1630
1665
|
}
|
|
1666
|
+
else:
|
|
1667
|
+
try:
|
|
1668
|
+
payload["artifact_receipt"] = store_sanitized_artifact_receipt(
|
|
1669
|
+
sanitized_text=sanitized_artifact_text,
|
|
1670
|
+
command=command,
|
|
1671
|
+
args=args,
|
|
1672
|
+
line_sanitizer=line_sanitizer,
|
|
1673
|
+
redacted_lines=redacted_lines,
|
|
1674
|
+
)
|
|
1675
|
+
except UnsafeAdjacentModuleError as exc:
|
|
1676
|
+
artifact_capture.close()
|
|
1677
|
+
print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
|
|
1678
|
+
return 2
|
|
1679
|
+
except Exception as exc:
|
|
1680
|
+
payload["artifact_receipt"] = {
|
|
1681
|
+
"stored": False,
|
|
1682
|
+
"error": "artifact_receipt_unavailable",
|
|
1683
|
+
"reason": f"{exc.__class__.__name__}: {exc}",
|
|
1684
|
+
"exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
|
|
1685
|
+
}
|
|
1631
1686
|
artifact_receipt = payload.get("artifact_receipt")
|
|
1632
1687
|
if isinstance(artifact_receipt, dict) and artifact_receipt.get("stored"):
|
|
1633
1688
|
next_queries = payload.setdefault("next_queries", [])
|
|
@@ -1642,6 +1697,7 @@ def main() -> int:
|
|
|
1642
1697
|
sys.stdout.write(render_digest_json(payload, args.max_chars))
|
|
1643
1698
|
else:
|
|
1644
1699
|
sys.stdout.write(render_digest_markdown(payload, args.max_chars))
|
|
1700
|
+
artifact_capture.close()
|
|
1645
1701
|
return rc
|
|
1646
1702
|
|
|
1647
1703
|
if total <= args.max_lines and visible_chars <= args.max_chars and not any_line_capped:
|
|
@@ -1689,6 +1745,7 @@ def main() -> int:
|
|
|
1689
1745
|
output += "[context-guard-kit] final summary was capped by --max-chars.\n"
|
|
1690
1746
|
sys.stdout.write(output)
|
|
1691
1747
|
|
|
1748
|
+
artifact_capture.close()
|
|
1692
1749
|
return rc
|
|
1693
1750
|
|
|
1694
1751
|
|