@ictechgy/context-guard 0.4.9 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.ko.md +41 -24
- package/README.md +66 -26
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +8 -1
- package/package.json +3 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +9 -6
- package/plugins/context-guard/README.md +21 -13
- package/plugins/context-guard/bin/context-guard +113 -26
- package/plugins/context-guard/bin/context-guard-artifact +542 -46
- package/plugins/context-guard/bin/context-guard-cache-score +380 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +783 -4
- package/plugins/context-guard/bin/context-guard-experiments +99 -18
- package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +602 -43
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
- package/plugins/context-guard/lib/context_guard_commands.py +206 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -43,6 +43,8 @@ SUGGEST_SCHEMA_VERSION = "contextguard.pack-suggest.v1"
|
|
|
43
43
|
AUTO_SCHEMA_VERSION = "contextguard.pack-auto.v1"
|
|
44
44
|
AUTO_EXPLAIN_SCHEMA_VERSION = "contextguard.pack-auto-explain.v1"
|
|
45
45
|
REPO_MAP_SCHEMA_VERSION = "contextguard.pack-repo-map.v1"
|
|
46
|
+
ADAPTIVE_K_SCHEMA_VERSION = "contextguard.pack-adaptive-k.v1"
|
|
47
|
+
SYMBOL_MEMORY_SCHEMA_VERSION = "contextguard.pack-symbol-memory.v1"
|
|
46
48
|
DEFAULT_SUGGEST_TOP = 8
|
|
47
49
|
MAX_SUGGEST_TOP = 50
|
|
48
50
|
DEFAULT_SUGGEST_CONTEXT_LINES = 20
|
|
@@ -52,14 +54,22 @@ MAX_SUGGEST_INPUT_BYTES = 256_000
|
|
|
52
54
|
MAX_QUERY_SCAN_FILES = 2_000
|
|
53
55
|
MAX_QUERY_SCAN_BYTES_PER_FILE = 200_000
|
|
54
56
|
MAX_REPO_MAP_FILES = 1_000
|
|
57
|
+
MAX_REPO_MAP_SCAN_FILES = 160
|
|
55
58
|
MAX_REPO_MAP_BYTES_PER_FILE = 120_000
|
|
56
59
|
MAX_REPO_MAP_TREE_ENTRIES = 30
|
|
57
60
|
MAX_REPO_MAP_SIGNATURE_ENTRIES = 40
|
|
58
61
|
MAX_REPO_MAP_GRAPH_RANK_ENTRIES = 30
|
|
59
62
|
MAX_REPO_MAP_RETRIEVAL_HINTS = 30
|
|
60
63
|
MAX_REPO_MAP_SECRET_RISK_FILES = 20
|
|
64
|
+
MAX_ADAPTIVE_K_SCORE_SAMPLES = 200
|
|
65
|
+
MAX_SYMBOL_MEMORY_ITEMS = 12
|
|
66
|
+
MAX_SYMBOL_MEMORY_GRAPH_ITEMS = 12
|
|
61
67
|
PACK_DIR = ".context-guard/packs"
|
|
62
68
|
REDACTED_PATH_COMPONENT = "[REDACTED-PATH-COMPONENT]"
|
|
69
|
+
ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
|
|
70
|
+
"tmp": Path("/private/tmp"),
|
|
71
|
+
"var": Path("/private/var"),
|
|
72
|
+
}
|
|
63
73
|
CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f-\x9f]")
|
|
64
74
|
SECRET_CONTENT_RE = re.compile(
|
|
65
75
|
r"(?is)("
|
|
@@ -235,6 +245,30 @@ def sanitize_text(text: str, *, show_paths: bool = False) -> tuple[str, int]:
|
|
|
235
245
|
return "".join(out), redacted
|
|
236
246
|
|
|
237
247
|
|
|
248
|
+
def sanitize_source_lines(handle: Any, requested: LineRange | None) -> tuple[list[str], int, int]:
    """Stream ``handle`` through the line sanitizer, keeping only the wanted window.

    Every line of the stream is sanitized and counted, even when a line
    window is requested, so the total line count and the number of redacted
    lines describe the whole file. Only lines inside ``requested`` (or all
    lines when ``requested`` is ``None``) are retained, which avoids building
    a sanitized copy of the full file before slicing.

    Returns ``(kept_lines, total_line_count, redacted_line_count)``.
    """
    line_sanitizer = load_line_sanitizer()
    if requested is None:
        keep_everything, first, last = True, 1, 0
    else:
        keep_everything, first, last = False, requested.start, requested.end
    kept: list[str] = []
    flagged = 0
    line_no = 0
    for raw in handle:
        line_no += 1
        clean, was_redacted = line_sanitizer.sanitize(raw)  # type: ignore[attr-defined]
        if was_redacted:
            flagged += 1
        if keep_everything or first <= line_no <= last:
            kept.append(clean)
    return kept, line_no, flagged
|
|
270
|
+
|
|
271
|
+
|
|
238
272
|
def byte_len(text: str) -> int:
    """Return the UTF-8 encoded size of ``text`` in bytes.

    Characters that cannot be encoded are substituted (``errors="replace"``)
    rather than raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    return len(encoded)
|
|
240
274
|
|
|
@@ -342,13 +376,150 @@ def cap_label(value: object, default: str | None = None, limit: int = MAX_LABEL_
|
|
|
342
376
|
return text
|
|
343
377
|
|
|
344
378
|
|
|
345
|
-
def
|
|
379
|
+
def normalized_link_target(anchor: Path, raw_target: str) -> Path:
    """Return the lexically normalized location a symlink target refers to.

    Relative targets are interpreted against ``anchor``; absolute targets are
    used as given. Normalization is purely textual (``os.path.normpath``).
    """
    candidate = Path(raw_target)
    located = candidate if candidate.is_absolute() else anchor / candidate
    return Path(os.path.normpath(str(located)))
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Normalize common macOS absolute path aliases before no-follow traversal.

    When the first component below the root is listed in
    ``ALLOWED_FIRST_ABSOLUTE_SYMLINKS``, is a symlink on disk, and that link
    points exactly at the expected target, the component is replaced by the
    expected target. In every other case, including any ``OSError`` while
    inspecting the link, ``path`` is returned unchanged.
    """
    components = path.parts
    if len(components) < 2 or not path.is_absolute():
        return path
    alias = components[1]
    canonical = ALLOWED_FIRST_ABSOLUTE_SYMLINKS.get(alias)
    if canonical is None:
        return path
    root = Path(path.anchor)
    alias_path = root / alias
    try:
        is_link = stat.S_ISLNK(os.lstat(alias_path).st_mode)
        # Only read the link when it really is one; compare its lexical target.
        matches = is_link and normalized_link_target(root, os.readlink(alias_path)) == canonical
    except OSError:
        return path
    if not matches:
        return path
    return canonical.joinpath(*components[2:])
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def manifest_safe_read_supported() -> bool:
    """Report whether this platform offers ``O_NOFOLLOW`` and ``dir_fd`` for ``os.open``."""
    if not hasattr(os, "O_NOFOLLOW"):
        return False
    dir_fd_capable = getattr(os, "supports_dir_fd", set())
    return os.open in dir_fd_capable
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def manifest_directory_open_flags(*, follow_final: bool = False) -> int:
    """Build the ``os.open`` flags used for directory components.

    ``O_DIRECTORY`` and ``O_CLOEXEC`` are included when the platform defines
    them. ``O_NOFOLLOW`` is added unless ``follow_final`` is true.
    """
    flags = os.O_RDONLY | getattr(os, "O_DIRECTORY", 0) | getattr(os, "O_CLOEXEC", 0)
    if follow_final:
        return flags
    return flags | os.O_NOFOLLOW
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def manifest_file_open_flags() -> int:
    """Build the ``os.open`` flags used for the manifest leaf file.

    Always read-only with ``O_NOFOLLOW``; ``O_CLOEXEC``, ``O_NONBLOCK`` and
    ``O_NOCTTY`` are OR-ed in when the platform defines them.
    """
    flags = os.O_RDONLY | os.O_NOFOLLOW
    flags |= getattr(os, "O_CLOEXEC", 0)
    flags |= getattr(os, "O_NONBLOCK", 0)
    flags |= getattr(os, "O_NOCTTY", 0)
    return flags
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def manifest_leaf_name(path: Path) -> str:
    """Return the final component of ``path``.

    Raises ``PackError`` when the final component is empty, ``.`` or ``..``.
    """
    leaf = path.name
    if leaf and leaf not in (".", ".."):
        return leaf
    raise PackError("manifest path must name a regular file")
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def open_manifest_parent_no_follow(path: Path) -> int:
    """Open the directory that contains ``path`` without following symlinks.

    The path is walked one component at a time with ``os.open(..., dir_fd=...)``
    and ``O_NOFOLLOW``, so a symlinked intermediate directory causes the open
    to fail instead of being traversed. For absolute paths the first component
    is first passed through ``normalize_allowed_first_absolute_symlink``.

    Returns:
        An open directory file descriptor for the parent of ``path``. The
        caller owns it and must close it.

    Raises:
        PackError: if safe reads are unsupported on this platform, the path
            contains ``..``, or a component is not a directory.
        OSError: if opening or closing a component fails.
    """
    if not manifest_safe_read_supported():
        raise PackError("safe manifest reads require O_NOFOLLOW and dir_fd support")
    path = path.expanduser()
    if any(part == ".." for part in path.parts):
        raise PackError("manifest path must not contain parent traversal")
    if path.is_absolute():
        path = normalize_allowed_first_absolute_symlink(Path(os.path.normpath(str(path))))
        # The filesystem root itself is opened without O_NOFOLLOW.
        current_fd = os.open(path.anchor or os.sep, manifest_directory_open_flags(follow_final=True))
        parts = path.parts[1:-1]
    else:
        path = Path(os.path.normpath(str(path)))
        current_fd = os.open(".", manifest_directory_open_flags())
        parts = path.parts[:-1]
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                raise PackError("manifest path must not contain parent traversal")
            next_fd = -1
            try:
                next_fd = os.open(part, manifest_directory_open_flags(), dir_fd=current_fd)
                if not stat.S_ISDIR(os.fstat(next_fd).st_mode):
                    raise PackError("manifest path must not traverse non-directory components")
            except (OSError, PackError):
                if next_fd >= 0:
                    try:
                        os.close(next_fd)
                    except OSError:
                        pass
                raise
            # Take ownership of the child descriptor BEFORE closing the parent:
            # if os.close() raises, the finally clause below still closes the
            # child instead of leaking it.
            previous_fd, current_fd = current_fd, next_fd
            os.close(previous_fd)
        # Hand the descriptor to the caller and disarm the cleanup below.
        owned_fd, current_fd = current_fd, -1
        return owned_fd
    finally:
        if current_fd >= 0:
            try:
                os.close(current_fd)
            except OSError:
                pass
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def read_manifest_bytes_no_follow(path: Path) -> bytes:
    """Read the manifest at ``path`` without following symlinks.

    The parent directory is opened via ``open_manifest_parent_no_follow`` and
    the leaf is opened relative to it with ``manifest_file_open_flags``. The
    file must be a regular file no larger than ``MAX_MANIFEST_BYTES``; the
    size is checked both from ``fstat`` and from the bytes actually read.

    Raises ``PackError`` for every failure; ``OSError`` is wrapped.
    """
    parent_fd = -1
    fd = -1
    try:
        leaf = manifest_leaf_name(path.expanduser())
        parent_fd = open_manifest_parent_no_follow(path)
        fd = os.open(leaf, manifest_file_open_flags(), dir_fd=parent_fd)
        info = os.fstat(fd)
        if not stat.S_ISREG(info.st_mode):
            raise PackError("manifest must be a regular file")
        if info.st_size > MAX_MANIFEST_BYTES:
            raise PackError(f"manifest exceeds trusted size cap: {info.st_size} > {MAX_MANIFEST_BYTES}")
        # Read at most one byte past the cap so growth after fstat is detected.
        budget = MAX_MANIFEST_BYTES + 1
        buffer = bytearray()
        while budget > 0:
            piece = os.read(fd, min(64 * 1024, budget))
            if not piece:
                break
            buffer += piece
            budget -= len(piece)
        raw = bytes(buffer)
        if len(raw) > MAX_MANIFEST_BYTES:
            raise PackError(f"manifest exceeds trusted size cap: {len(raw)} > {MAX_MANIFEST_BYTES}")
        return raw
    except PackError:
        raise
    except OSError as exc:
        raise PackError(f"could not read manifest: {exc.strerror or exc.__class__.__name__}") from exc
    finally:
        # Close the file first, then its parent directory; ignore close errors.
        for descriptor in (fd, parent_fd):
            if descriptor >= 0:
                try:
                    os.close(descriptor)
                except OSError:
                    pass
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def read_manifest(path: Path) -> list[SourceSpec]:
|
|
522
|
+
raw = read_manifest_bytes_no_follow(path)
|
|
352
523
|
try:
|
|
353
524
|
data = json.loads(raw.decode("utf-8"))
|
|
354
525
|
except (UnicodeDecodeError, json.JSONDecodeError) as exc:
|
|
@@ -582,19 +753,15 @@ def resolve_source(root: Path, spec: SourceSpec) -> tuple[ResolvedSource | None,
|
|
|
582
753
|
return None, omission(spec, reason, path=display, redacted_path=redacted_path)
|
|
583
754
|
try:
|
|
584
755
|
with handle:
|
|
585
|
-
|
|
756
|
+
requested = spec.lines
|
|
757
|
+
selected, total_lines, redacted_lines = sanitize_source_lines(handle, requested)
|
|
586
758
|
except OSError:
|
|
587
759
|
return None, omission(spec, "unsafe_path", path=display, redacted_path=redacted_path)
|
|
588
|
-
|
|
589
|
-
all_lines = sanitized.splitlines(True)
|
|
590
|
-
if not all_lines:
|
|
760
|
+
if total_lines <= 0:
|
|
591
761
|
return None, omission(spec, "empty_source", path=display, redacted_path=redacted_path)
|
|
592
|
-
|
|
593
|
-
requested = spec.lines or LineRange(1, total_lines)
|
|
762
|
+
requested = requested or LineRange(1, total_lines)
|
|
594
763
|
if requested.start > total_lines:
|
|
595
764
|
return None, omission(spec, "empty_source", path=display, redacted_path=redacted_path)
|
|
596
|
-
end = min(requested.end, total_lines)
|
|
597
|
-
selected = all_lines[requested.start - 1:end]
|
|
598
765
|
if not selected:
|
|
599
766
|
return None, omission(spec, "empty_source", path=display, redacted_path=redacted_path)
|
|
600
767
|
return ResolvedSource(
|
|
@@ -645,7 +812,11 @@ def retrieval_for(root_arg: str, display_path: str, lines: LineRange, *, redacte
|
|
|
645
812
|
return retrieval_cli(safe_root, display_path, lines), None
|
|
646
813
|
|
|
647
814
|
|
|
648
|
-
|
|
815
|
+
BLOCK_OPEN = "\n\n```text\n"
|
|
816
|
+
BLOCK_CLOSE = "```\n\n"
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def render_block_header(source: ResolvedSource, *, root_arg: str, status: str, included: LineRange) -> str:
|
|
649
820
|
title = source.spec.label or source.display_path
|
|
650
821
|
requested = source.requested_lines or LineRange(1, source.total_lines)
|
|
651
822
|
retrieval, retrieval_omitted_reason = retrieval_for(root_arg, source.display_path, included, redacted_path=source.redacted_path)
|
|
@@ -661,7 +832,11 @@ def render_block(source: ResolvedSource, lines: list[str], *, root_arg: str, sta
|
|
|
661
832
|
header.append(f"Retrieval: `{retrieval}`")
|
|
662
833
|
elif retrieval_omitted_reason:
|
|
663
834
|
header.append(f"Retrieval omitted: {retrieval_omitted_reason}")
|
|
664
|
-
return "\n".join(header)
|
|
835
|
+
return "\n".join(header)
|
|
836
|
+
|
|
837
|
+
|
|
838
|
+
def render_block(source: ResolvedSource, lines: list[str], *, root_arg: str, status: str, included: LineRange) -> str:
    """Render one source block: header, opening fence, body lines, closing fence.

    A newline is appended to the body when the last line lacks one, so the
    closing fence always starts on its own line.
    """
    header = render_block_header(source, root_arg=root_arg, status=status, included=included)
    body = "".join(lines)
    if lines and not lines[-1].endswith("\n"):
        body += "\n"
    return header + BLOCK_OPEN + body + BLOCK_CLOSE
|
|
665
840
|
|
|
666
841
|
|
|
667
842
|
def source_metadata(source: ResolvedSource, *, status: str, lines: list[str], included: LineRange, root_arg: str) -> dict[str, Any]:
|
|
@@ -701,21 +876,63 @@ def budget_omission(source: ResolvedSource, *, root_arg: str) -> dict[str, Any]:
|
|
|
701
876
|
return item
|
|
702
877
|
|
|
703
878
|
|
|
704
|
-
def
|
|
879
|
+
def included_range_for_line_count(source: ResolvedSource, line_count: int) -> LineRange:
    """Return the line range covered by the first ``line_count`` selected lines.

    The range starts at the requested start line, or at line 1 when no
    explicit range was requested.
    """
    requested = source.requested_lines
    first = requested.start if requested else 1
    last = first + line_count - 1
    return LineRange(first, last)
|
|
882
|
+
|
|
883
|
+
|
|
884
|
+
def line_byte_prefixes(lines: list[str]) -> list[int]:
    """Return cumulative byte sizes: entry ``i`` is the size of the first ``i`` lines.

    The result always starts with ``0`` and has ``len(lines) + 1`` entries.
    """
    offsets = [0]
    for text in lines:
        offsets.append(offsets[-1] + byte_len(text))
    return offsets
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
def render_block_byte_len(
    source: ResolvedSource,
    line_count: int,
    line_prefixes: list[int],
    *,
    root_arg: str,
    status: str,
    included: LineRange,
) -> int:
    """Compute the byte size of a rendered block without building its body.

    ``line_prefixes`` supplies the cumulative byte size of the first
    ``line_count`` selected lines. One extra byte is counted when the last of
    those lines has no trailing newline, matching what ``render_block`` emits.
    """
    body_size = line_prefixes[line_count]
    needs_newline = line_count > 0 and not source.selected_lines[line_count - 1].endswith("\n")
    if needs_newline:
        body_size += 1
    header_size = byte_len(render_block_header(source, root_arg=root_arg, status=status, included=included))
    return header_size + byte_len(BLOCK_OPEN) + body_size + byte_len(BLOCK_CLOSE)
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
def fit_partial_lines(
    source: ResolvedSource,
    remaining: int,
    *,
    root_arg: str,
    line_prefixes: list[int] | None = None,
) -> tuple[list[str], str | None, LineRange | None]:
    """Find the longest leading run of selected lines whose block fits ``remaining`` bytes.

    Binary-searches the line count using ``render_block_byte_len`` with status
    ``"partial"``. Returns ``(lines, rendered_block, included_range)``, or
    ``([], None, None)`` when nothing fits or there is nothing to fit.
    ``line_prefixes`` may be passed to reuse already computed cumulative sizes.
    """
    if remaining <= 0 or not source.selected_lines:
        return [], None, None
    offsets = line_byte_prefixes(source.selected_lines) if line_prefixes is None else line_prefixes
    fits = 0
    lo, hi = 1, len(source.selected_lines)
    while lo <= hi:
        probe = (lo + hi) // 2
        probe_range = included_range_for_line_count(source, probe)
        size = render_block_byte_len(source, probe, offsets, root_arg=root_arg, status="partial", included=probe_range)
        if size > remaining:
            hi = probe - 1
        else:
            fits = probe
            lo = probe + 1
    if fits <= 0:
        return [], None, None
    picked = source.selected_lines[:fits]
    included = included_range_for_line_count(source, fits)
    block = render_block(source, picked, root_arg=root_arg, status="partial", included=included)
    return picked, block, included
|
|
720
937
|
|
|
721
938
|
|
|
@@ -988,17 +1205,17 @@ def build_pack(root: Path, specs: list[SourceSpec], *, budget_bytes: int, root_a
|
|
|
988
1205
|
parts.append(header)
|
|
989
1206
|
current_pack_bytes += header_bytes
|
|
990
1207
|
for source in resolved:
|
|
991
|
-
|
|
992
|
-
included_range =
|
|
993
|
-
|
|
994
|
-
full_block_bytes = byte_len(full_block)
|
|
1208
|
+
line_prefixes = line_byte_prefixes(source.selected_lines)
|
|
1209
|
+
included_range = included_range_for_line_count(source, len(source.selected_lines))
|
|
1210
|
+
full_block_bytes = render_block_byte_len(source, len(source.selected_lines), line_prefixes, root_arg=root_arg, status="included", included=included_range)
|
|
995
1211
|
remaining = budget_bytes - current_pack_bytes
|
|
996
1212
|
if full_block_bytes <= remaining:
|
|
1213
|
+
full_block = render_block(source, source.selected_lines, root_arg=root_arg, status="included", included=included_range)
|
|
997
1214
|
parts.append(full_block)
|
|
998
1215
|
current_pack_bytes += full_block_bytes
|
|
999
1216
|
included.append(source_metadata(source, status="included", lines=source.selected_lines, included=included_range, root_arg=root_arg))
|
|
1000
1217
|
continue
|
|
1001
|
-
partial_lines, partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg)
|
|
1218
|
+
partial_lines, partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg, line_prefixes=line_prefixes)
|
|
1002
1219
|
if partial_block is not None and partial_range is not None:
|
|
1003
1220
|
parts.append(partial_block)
|
|
1004
1221
|
current_pack_bytes += byte_len(partial_block)
|
|
@@ -1358,7 +1575,8 @@ def source_selected_range(source: ResolvedSource) -> LineRange:
|
|
|
1358
1575
|
|
|
1359
1576
|
def resolved_block_bytes(source: ResolvedSource, *, root_arg: str) -> int:
    """Return the byte size of ``source`` rendered in full with status ``"included"``."""
    selected_range = source_selected_range(source)
    lines = source.selected_lines
    offsets = line_byte_prefixes(lines)
    return render_block_byte_len(
        source,
        len(lines),
        offsets,
        root_arg=root_arg,
        status="included",
        included=selected_range,
    )
|
|
1362
1580
|
|
|
1363
1581
|
|
|
1364
1582
|
def manifest_source_for_candidate(source: ResolvedSource, *, priority: int, label: str | None) -> dict[str, Any]:
|
|
@@ -1638,6 +1856,136 @@ def suggest_build_hint(root_arg: str, manifest_path: str | None, budget: int) ->
|
|
|
1638
1856
|
return f"cd {shlex.quote(safe_root)} && {command}", None
|
|
1639
1857
|
|
|
1640
1858
|
|
|
1859
|
+
def percentile_int(values: list[int], numerator: int, denominator: int) -> int:
    """Pick the element at fraction ``numerator / denominator`` of ``values``.

    The index is ``(len(values) - 1) * numerator // denominator`` clamped to
    the valid range. Returns ``0`` for an empty list and the first element
    when ``denominator`` is not positive. ``values`` is used in the order
    given; callers wanting a percentile pass it sorted.
    """
    if not values:
        return 0
    if denominator <= 0:
        return values[0]
    last = len(values) - 1
    position = last * numerator // denominator
    position = min(last, max(0, position))
    return values[position]
|
|
1866
|
+
|
|
1867
|
+
|
|
1868
|
+
def score_gap_advice(scores: list[int], requested_top: int) -> tuple[int, dict[str, Any], list[str]]:
    """Locate the largest drop between consecutive scores and derive an elbow k.

    Returns ``(elbow_k, max_gap_details, reason_codes)``. The largest gap
    counts as an elbow when it is at least ``max(250, top_score // 5)``;
    otherwise the elbow falls back to ``min(MAX_SUGGEST_TOP, len(scores))``.
    Negative differences are treated as zero.
    """
    if not scores:
        return 0, {"after_rank": 0, "delta": 0, "ratio": 0.0}, ["no_candidates"]
    if len(scores) == 1:
        return 1, {"after_rank": 1, "delta": 0, "ratio": 0.0}, ["single_candidate"]
    drops = [max(0, upper - lower) for upper, lower in zip(scores, scores[1:])]
    largest = max(drops)
    position = drops.index(largest)
    top_score = max(1, scores[0])
    details = {"after_rank": position + 1, "delta": largest, "ratio": round(largest / top_score, 4)}
    if largest < max(250, top_score // 5):
        fallback_k = min(MAX_SUGGEST_TOP, len(scores))
        return max(1, fallback_k), details, ["no_strong_score_elbow"]
    elbow_k = position + 1
    if elbow_k <= requested_top:
        reasons = ["score_elbow"]
    else:
        reasons = ["score_elbow_after_requested_top"]
    return max(1, elbow_k), details, reasons
|
|
1885
|
+
|
|
1886
|
+
|
|
1887
|
+
def build_adaptive_k_advisory(
    *,
    candidates: list[SuggestCandidate],
    selected: list[dict[str, Any]],
    omitted: list[dict[str, Any]],
    requested_top: int,
    budget_bytes: int,
    estimated_pack_bytes: int,
) -> dict[str, Any]:
    """Build the advisory-only ``adaptive_k`` payload for a suggest run.

    Combines a score-elbow estimate (from at most
    ``MAX_ADAPTIVE_K_SCORE_SAMPLES`` leading candidates) with a budget-fit
    estimate and reports the smaller of the two as ``recommended_k``. The
    result is informational: ``recommendation.apply`` is always ``False``.
    """
    candidate_total = len(candidates)
    sample = candidates[:MAX_ADAPTIVE_K_SCORE_SAMPLES]
    sample_capped = candidate_total > len(sample)
    scores = [max(0, int(entry.score)) for entry in sample]
    elbow_k, gap_details, base_reasons = score_gap_advice(scores, requested_top)
    reasons = set(base_reasons)

    picked_total = len(selected)
    # Selected items may carry either "score" or "priority"; falsy values count as 0.
    picked_mass = sum(
        [max(0, int(entry.get("score", entry.get("priority", 0)) or 0)) for entry in selected]
    )
    sample_mass = sum(scores)

    budget_omissions = len([entry for entry in omitted if entry.get("reason") == "budget_exhausted"])
    is_budget_limited = bool(budget_omissions or estimated_pack_bytes > budget_bytes)
    headroom = budget_bytes - estimated_pack_bytes
    mean_selected_bytes = int(estimated_pack_bytes / picked_total) if picked_total else 0

    if is_budget_limited:
        reasons.add("budget_limited")
    if sample_capped:
        reasons.add("candidate_sample_capped")
    if picked_total < min(requested_top, candidate_total):
        reasons.add("selected_below_requested_top")

    if picked_total == 0:
        fit_k = 0
        if candidates:
            reasons.add("no_budget_fit" if is_budget_limited else "no_selected_sources")
    elif is_budget_limited:
        fit_k = picked_total
    else:
        # Estimate how many more average-sized sources the leftover budget holds.
        extra = max(0, headroom // max(1, mean_selected_bytes))
        fit_k = min(MAX_SUGGEST_TOP, candidate_total, picked_total + extra)
        if fit_k > requested_top:
            reasons.add("budget_headroom_expand")

    if candidates:
        recommended = min(max(0, elbow_k), max(0, fit_k), candidate_total, MAX_SUGGEST_TOP)
    else:
        recommended = 0

    ascending = sorted(scores)
    highest = ascending[-1] if ascending else 0
    lowest = ascending[0] if ascending else 0

    recommendation = {
        "apply": False,
        "reason_codes": sorted(reasons),
        "next_step": "rerun with --top recommended_k if you accept this local proxy advisory",
    }
    score_distribution = {
        "candidate_count": candidate_total,
        "analyzed_candidate_count": len(sample),
        "sample_capped": sample_capped,
        "top_score": highest,
        "p50_score": percentile_int(ascending, 1, 2),
        "p90_score": percentile_int(ascending, 9, 10),
        "min_score": lowest,
        "max_gap_details": gap_details,
        "score_elbow_k": elbow_k,
    }
    budget_fit = {
        "budget_bytes": budget_bytes,
        "estimated_pack_bytes": estimated_pack_bytes,
        "remaining_bytes": headroom,
        "selected_count": picked_total,
        "budget_omitted_count": budget_omissions,
        "budget_limited": is_budget_limited,
        "average_selected_bytes": mean_selected_bytes,
        "budget_fit_k": fit_k,
    }
    if sample_mass:
        recall_proxy = round(picked_mass / sample_mass, 4)
    else:
        recall_proxy = 0.0
    if picked_total:
        precision_proxy = round((picked_mass / max(1, picked_total)) / max(1, highest), 4)
    else:
        precision_proxy = 0.0
    recall_precision_proxy = {
        "measurement": "local_score_mass_proxy",
        "selected_score_mass": picked_mass,
        "analyzed_score_mass": sample_mass,
        "recall_proxy": recall_proxy,
        "precision_proxy": precision_proxy,
        "selected_count": picked_total,
        "candidate_count": candidate_total,
    }
    claim_boundary = {
        "deterministic_local_only": True,
        "no_model_network_or_embedding": True,
        "token_counts_are_estimated_proxies": True,
        "provider_token_or_cost_savings_claim_allowed": False,
        "advisory_does_not_change_manifest_or_pack": True,
    }
    return {
        "schema_version": ADAPTIVE_K_SCHEMA_VERSION,
        "mode": "advisory",
        "requested_top": requested_top,
        "recommended_k": recommended,
        "recommendation": recommendation,
        "score_distribution": score_distribution,
        "budget_fit": budget_fit,
        "recall_precision_proxy": recall_precision_proxy,
        "claim_boundary": claim_boundary,
    }
|
|
1987
|
+
|
|
1988
|
+
|
|
1641
1989
|
def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tuple[dict[str, Any], int]:
|
|
1642
1990
|
query_text, _query_redactions = sanitize_text(args.query or "")
|
|
1643
1991
|
query = " ".join(query_text.split())
|
|
@@ -1713,11 +2061,19 @@ def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tupl
|
|
|
1713
2061
|
})
|
|
1714
2062
|
continue
|
|
1715
2063
|
final_seen.add(final_identity)
|
|
1716
|
-
|
|
2064
|
+
line_prefixes = line_byte_prefixes(source.selected_lines)
|
|
2065
|
+
source_bytes = render_block_byte_len(
|
|
2066
|
+
source,
|
|
2067
|
+
len(source.selected_lines),
|
|
2068
|
+
line_prefixes,
|
|
2069
|
+
root_arg=root_arg,
|
|
2070
|
+
status="included",
|
|
2071
|
+
included=source_selected_range(source),
|
|
2072
|
+
)
|
|
1717
2073
|
remaining = budget - current_bytes
|
|
1718
2074
|
if source_bytes > remaining:
|
|
1719
2075
|
if not selected and remaining > 0:
|
|
1720
|
-
partial_lines, _partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg)
|
|
2076
|
+
partial_lines, _partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg, line_prefixes=line_prefixes)
|
|
1721
2077
|
if partial_range is not None and partial_lines:
|
|
1722
2078
|
partial_spec = SourceSpec(
|
|
1723
2079
|
path=candidate.path,
|
|
@@ -1734,7 +2090,15 @@ def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tupl
|
|
|
1734
2090
|
omitted.append(omitted_item)
|
|
1735
2091
|
continue
|
|
1736
2092
|
assert source is not None
|
|
1737
|
-
|
|
2093
|
+
partial_prefixes = line_byte_prefixes(source.selected_lines)
|
|
2094
|
+
source_bytes = render_block_byte_len(
|
|
2095
|
+
source,
|
|
2096
|
+
len(source.selected_lines),
|
|
2097
|
+
partial_prefixes,
|
|
2098
|
+
root_arg=root_arg,
|
|
2099
|
+
status="included",
|
|
2100
|
+
included=source_selected_range(source),
|
|
2101
|
+
)
|
|
1738
2102
|
else:
|
|
1739
2103
|
omitted.append({"path": source.display_path, "status": "omitted", "reason": "budget_exhausted", "priority": candidate.score})
|
|
1740
2104
|
continue
|
|
@@ -1780,6 +2144,15 @@ def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tupl
|
|
|
1780
2144
|
}
|
|
1781
2145
|
if build_hint_omitted_reason:
|
|
1782
2146
|
payload["build_hint_omitted_reason"] = build_hint_omitted_reason
|
|
2147
|
+
if getattr(args, "adaptive_k", False):
|
|
2148
|
+
payload["adaptive_k"] = build_adaptive_k_advisory(
|
|
2149
|
+
candidates=candidates,
|
|
2150
|
+
selected=selected,
|
|
2151
|
+
omitted=omitted,
|
|
2152
|
+
requested_top=top,
|
|
2153
|
+
budget_bytes=budget,
|
|
2154
|
+
estimated_pack_bytes=estimated_pack_bytes,
|
|
2155
|
+
)
|
|
1783
2156
|
return payload, 0
|
|
1784
2157
|
|
|
1785
2158
|
|
|
@@ -1893,20 +2266,53 @@ def read_repo_map_text(root: Path, rel_path: str) -> tuple[dict[str, Any] | None
|
|
|
1893
2266
|
}, None
|
|
1894
2267
|
|
|
1895
2268
|
|
|
1896
|
-
def
|
|
2269
|
+
def repo_map_path_scan_priority(rel_path: str, *, seed_paths: set[str], query_terms: set[str], input_index: int) -> tuple[int, int, str]:
    """Return a sort key that ranks ``rel_path`` for repo-map scanning.

    Lower keys sort first. The key is ``(-score, input_index, display)``: the
    score rewards seed paths, repo-map text paths, query-term matches and
    README files, while ``input_index`` keeps the original order among ties.
    """
    rel, reason = lexical_rel(rel_path)
    display = repo_map_safe_raw_path_label(rel_path)
    redacted = False
    if rel is not None and not reason:
        display, redacted = repo_map_display_rel_path(rel.as_posix())
    # A redacted display path never counts as a seed match.
    seed_bonus = 1_000_000 if (not redacted and display in seed_paths) else 0
    text_bonus = 10_000 if is_repo_map_text_path(display) else 0
    query_bonus = suggest_score_path(display, query_terms)
    readme_bonus = 250 if Path(display).name.lower() in {"readme", "readme.md", "readme.mdx"} else 0
    total = seed_bonus + text_bonus + query_bonus + readme_bonus
    return (-total, input_index, display)
|
|
2284
|
+
|
|
2285
|
+
|
|
2286
|
+
def repo_map_scan_paths(paths: list[str], *, seed_paths: set[str], query_terms: set[str]) -> list[str]:
    """Choose which paths get scanned for the repo map.

    Only the first ``MAX_REPO_MAP_FILES`` paths are considered. They are
    ordered by ``repo_map_path_scan_priority`` and the best
    ``MAX_REPO_MAP_SCAN_FILES`` are returned.
    """
    keyed = [
        (
            repo_map_path_scan_priority(candidate, seed_paths=seed_paths, query_terms=query_terms, input_index=position),
            candidate,
        )
        for position, candidate in enumerate(paths[:MAX_REPO_MAP_FILES])
    ]
    keyed.sort(key=lambda pair: pair[0])
    return [candidate for _key, candidate in keyed[:MAX_REPO_MAP_SCAN_FILES]]
|
|
2292
|
+
|
|
2293
|
+
|
|
2294
|
+
def repo_map_records(root: Path, *, seed_paths: set[str], query_terms: set[str]) -> tuple[list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]:
|
|
1897
2295
|
paths = git_ls_files(root)
|
|
2296
|
+
candidate_paths = paths[:MAX_REPO_MAP_FILES]
|
|
1898
2297
|
path_cap_reached = len(paths) > MAX_REPO_MAP_FILES
|
|
2298
|
+
scan_paths = repo_map_scan_paths(candidate_paths, seed_paths=seed_paths, query_terms=query_terms)
|
|
2299
|
+
scan_cap_reached = len(candidate_paths) > len(scan_paths)
|
|
1899
2300
|
records: list[dict[str, Any]] = []
|
|
1900
2301
|
omitted: list[dict[str, Any]] = []
|
|
1901
|
-
for rel_path in
|
|
2302
|
+
for rel_path in scan_paths:
|
|
1902
2303
|
record, omission_item = read_repo_map_text(root, rel_path)
|
|
1903
2304
|
if record is not None:
|
|
1904
2305
|
records.append(record)
|
|
1905
2306
|
elif omission_item is not None and omission_item.get("reason") != "unsupported_file_type":
|
|
1906
2307
|
omitted.append({key: value for key, value in omission_item.items() if value is not None})
|
|
1907
2308
|
caps = {
|
|
1908
|
-
"max_files":
|
|
1909
|
-
"files_capped": path_cap_reached,
|
|
2309
|
+
"max_files": MAX_REPO_MAP_SCAN_FILES,
|
|
2310
|
+
"files_capped": path_cap_reached or scan_cap_reached,
|
|
2311
|
+
"max_candidate_files": MAX_REPO_MAP_FILES,
|
|
2312
|
+
"candidate_files": len(candidate_paths),
|
|
2313
|
+
"candidate_files_capped": path_cap_reached,
|
|
2314
|
+
"scan_files": len(scan_paths),
|
|
2315
|
+
"scan_files_capped": scan_cap_reached,
|
|
1910
2316
|
"max_bytes_per_file": MAX_REPO_MAP_BYTES_PER_FILE,
|
|
1911
2317
|
"bytes_per_file_capped_count": sum(1 for item in records if item.get("bytes_capped")),
|
|
1912
2318
|
"max_tree_entries": MAX_REPO_MAP_TREE_ENTRIES,
|
|
@@ -2256,18 +2662,19 @@ def build_repo_map_payload(
|
|
|
2256
2662
|
*,
|
|
2257
2663
|
root_arg: str,
|
|
2258
2664
|
) -> dict[str, Any]:
|
|
2259
|
-
|
|
2665
|
+
query_terms = suggest_tokens(str(suggest_payload.get("query", "")))
|
|
2666
|
+
seed_paths = repo_map_seed_paths(args, suggest_payload, build_payload)
|
|
2667
|
+
records, omitted, caps = repo_map_records(root, seed_paths=seed_paths, query_terms=query_terms)
|
|
2260
2668
|
record_by_path = {str(record["path"]): record for record in records}
|
|
2261
2669
|
signatures = extract_signatures(records)
|
|
2262
2670
|
secret_scan = build_secret_scan(records)
|
|
2263
2671
|
edges = collect_import_edges(records)
|
|
2264
|
-
query_terms = suggest_tokens(str(suggest_payload.get("query", "")))
|
|
2265
2672
|
graph_rank = build_graph_rank(
|
|
2266
2673
|
records,
|
|
2267
2674
|
signatures,
|
|
2268
2675
|
edges,
|
|
2269
2676
|
query_terms=query_terms,
|
|
2270
|
-
seed_paths=
|
|
2677
|
+
seed_paths=seed_paths,
|
|
2271
2678
|
secret_scan=secret_scan,
|
|
2272
2679
|
)
|
|
2273
2680
|
retrieval = repo_map_retrieval(record_by_path, signatures, graph_rank, root_arg=root_arg)
|
|
@@ -2312,6 +2719,90 @@ def build_repo_map_payload(
|
|
|
2312
2719
|
}
|
|
2313
2720
|
|
|
2314
2721
|
|
|
2722
|
+
def line_identity_from_dict(value: object) -> str:
    """Return a string identity for a ``lines`` value.

    A dict yields ``"<start>:<end>"`` built from its ``start`` and ``end``
    entries (a missing entry renders as ``None``). Any non-dict value
    yields ``"all"``.
    """
    if isinstance(value, dict):
        start, end = value.get("start"), value.get("end")
        return f"{start}:{end}"
    return "all"
|
|
2726
|
+
|
|
2727
|
+
|
|
2728
|
+
def build_symbol_memory_payload(repo_map: dict[str, Any]) -> dict[str, Any]:
    """Derive the advisory ``symbol_memory`` payload from a repo-map payload.

    Args:
        repo_map: Repo-map payload. The ``retrieval``, ``signature_index`` and
            ``graph_rank`` entries are read as lists and ``summary`` as a
            dict. An entry that is missing or has any other type (including
            an explicit ``None``) is treated as empty instead of raising.

    Returns:
        A dict holding up to ``MAX_SYMBOL_MEMORY_ITEMS`` symbol records (one
        per signature, enriched with retrieval CLI hints when a retrieval
        entry has the same path and line identity), up to
        ``MAX_SYMBOL_MEMORY_GRAPH_ITEMS`` graph-rank records, summary counts,
        and the fixed ``source_verification`` / ``claim_boundary`` blocks.
    """

    def section(key: str) -> list[Any]:
        # dict.get() only uses its default when the key is absent, so a key
        # present with None (or another non-list) must be normalised here.
        value = repo_map.get(key)
        return value if isinstance(value, list) else []

    retrieval = section("retrieval")

    # Index retrieval hints by (path, line identity) so each signature can
    # look up its matching hint directly.
    retrieval_by_path_lines: dict[tuple[str, str], dict[str, Any]] = {}
    for item in retrieval:
        if not isinstance(item, dict):
            continue
        path = str(item.get("path", ""))
        retrieval_by_path_lines[(path, line_identity_from_dict(item.get("lines")))] = item

    symbols: list[dict[str, Any]] = []
    for signature in section("signature_index"):
        if not isinstance(signature, dict):
            continue
        path = str(signature.get("path", ""))
        # Deep-copied so the returned payload does not alias repo_map data.
        lines = copy.deepcopy(signature.get("lines"))
        hint = retrieval_by_path_lines.get((path, line_identity_from_dict(lines)))
        symbol: dict[str, Any] = {
            "path": path,
            "kind": signature.get("kind"),
            "name": signature.get("name"),
            "signature": signature.get("signature"),
            "line": signature.get("line"),
            "lines": lines,
            "source": "repo_map.signature_index",
            "exact_source_verification_required": True,
        }
        if isinstance(hint, dict):
            for key in ("slice_cli", "symbol_cli", "retrieval_omitted_reason"):
                if hint.get(key):
                    symbol[key] = hint[key]
        # None-valued fields are dropped from the emitted record.
        symbols.append({key: value for key, value in symbol.items() if value is not None})
        if len(symbols) >= MAX_SYMBOL_MEMORY_ITEMS:
            break

    graph_context: list[dict[str, Any]] = []
    for item in section("graph_rank"):
        if not isinstance(item, dict):
            continue
        graph_context.append({
            "path": item.get("path"),
            "score": item.get("score"),
            "components": copy.deepcopy(item.get("components", {})),
            "line_count": item.get("line_count"),
            "exact_source_verification_required": True,
        })
        if len(graph_context) >= MAX_SYMBOL_MEMORY_GRAPH_ITEMS:
            break

    summary = repo_map.get("summary")
    if not isinstance(summary, dict):
        summary = {}
    # Symbols that carry neither CLI hint for fetching the exact source.
    missing_hints = sum(
        1 for item in symbols if not (item.get("slice_cli") or item.get("symbol_cli"))
    )
    return {
        "schema_version": SYMBOL_MEMORY_SCHEMA_VERSION,
        "mode": "advisory",
        "source": "contextguard.pack-repo-map.v1",
        "summary": {
            "symbols": len(symbols),
            "graph_context": len(graph_context),
            "files_scanned": int(summary.get("files_scanned", 0) or 0),
            "graph_edges": int(summary.get("graph_edges", 0) or 0),
            "retrieval_hints": len(retrieval),
        },
        "symbols": symbols,
        "graph_context": graph_context,
        "source_verification": {
            "requires_exact_source_before_edits": True,
            "verified_by": ["slice_cli", "symbol_cli"],
            "retrieval_hint_count": len(retrieval),
            "missing_retrieval_hint_count": missing_hints,
        },
        "claim_boundary": {
            "deterministic_local_only": True,
            "no_network_model_embedding_lsp_or_tree_sitter_dependency": True,
            "advisory_does_not_change_manifest_pack_or_receipt": True,
            "graph_rank_is_explain_only": True,
            "provider_token_or_cost_savings_claim_allowed": False,
        },
    }
|
|
2804
|
+
|
|
2805
|
+
|
|
2315
2806
|
def build_auto_explain_payload(
|
|
2316
2807
|
args: argparse.Namespace,
|
|
2317
2808
|
suggest_payload: dict[str, Any],
|
|
@@ -2320,6 +2811,7 @@ def build_auto_explain_payload(
|
|
|
2320
2811
|
*,
|
|
2321
2812
|
root: Path | None = None,
|
|
2322
2813
|
root_arg: str = ".",
|
|
2814
|
+
repo_map_payload: dict[str, Any] | None = None,
|
|
2323
2815
|
) -> dict[str, Any]:
|
|
2324
2816
|
build_sources = [
|
|
2325
2817
|
item
|
|
@@ -2447,7 +2939,9 @@ def build_auto_explain_payload(
|
|
|
2447
2939
|
"raw_test_output_embedded": False,
|
|
2448
2940
|
},
|
|
2449
2941
|
}
|
|
2450
|
-
if
|
|
2942
|
+
if repo_map_payload is not None:
|
|
2943
|
+
explain["repo_map"] = copy.deepcopy(repo_map_payload)
|
|
2944
|
+
elif root is not None:
|
|
2451
2945
|
explain["repo_map"] = build_repo_map_payload(root, args, suggest_payload, build_payload, root_arg=root_arg)
|
|
2452
2946
|
return explain
|
|
2453
2947
|
|
|
@@ -2534,11 +3028,70 @@ def auto_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tuple[d
|
|
|
2534
3028
|
}
|
|
2535
3029
|
if build_hint_omitted_reason:
|
|
2536
3030
|
payload["build_hint_omitted_reason"] = build_hint_omitted_reason
|
|
3031
|
+
if getattr(args, "adaptive_k", False) and isinstance(suggest_payload.get("adaptive_k"), dict):
|
|
3032
|
+
payload["adaptive_k"] = copy.deepcopy(suggest_payload["adaptive_k"])
|
|
3033
|
+
repo_map_payload: dict[str, Any] | None = None
|
|
3034
|
+
if getattr(args, "symbol_memory", False) or args.explain:
|
|
3035
|
+
repo_map_payload = build_repo_map_payload(root, args, suggest_payload, build_payload, root_arg=root_arg)
|
|
3036
|
+
if getattr(args, "symbol_memory", False) and isinstance(repo_map_payload, dict):
|
|
3037
|
+
payload["symbol_memory"] = build_symbol_memory_payload(repo_map_payload)
|
|
2537
3038
|
if args.explain:
|
|
2538
|
-
payload["explain"] = build_auto_explain_payload(
|
|
3039
|
+
payload["explain"] = build_auto_explain_payload(
|
|
3040
|
+
args,
|
|
3041
|
+
suggest_payload,
|
|
3042
|
+
build_payload,
|
|
3043
|
+
payload,
|
|
3044
|
+
root=root,
|
|
3045
|
+
root_arg=root_arg,
|
|
3046
|
+
repo_map_payload=repo_map_payload,
|
|
3047
|
+
)
|
|
2539
3048
|
return payload, rc
|
|
2540
3049
|
|
|
2541
3050
|
|
|
3051
|
+
def print_adaptive_k_text(payload: dict[str, Any]) -> None:
    """Print a one-line text summary of ``payload["adaptive_k"]``.

    Prints nothing when the entry is missing or is not a dict. Nested
    sections that are missing or not dicts are read as empty, so the line
    falls back to ``0`` / ``False`` / ``none`` for absent values. At most
    the first five reason codes are shown.
    """
    advisory = payload.get("adaptive_k")
    if not isinstance(advisory, dict):
        return

    def section(name: str) -> dict[str, Any]:
        candidate = advisory.get(name)
        return candidate if isinstance(candidate, dict) else {}

    recommendation = section("recommendation")
    score_distribution = section("score_distribution")
    budget_fit = section("budget_fit")

    reason_codes = recommendation.get("reason_codes", [])
    # A non-list value is rendered as-is rather than dropped.
    reason_text = (
        ",".join(map(str, reason_codes[:5]))
        if isinstance(reason_codes, list)
        else str(reason_codes)
    )

    message = (
        "adaptive-k: "
        f"recommended={advisory.get('recommended_k', 0)}/{advisory.get('requested_top', 0)} "
        f"candidates={score_distribution.get('candidate_count', 0)} "
        f"budget_limited={budget_fit.get('budget_limited', False)} "
        f"apply=false reasons={reason_text or 'none'}"
    )
    print(message)
|
|
3078
|
+
|
|
3079
|
+
|
|
3080
|
+
def print_symbol_memory_text(payload: dict[str, Any]) -> None:
    """Print a one-line text summary of ``payload["symbol_memory"]``.

    Prints nothing when the entry is missing or is not a dict. The
    ``summary`` and ``source_verification`` sections are read as empty when
    missing or not dicts; counts then default to ``0`` and
    ``verify_before_edits`` to ``true``.
    """
    memory = payload.get("symbol_memory")
    if not isinstance(memory, dict):
        return

    def section(name: str) -> dict[str, Any]:
        candidate = memory.get(name)
        return candidate if isinstance(candidate, dict) else {}

    summary = section("summary")
    verification = section("source_verification")
    # Lower-cased so the flag prints as true/false instead of True/False.
    verify_flag = str(verification.get("requires_exact_source_before_edits", True)).lower()

    message = (
        "symbol-memory: "
        f"symbols={summary.get('symbols', 0)} "
        f"graph_context={summary.get('graph_context', 0)} "
        f"retrieval_hints={summary.get('retrieval_hints', 0)} "
        f"verify_before_edits={verify_flag}"
    )
    print(message)
|
|
3093
|
+
|
|
3094
|
+
|
|
2542
3095
|
def print_suggest_text(payload: dict[str, Any]) -> None:
|
|
2543
3096
|
print(
|
|
2544
3097
|
f"context-guard-pack suggest: {len(payload['sources'])} source(s), "
|
|
@@ -2554,6 +3107,7 @@ def print_suggest_text(payload: dict[str, Any]) -> None:
|
|
|
2554
3107
|
print(f"build: {payload['build_hint']}")
|
|
2555
3108
|
elif payload.get("build_hint_omitted_reason"):
|
|
2556
3109
|
print(f"build hint omitted: {payload['build_hint_omitted_reason']}")
|
|
3110
|
+
print_adaptive_k_text(payload)
|
|
2557
3111
|
|
|
2558
3112
|
|
|
2559
3113
|
def print_auto_text(payload: dict[str, Any]) -> None:
|
|
@@ -2598,6 +3152,8 @@ def print_auto_text(payload: dict[str, Any]) -> None:
|
|
|
2598
3152
|
reason_counts[reason] = reason_counts.get(reason, 0) + 1
|
|
2599
3153
|
reason_text = ", ".join(f"{reason}={count}" for reason, count in sorted(reason_counts.items()))
|
|
2600
3154
|
print(f"omitted reasons: {reason_text}")
|
|
3155
|
+
print_adaptive_k_text(payload)
|
|
3156
|
+
print_symbol_memory_text(payload)
|
|
2601
3157
|
if payload.get("manifest_path"):
|
|
2602
3158
|
print(f"manifest: {payload['manifest_path']}")
|
|
2603
3159
|
if payload.get("pack_path"):
|
|
@@ -2633,6 +3189,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
2633
3189
|
suggest.add_argument("--top", type=int, default=DEFAULT_SUGGEST_TOP, help="maximum suggested sources")
|
|
2634
3190
|
suggest.add_argument("--context-lines", type=int, default=DEFAULT_SUGGEST_CONTEXT_LINES, help="line context around diff/output hits")
|
|
2635
3191
|
suggest.add_argument("--manifest-out", help="write the suggested build manifest to this relative path under root")
|
|
3192
|
+
suggest.add_argument("--adaptive-k", action="store_true", help="include local score/budget top-k advisory metadata without changing the manifest")
|
|
2636
3193
|
suggest.add_argument("--json", action="store_true", help="emit JSON payload")
|
|
2637
3194
|
auto = sub.add_parser("auto", help="suggest a context pack manifest and build the budgeted pack in one local step")
|
|
2638
3195
|
auto.add_argument("--root", default=".", help="project root; must not be a symlink")
|
|
@@ -2649,6 +3206,8 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
2649
3206
|
auto.add_argument("--json", action="store_true", help="emit JSON payload")
|
|
2650
3207
|
auto.add_argument("--no-artifact", action="store_true", help="do not write .context-guard/packs receipt")
|
|
2651
3208
|
auto.add_argument("--explain", action="store_true", help="include deterministic local selection/build explanation metadata")
|
|
3209
|
+
auto.add_argument("--adaptive-k", action="store_true", help="include local score/budget top-k advisory metadata without changing the manifest or pack")
|
|
3210
|
+
auto.add_argument("--symbol-memory", action="store_true", help="include repo-map derived symbol/graph advisory metadata with exact source verification hints")
|
|
2652
3211
|
return parser
|
|
2653
3212
|
|
|
2654
3213
|
|