@ictechgy/context-guard 0.4.9 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.ko.md +59 -31
- package/README.md +85 -36
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +30 -6
- package/package.json +4 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +20 -14
- package/plugins/context-guard/README.md +26 -17
- package/plugins/context-guard/bin/context-guard +147 -25
- package/plugins/context-guard/bin/context-guard-artifact +884 -79
- package/plugins/context-guard/bin/context-guard-audit +33 -2
- package/plugins/context-guard/bin/context-guard-bench +1542 -31
- package/plugins/context-guard/bin/context-guard-cache-score +665 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +790 -6
- package/plugins/context-guard/bin/context-guard-experiments +463 -26
- package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +892 -49
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
- package/plugins/context-guard/bin/context-guard-trim-output +288 -41
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_commands.py +230 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -43,6 +43,8 @@ SUGGEST_SCHEMA_VERSION = "contextguard.pack-suggest.v1"
|
|
|
43
43
|
AUTO_SCHEMA_VERSION = "contextguard.pack-auto.v1"
|
|
44
44
|
AUTO_EXPLAIN_SCHEMA_VERSION = "contextguard.pack-auto-explain.v1"
|
|
45
45
|
REPO_MAP_SCHEMA_VERSION = "contextguard.pack-repo-map.v1"
|
|
46
|
+
ADAPTIVE_K_SCHEMA_VERSION = "contextguard.pack-adaptive-k.v1"
|
|
47
|
+
SYMBOL_MEMORY_SCHEMA_VERSION = "contextguard.pack-symbol-memory.v1"
|
|
46
48
|
DEFAULT_SUGGEST_TOP = 8
|
|
47
49
|
MAX_SUGGEST_TOP = 50
|
|
48
50
|
DEFAULT_SUGGEST_CONTEXT_LINES = 20
|
|
@@ -51,15 +53,30 @@ SUGGEST_WHOLE_FILE_MAX_LINES = 120
|
|
|
51
53
|
MAX_SUGGEST_INPUT_BYTES = 256_000
|
|
52
54
|
MAX_QUERY_SCAN_FILES = 2_000
|
|
53
55
|
MAX_QUERY_SCAN_BYTES_PER_FILE = 200_000
|
|
56
|
+
MAX_GIT_LS_FILES_OUTPUT_BYTES = MAX_QUERY_SCAN_FILES * 512
|
|
57
|
+
GIT_LS_FILES_READ_CHUNK_BYTES = 64 * 1024
|
|
54
58
|
MAX_REPO_MAP_FILES = 1_000
|
|
59
|
+
MAX_REPO_MAP_SCAN_FILES = 160
|
|
55
60
|
MAX_REPO_MAP_BYTES_PER_FILE = 120_000
|
|
56
61
|
MAX_REPO_MAP_TREE_ENTRIES = 30
|
|
57
62
|
MAX_REPO_MAP_SIGNATURE_ENTRIES = 40
|
|
58
63
|
MAX_REPO_MAP_GRAPH_RANK_ENTRIES = 30
|
|
59
64
|
MAX_REPO_MAP_RETRIEVAL_HINTS = 30
|
|
60
65
|
MAX_REPO_MAP_SECRET_RISK_FILES = 20
|
|
66
|
+
MAX_ADAPTIVE_K_SCORE_SAMPLES = 200
|
|
67
|
+
MAX_ADAPTIVE_K_SELECTED_EVIDENCE = 12
|
|
68
|
+
MAX_ADAPTIVE_K_OMITTED_EVIDENCE = 12
|
|
69
|
+
MAX_ADAPTIVE_K_REASON_COUNTS = 12
|
|
70
|
+
MAX_ADAPTIVE_K_VERIFICATION_HINTS = 12
|
|
71
|
+
ADAPTIVE_K_POLICIES = ("balanced", "recall", "precision")
|
|
72
|
+
MAX_SYMBOL_MEMORY_ITEMS = 12
|
|
73
|
+
MAX_SYMBOL_MEMORY_GRAPH_ITEMS = 12
|
|
61
74
|
PACK_DIR = ".context-guard/packs"
|
|
62
75
|
REDACTED_PATH_COMPONENT = "[REDACTED-PATH-COMPONENT]"
|
|
76
|
+
ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
|
|
77
|
+
"tmp": Path("/private/tmp"),
|
|
78
|
+
"var": Path("/private/var"),
|
|
79
|
+
}
|
|
63
80
|
CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f-\x9f]")
|
|
64
81
|
SECRET_CONTENT_RE = re.compile(
|
|
65
82
|
r"(?is)("
|
|
@@ -235,6 +252,30 @@ def sanitize_text(text: str, *, show_paths: bool = False) -> tuple[str, int]:
|
|
|
235
252
|
return "".join(out), redacted
|
|
236
253
|
|
|
237
254
|
|
|
255
|
+
def sanitize_source_lines(handle: Any, requested: LineRange | None) -> tuple[list[str], int, int]:
    """Sanitize every line of *handle*, keeping only the requested window.

    The whole stream is always consumed so that the line total and the
    redaction count describe the complete source; only lines whose 1-based
    number falls inside ``requested`` are retained (all lines when
    ``requested`` is ``None``).

    Returns a ``(kept_lines, total_line_count, redacted_line_count)`` tuple,
    where the last value counts the lines for which the sanitizer reported a
    redaction.
    """
    sanitizer = load_line_sanitizer()
    keep_everything = requested is None
    if requested is None:
        first, last = 1, 0
    else:
        first, last = requested.start, requested.end
    kept: list[str] = []
    redaction_count = 0
    line_number = 0
    for raw_line in handle:
        line_number += 1
        clean, changed = sanitizer.sanitize(raw_line)  # type: ignore[attr-defined]
        if changed:
            redaction_count += 1
        if keep_everything or first <= line_number <= last:
            kept.append(clean)
    return kept, line_number, redaction_count
|
|
277
|
+
|
|
278
|
+
|
|
238
279
|
def byte_len(text: str) -> int:
    """Return the size of *text* in bytes once encoded as UTF-8.

    Characters that cannot be encoded are substituted (``errors="replace"``)
    instead of raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    return len(encoded)
|
|
240
281
|
|
|
@@ -330,6 +371,16 @@ def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
|
|
|
330
371
|
return min(max(number, minimum), maximum)
|
|
331
372
|
|
|
332
373
|
|
|
374
|
+
def adaptive_k_threshold(value: object) -> float:
    """Convert *value* to a float threshold in the closed range [0.0, 1.0].

    Raises:
        argparse.ArgumentTypeError: if *value* cannot be converted to a float
            or the converted number lies outside [0.0, 1.0] (NaN included,
            since it fails the range comparison).
    """
    try:
        threshold = float(value)
    except (TypeError, ValueError, OverflowError) as exc:
        raise argparse.ArgumentTypeError("adaptive-k threshold must be a number between 0.0 and 1.0") from exc
    if 0.0 <= threshold <= 1.0:
        return threshold
    raise argparse.ArgumentTypeError("adaptive-k threshold must be between 0.0 and 1.0")
|
|
382
|
+
|
|
383
|
+
|
|
333
384
|
def cap_label(value: object, default: str | None = None, limit: int = MAX_LABEL_CHARS) -> str | None:
|
|
334
385
|
if value is None:
|
|
335
386
|
return default
|
|
@@ -342,13 +393,150 @@ def cap_label(value: object, default: str | None = None, limit: int = MAX_LABEL_
|
|
|
342
393
|
return text
|
|
343
394
|
|
|
344
395
|
|
|
345
|
-
def
|
|
396
|
+
def normalized_link_target(anchor: Path, raw_target: str) -> Path:
    """Return the lexically normalized location a symlink target refers to.

    A relative *raw_target* is interpreted relative to *anchor*; an absolute
    one is used as-is. The result is normalized with ``os.path.normpath``,
    which is purely textual and does not touch the filesystem.
    """
    candidate = Path(raw_target)
    located = candidate if candidate.is_absolute() else anchor / candidate
    return Path(os.path.normpath(str(located)))
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Rewrite an allow-listed leading symlink alias to its expected target.

    Only the first component below the root of an absolute path is
    considered. The path is rewritten when that component is a key of
    ``ALLOWED_FIRST_ABSOLUTE_SYMLINKS``, exists as a symlink, and its
    normalized target equals the mapped location. In every other case,
    including any ``OSError`` while inspecting the link, *path* is returned
    unchanged.
    """
    components = path.parts
    if len(components) < 2 or not path.is_absolute():
        return path
    alias = components[1]
    canonical = ALLOWED_FIRST_ABSOLUTE_SYMLINKS.get(alias)
    if canonical is None:
        return path
    root = Path(path.anchor)
    alias_path = root / alias
    try:
        # Inspect the alias itself (lstat/readlink) rather than following it.
        is_expected_link = (
            stat.S_ISLNK(os.lstat(alias_path).st_mode)
            and normalized_link_target(root, os.readlink(alias_path)) == canonical
        )
    except OSError:
        return path
    if not is_expected_link:
        return path
    return canonical.joinpath(*components[2:])
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def manifest_safe_read_supported() -> bool:
    """Report whether this platform offers ``O_NOFOLLOW`` and ``dir_fd`` opens."""
    if not hasattr(os, "O_NOFOLLOW"):
        return False
    dir_fd_capable = getattr(os, "supports_dir_fd", set())
    return os.open in dir_fd_capable
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def manifest_directory_open_flags(*, follow_final: bool = False) -> int:
    """Build the ``os.open`` flags used for directory components.

    ``O_DIRECTORY`` and ``O_CLOEXEC`` are added when the platform defines
    them. ``O_NOFOLLOW`` is added unless *follow_final* is true; it is read
    unconditionally, so callers are expected to have confirmed it exists.
    """
    flags = os.O_RDONLY | getattr(os, "O_DIRECTORY", 0) | getattr(os, "O_CLOEXEC", 0)
    if follow_final:
        return flags
    return flags | os.O_NOFOLLOW
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def manifest_file_open_flags() -> int:
    """Build the ``os.open`` flags used for the manifest file itself.

    Always read-only with ``O_NOFOLLOW``; ``O_CLOEXEC``, ``O_NONBLOCK`` and
    ``O_NOCTTY`` are OR-ed in when the platform defines them.
    """
    optional = (
        getattr(os, "O_CLOEXEC", 0)
        | getattr(os, "O_NONBLOCK", 0)
        | getattr(os, "O_NOCTTY", 0)
    )
    return os.O_RDONLY | os.O_NOFOLLOW | optional
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def manifest_leaf_name(path: Path) -> str:
    """Return the final component of *path*.

    Raises:
        PackError: if the final component is empty, ``"."`` or ``".."``.
    """
    leaf = path.name
    if leaf and leaf not in (".", ".."):
        return leaf
    raise PackError("manifest path must name a regular file")
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def open_manifest_parent_no_follow(path: Path) -> int:
    """Open the directory that contains *path*, one component at a time.

    Each intermediate component is opened relative to the previously opened
    directory descriptor with ``O_NOFOLLOW``, so a symlink in the middle of
    the path makes the open fail instead of being followed. Absolute paths
    start from the filesystem anchor (after the allow-listed first-component
    alias normalization); relative paths start from the current directory.

    Returns:
        An open directory file descriptor. Ownership passes to the caller,
        who must close it.

    Raises:
        PackError: if the platform lacks the needed primitives, the path
            contains ``..``, or a component is not a directory.
        OSError: if opening or closing a component fails.
    """
    if not manifest_safe_read_supported():
        raise PackError("safe manifest reads require O_NOFOLLOW and dir_fd support")
    path = path.expanduser()
    # Reject ".." before normpath() below gets a chance to collapse it lexically.
    if ".." in path.parts:
        raise PackError("manifest path must not contain parent traversal")
    if path.is_absolute():
        path = normalize_allowed_first_absolute_symlink(Path(os.path.normpath(str(path))))
        current_fd = os.open(path.anchor or os.sep, manifest_directory_open_flags(follow_final=True))
        parts = path.parts[1:-1]
    else:
        path = Path(os.path.normpath(str(path)))
        current_fd = os.open(".", manifest_directory_open_flags())
        parts = path.parts[:-1]
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                raise PackError("manifest path must not contain parent traversal")
            next_fd = os.open(part, manifest_directory_open_flags(), dir_fd=current_fd)
            # Take ownership of the child BEFORE releasing the parent. If closing
            # the parent raises, the finally block then closes the child instead
            # of leaking it and closing the parent a second time.
            previous_fd, current_fd = current_fd, next_fd
            os.close(previous_fd)
            if not stat.S_ISDIR(os.fstat(current_fd).st_mode):
                raise PackError("manifest path must not traverse non-directory components")
        # Hand the descriptor to the caller and disarm the cleanup below.
        owned_fd, current_fd = current_fd, -1
        return owned_fd
    finally:
        if current_fd >= 0:
            try:
                os.close(current_fd)
            except OSError:
                pass
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def read_manifest_bytes_no_follow(path: Path) -> bytes:
    """Read the manifest at *path* without following symlinks.

    The parent directory is opened component by component, then the leaf is
    opened relative to it with ``O_NOFOLLOW``. The file must be a regular
    file no larger than ``MAX_MANIFEST_BYTES``; the cap is enforced both on
    the reported size and on the number of bytes actually read.

    Raises:
        PackError: for any validation failure, and for any ``OSError`` raised
            while opening or reading (wrapped with the original as cause).
    """
    parent_fd = -1
    fd = -1
    try:
        leaf = manifest_leaf_name(path.expanduser())
        parent_fd = open_manifest_parent_no_follow(path)
        fd = os.open(leaf, manifest_file_open_flags(), dir_fd=parent_fd)
        info = os.fstat(fd)
        if not stat.S_ISREG(info.st_mode):
            raise PackError("manifest must be a regular file")
        if info.st_size > MAX_MANIFEST_BYTES:
            raise PackError(f"manifest exceeds trusted size cap: {info.st_size} > {MAX_MANIFEST_BYTES}")
        # Read at most one byte past the cap so growth after fstat() is detected.
        budget = MAX_MANIFEST_BYTES + 1
        buffer = bytearray()
        while len(buffer) < budget:
            piece = os.read(fd, min(64 * 1024, budget - len(buffer)))
            if not piece:
                break
            buffer += piece
        if len(buffer) > MAX_MANIFEST_BYTES:
            raise PackError(f"manifest exceeds trusted size cap: {len(buffer)} > {MAX_MANIFEST_BYTES}")
        return bytes(buffer)
    except PackError:
        raise
    except OSError as exc:
        raise PackError(f"could not read manifest: {exc.strerror or exc.__class__.__name__}") from exc
    finally:
        # Best-effort cleanup: file first, then its parent directory.
        for descriptor in (fd, parent_fd):
            if descriptor >= 0:
                try:
                    os.close(descriptor)
                except OSError:
                    pass
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
def read_manifest(path: Path) -> list[SourceSpec]:
|
|
539
|
+
raw = read_manifest_bytes_no_follow(path)
|
|
352
540
|
try:
|
|
353
541
|
data = json.loads(raw.decode("utf-8"))
|
|
354
542
|
except (UnicodeDecodeError, json.JSONDecodeError) as exc:
|
|
@@ -582,19 +770,15 @@ def resolve_source(root: Path, spec: SourceSpec) -> tuple[ResolvedSource | None,
|
|
|
582
770
|
return None, omission(spec, reason, path=display, redacted_path=redacted_path)
|
|
583
771
|
try:
|
|
584
772
|
with handle:
|
|
585
|
-
|
|
773
|
+
requested = spec.lines
|
|
774
|
+
selected, total_lines, redacted_lines = sanitize_source_lines(handle, requested)
|
|
586
775
|
except OSError:
|
|
587
776
|
return None, omission(spec, "unsafe_path", path=display, redacted_path=redacted_path)
|
|
588
|
-
|
|
589
|
-
all_lines = sanitized.splitlines(True)
|
|
590
|
-
if not all_lines:
|
|
777
|
+
if total_lines <= 0:
|
|
591
778
|
return None, omission(spec, "empty_source", path=display, redacted_path=redacted_path)
|
|
592
|
-
|
|
593
|
-
requested = spec.lines or LineRange(1, total_lines)
|
|
779
|
+
requested = requested or LineRange(1, total_lines)
|
|
594
780
|
if requested.start > total_lines:
|
|
595
781
|
return None, omission(spec, "empty_source", path=display, redacted_path=redacted_path)
|
|
596
|
-
end = min(requested.end, total_lines)
|
|
597
|
-
selected = all_lines[requested.start - 1:end]
|
|
598
782
|
if not selected:
|
|
599
783
|
return None, omission(spec, "empty_source", path=display, redacted_path=redacted_path)
|
|
600
784
|
return ResolvedSource(
|
|
@@ -645,7 +829,11 @@ def retrieval_for(root_arg: str, display_path: str, lines: LineRange, *, redacte
|
|
|
645
829
|
return retrieval_cli(safe_root, display_path, lines), None
|
|
646
830
|
|
|
647
831
|
|
|
648
|
-
|
|
832
|
+
BLOCK_OPEN = "\n\n```text\n"
|
|
833
|
+
BLOCK_CLOSE = "```\n\n"
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
def render_block_header(source: ResolvedSource, *, root_arg: str, status: str, included: LineRange) -> str:
|
|
649
837
|
title = source.spec.label or source.display_path
|
|
650
838
|
requested = source.requested_lines or LineRange(1, source.total_lines)
|
|
651
839
|
retrieval, retrieval_omitted_reason = retrieval_for(root_arg, source.display_path, included, redacted_path=source.redacted_path)
|
|
@@ -661,7 +849,11 @@ def render_block(source: ResolvedSource, lines: list[str], *, root_arg: str, sta
|
|
|
661
849
|
header.append(f"Retrieval: `{retrieval}`")
|
|
662
850
|
elif retrieval_omitted_reason:
|
|
663
851
|
header.append(f"Retrieval omitted: {retrieval_omitted_reason}")
|
|
664
|
-
return "\n".join(header)
|
|
852
|
+
return "\n".join(header)
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
def render_block(source: ResolvedSource, lines: list[str], *, root_arg: str, status: str, included: LineRange) -> str:
    """Render one source block: header, opening fence, body, closing fence.

    A newline is appended to the body when *lines* is non-empty and its last
    line does not already end with one, so the closing fence starts on its
    own line.
    """
    header = render_block_header(source, root_arg=root_arg, status=status, included=included)
    body = "".join(lines)
    needs_newline = bool(lines) and not lines[-1].endswith("\n")
    terminator = "\n" if needs_newline else ""
    return header + BLOCK_OPEN + body + terminator + BLOCK_CLOSE
|
|
665
857
|
|
|
666
858
|
|
|
667
859
|
def source_metadata(source: ResolvedSource, *, status: str, lines: list[str], included: LineRange, root_arg: str) -> dict[str, Any]:
|
|
@@ -701,21 +893,63 @@ def budget_omission(source: ResolvedSource, *, root_arg: str) -> dict[str, Any]:
|
|
|
701
893
|
return item
|
|
702
894
|
|
|
703
895
|
|
|
704
|
-
def
|
|
896
|
+
def included_range_for_line_count(source: ResolvedSource, line_count: int) -> LineRange:
    """Return the line range covering the first *line_count* selected lines.

    The range starts at the source's requested start line, or at line 1 when
    no explicit range was requested.
    """
    requested = source.requested_lines
    first = requested.start if requested else 1
    last = first + line_count - 1
    return LineRange(first, last)
|
|
899
|
+
|
|
900
|
+
|
|
901
|
+
def line_byte_prefixes(lines: list[str]) -> list[int]:
    """Return cumulative byte sizes of *lines*.

    Entry ``i`` is the combined ``byte_len`` of the first ``i`` lines, so the
    result has ``len(lines) + 1`` entries and always starts with ``0``.
    """
    cumulative = [0]
    for text in lines:
        cumulative.append(cumulative[-1] + byte_len(text))
    return cumulative
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
def render_block_byte_len(
    source: ResolvedSource,
    line_count: int,
    line_prefixes: list[int],
    *,
    root_arg: str,
    status: str,
    included: LineRange,
) -> int:
    """Compute the byte size of a rendered block without building its body.

    The body size comes from the precomputed *line_prefixes* table for the
    first *line_count* selected lines, plus one byte when the last of those
    lines lacks a trailing newline. Header and fence sizes are measured
    directly.
    """
    payload_bytes = line_prefixes[line_count]
    if line_count > 0:
        final_line = source.selected_lines[line_count - 1]
        if not final_line.endswith("\n"):
            # Account for the newline added before the closing fence.
            payload_bytes += 1
    header_bytes = byte_len(render_block_header(source, root_arg=root_arg, status=status, included=included))
    return header_bytes + byte_len(BLOCK_OPEN) + payload_bytes + byte_len(BLOCK_CLOSE)
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def fit_partial_lines(
    source: ResolvedSource,
    remaining: int,
    *,
    root_arg: str,
    line_prefixes: list[int] | None = None,
) -> tuple[list[str], str | None, LineRange | None]:
    """Pick the longest leading run of selected lines that fits *remaining* bytes.

    A binary search over the number of leading lines measures each candidate
    with ``render_block_byte_len`` using status ``"partial"``.

    Returns ``(lines, rendered_block, included_range)``, or
    ``([], None, None)`` when there is no budget, no lines, or not even one
    line fits.
    """
    if remaining <= 0:
        return [], None, None
    candidates = source.selected_lines
    if not candidates:
        return [], None, None
    prefixes = line_byte_prefixes(candidates) if line_prefixes is None else line_prefixes
    fitting = 0
    lo, hi = 1, len(candidates)
    while lo <= hi:
        probe = (lo + hi) // 2
        probe_range = included_range_for_line_count(source, probe)
        size = render_block_byte_len(source, probe, prefixes, root_arg=root_arg, status="partial", included=probe_range)
        if size > remaining:
            hi = probe - 1
        else:
            fitting = probe
            lo = probe + 1
    if fitting <= 0:
        return [], None, None
    head = source.selected_lines[:fitting]
    head_range = included_range_for_line_count(source, fitting)
    return head, render_block(source, head, root_arg=root_arg, status="partial", included=head_range), head_range
|
|
720
954
|
|
|
721
955
|
|
|
@@ -988,17 +1222,17 @@ def build_pack(root: Path, specs: list[SourceSpec], *, budget_bytes: int, root_a
|
|
|
988
1222
|
parts.append(header)
|
|
989
1223
|
current_pack_bytes += header_bytes
|
|
990
1224
|
for source in resolved:
|
|
991
|
-
|
|
992
|
-
included_range =
|
|
993
|
-
|
|
994
|
-
full_block_bytes = byte_len(full_block)
|
|
1225
|
+
line_prefixes = line_byte_prefixes(source.selected_lines)
|
|
1226
|
+
included_range = included_range_for_line_count(source, len(source.selected_lines))
|
|
1227
|
+
full_block_bytes = render_block_byte_len(source, len(source.selected_lines), line_prefixes, root_arg=root_arg, status="included", included=included_range)
|
|
995
1228
|
remaining = budget_bytes - current_pack_bytes
|
|
996
1229
|
if full_block_bytes <= remaining:
|
|
1230
|
+
full_block = render_block(source, source.selected_lines, root_arg=root_arg, status="included", included=included_range)
|
|
997
1231
|
parts.append(full_block)
|
|
998
1232
|
current_pack_bytes += full_block_bytes
|
|
999
1233
|
included.append(source_metadata(source, status="included", lines=source.selected_lines, included=included_range, root_arg=root_arg))
|
|
1000
1234
|
continue
|
|
1001
|
-
partial_lines, partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg)
|
|
1235
|
+
partial_lines, partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg, line_prefixes=line_prefixes)
|
|
1002
1236
|
if partial_block is not None and partial_range is not None:
|
|
1003
1237
|
parts.append(partial_block)
|
|
1004
1238
|
current_pack_bytes += byte_len(partial_block)
|
|
@@ -1271,19 +1505,81 @@ def collect_output_candidates(
|
|
|
1271
1505
|
|
|
1272
1506
|
|
|
1273
1507
|
def git_ls_files(root: Path) -> list[str]:
|
|
1508
|
+
def read_stdout_capped(proc: subprocess.Popen[bytes], limit: int, timeout_seconds: float) -> tuple[bytes, bool]:
|
|
1509
|
+
if proc.stdout is None:
|
|
1510
|
+
return b"", False
|
|
1511
|
+
chunks: list[bytes] = []
|
|
1512
|
+
total = 0
|
|
1513
|
+
capped = False
|
|
1514
|
+
timed_out = False
|
|
1515
|
+
|
|
1516
|
+
def reader() -> None:
|
|
1517
|
+
nonlocal total, capped
|
|
1518
|
+
try:
|
|
1519
|
+
while total <= limit:
|
|
1520
|
+
chunk = proc.stdout.read(min(GIT_LS_FILES_READ_CHUNK_BYTES, limit + 1 - total))
|
|
1521
|
+
if not chunk:
|
|
1522
|
+
break
|
|
1523
|
+
chunks.append(chunk)
|
|
1524
|
+
total += len(chunk)
|
|
1525
|
+
if total > limit:
|
|
1526
|
+
capped = True
|
|
1527
|
+
break
|
|
1528
|
+
finally:
|
|
1529
|
+
if capped and proc.poll() is None:
|
|
1530
|
+
try:
|
|
1531
|
+
proc.terminate()
|
|
1532
|
+
except OSError:
|
|
1533
|
+
pass
|
|
1534
|
+
try:
|
|
1535
|
+
proc.stdout.close()
|
|
1536
|
+
except OSError:
|
|
1537
|
+
pass
|
|
1538
|
+
|
|
1539
|
+
thread = threading.Thread(target=reader, daemon=True)
|
|
1540
|
+
thread.start()
|
|
1541
|
+
thread.join(timeout_seconds)
|
|
1542
|
+
if thread.is_alive() and proc.poll() is None:
|
|
1543
|
+
timed_out = True
|
|
1544
|
+
try:
|
|
1545
|
+
proc.kill()
|
|
1546
|
+
except OSError:
|
|
1547
|
+
pass
|
|
1548
|
+
try:
|
|
1549
|
+
proc.wait(timeout=2)
|
|
1550
|
+
except subprocess.TimeoutExpired:
|
|
1551
|
+
try:
|
|
1552
|
+
proc.kill()
|
|
1553
|
+
except OSError:
|
|
1554
|
+
pass
|
|
1555
|
+
try:
|
|
1556
|
+
proc.wait(timeout=2)
|
|
1557
|
+
except subprocess.TimeoutExpired:
|
|
1558
|
+
pass
|
|
1559
|
+
thread.join(0.2)
|
|
1560
|
+
raw_output = b"".join(chunks)[:limit]
|
|
1561
|
+
complete = proc.returncode == 0 and not capped and not timed_out and raw_output.endswith(b"\0")
|
|
1562
|
+
return raw_output, complete
|
|
1563
|
+
|
|
1564
|
+
raw = b""
|
|
1565
|
+
git_returncode: int | None = None
|
|
1274
1566
|
try:
|
|
1275
|
-
proc = subprocess.
|
|
1567
|
+
proc = subprocess.Popen(
|
|
1276
1568
|
["git", "-C", str(root), "ls-files", "-z"],
|
|
1569
|
+
stdout=subprocess.PIPE,
|
|
1570
|
+
stderr=subprocess.DEVNULL,
|
|
1277
1571
|
text=False,
|
|
1278
|
-
capture_output=True,
|
|
1279
|
-
timeout=10,
|
|
1280
|
-
check=False,
|
|
1281
1572
|
)
|
|
1573
|
+
raw, _git_complete = read_stdout_capped(proc, MAX_GIT_LS_FILES_OUTPUT_BYTES, 10)
|
|
1574
|
+
git_returncode = proc.returncode
|
|
1282
1575
|
except (OSError, subprocess.TimeoutExpired):
|
|
1283
1576
|
proc = None
|
|
1284
|
-
if
|
|
1285
|
-
|
|
1577
|
+
if raw:
|
|
1578
|
+
if not raw.endswith(b"\0"):
|
|
1579
|
+
raw = raw.rsplit(b"\0", 1)[0] if b"\0" in raw else b""
|
|
1286
1580
|
return [part.decode("utf-8", "replace") for part in raw.split(b"\0") if part][:MAX_QUERY_SCAN_FILES]
|
|
1581
|
+
if git_returncode == 0 or (git_returncode is not None and git_returncode < 0):
|
|
1582
|
+
return []
|
|
1287
1583
|
out: list[str] = []
|
|
1288
1584
|
skip_dirs = {".git", ".omx", ".context-guard", "node_modules", "dist", "build", "__pycache__"}
|
|
1289
1585
|
for current, dirs, files in os.walk(root):
|
|
@@ -1358,7 +1654,8 @@ def source_selected_range(source: ResolvedSource) -> LineRange:
|
|
|
1358
1654
|
|
|
1359
1655
|
def resolved_block_bytes(source: ResolvedSource, *, root_arg: str) -> int:
    """Return the byte size of *source* rendered in full with status ``"included"``."""
    full_range = source_selected_range(source)
    selected = source.selected_lines
    prefixes = line_byte_prefixes(selected)
    return render_block_byte_len(
        source,
        len(source.selected_lines),
        prefixes,
        root_arg=root_arg,
        status="included",
        included=full_range,
    )
|
|
1362
1659
|
|
|
1363
1660
|
|
|
1364
1661
|
def manifest_source_for_candidate(source: ResolvedSource, *, priority: int, label: str | None) -> dict[str, Any]:
|
|
@@ -1638,6 +1935,328 @@ def suggest_build_hint(root_arg: str, manifest_path: str | None, budget: int) ->
|
|
|
1638
1935
|
return f"cd {shlex.quote(safe_root)} && {command}", None
|
|
1639
1936
|
|
|
1640
1937
|
|
|
1938
|
+
def percentile_int(values: list[int], numerator: int, denominator: int) -> int:
    """Pick the element at the ``numerator/denominator`` position of *values*.

    The index is ``(len(values) - 1) * numerator // denominator``, clamped to
    the valid range. No sorting is done here, so presumably callers pass an
    already ordered list (confirm against callers).

    Returns ``0`` for an empty list and the first element when *denominator*
    is not positive.
    """
    if not values:
        return 0
    if denominator <= 0:
        return values[0]
    last = len(values) - 1
    position = last * numerator // denominator
    if position < 0:
        position = 0
    elif position > last:
        position = last
    return values[position]
|
|
1945
|
+
|
|
1946
|
+
|
|
1947
|
+
def score_gap_advice(scores: list[int], requested_top: int) -> tuple[int, dict[str, Any], list[str]]:
    """Suggest a cut-off rank from the largest drop between adjacent scores.

    The largest non-negative drop between neighbours counts as an "elbow"
    when it is at least ``max(250, top_score // 5)``. Without such a drop the
    suggestion is the number of scores, capped at ``MAX_SUGGEST_TOP``.

    Returns ``(k, gap, reasons)``: ``gap`` describes the largest drop
    (``after_rank``, ``delta``, ``ratio`` relative to the top score) and
    ``reasons`` is a one-element list of reason codes.
    """
    count = len(scores)
    if count == 0:
        return 0, {"after_rank": 0, "delta": 0, "ratio": 0.0}, ["no_candidates"]
    if count == 1:
        return 1, {"after_rank": 1, "delta": 0, "ratio": 0.0}, ["single_candidate"]
    drops = [max(0, higher - lower) for higher, lower in zip(scores, scores[1:])]
    largest_drop = max(drops)
    drop_position = drops.index(largest_drop)
    # Guard the ratio against a zero or negative leading score.
    leading = max(1, scores[0])
    gap = {
        "after_rank": drop_position + 1,
        "delta": largest_drop,
        "ratio": round(largest_drop / leading, 4),
    }
    if largest_drop >= max(250, leading // 5):
        elbow = drop_position + 1
        if elbow <= requested_top:
            reasons = ["score_elbow"]
        else:
            reasons = ["score_elbow_after_requested_top"]
    else:
        elbow = min(MAX_SUGGEST_TOP, count)
        reasons = ["no_strong_score_elbow"]
    return max(1, elbow), gap, reasons
|
|
1964
|
+
|
|
1965
|
+
|
|
1966
|
+
def clamp_proxy(value: float) -> float:
    """Round *value* to four decimal places and clamp it to [0.0, 1.0]."""
    floored = max(0.0, round(value, 4))
    return min(1.0, floored)
|
|
1968
|
+
|
|
1969
|
+
|
|
1970
|
+
def adaptive_policy_recommended_k(
    *,
    policy: str,
    requested_top: int,
    score_elbow_k: int,
    budget_fit_k: int,
    candidate_count: int,
) -> int:
    """Combine the policy preference with budget and candidate limits.

    ``"recall"`` takes the larger of the requested top and the score elbow,
    ``"precision"`` the smaller, and any other policy the score elbow alone.
    The result never exceeds *budget_fit_k*, the candidate count, or
    ``MAX_SUGGEST_TOP``, and is ``0`` when there are no candidates or nothing
    fits the budget.
    """
    ceiling = min(max(0, candidate_count), MAX_SUGGEST_TOP)
    if ceiling == 0 or budget_fit_k <= 0:
        return 0
    if policy == "precision":
        preferred = min(score_elbow_k, requested_top)
    elif policy == "recall":
        preferred = max(requested_top, score_elbow_k)
    else:
        preferred = score_elbow_k
    return min(max(0, preferred), max(0, budget_fit_k), ceiling)
|
|
1988
|
+
|
|
1989
|
+
|
|
1990
|
+
def adaptive_path_label(value: object) -> str:
    """Return a display-safe label for a path-like *value*.

    Text that matches the control-character, secret-content or
    secret-path-component patterns is replaced by a hashed placeholder.
    Otherwise the path is passed through ``lexical_rel``; a rejected path is
    labelled via ``safe_raw_path_label`` and an accepted one via
    ``display_rel_path``.
    """
    text = "" if value is None else str(value)
    looks_sensitive = (
        CONTROL_CHAR_RE.search(text)
        or SECRET_CONTENT_RE.search(text)
        or SECRET_PATH_COMPONENT_RE.search(text)
    )
    if looks_sensitive:
        return f"redacted-path#path:{sha256_text(text)[:12]}"
    relative, _why = lexical_rel(text)
    if relative is None:
        return safe_raw_path_label(text)
    label, _was_redacted = display_rel_path(relative.as_posix())
    return label
|
|
1999
|
+
|
|
2000
|
+
|
|
2001
|
+
def actionable_adaptive_path(value: object) -> tuple[str | None, str | None]:
    """Return a usable relative POSIX path, or the reason none is available.

    Exactly one element of the returned ``(path, reason)`` pair is ``None``.
    Reasons produced here are ``"missing_path"``, ``"redacted_path"`` and
    ``"unsafe_path"``, plus whatever ``lexical_rel`` reports when it rejects
    the path.
    """
    text = "" if value is None else str(value)
    if not text:
        return None, "missing_path"
    if REDACTED_PATH_COMPONENT in text or "[REDACTED" in text:
        return None, "redacted_path"
    looks_sensitive = (
        CONTROL_CHAR_RE.search(text)
        or SECRET_CONTENT_RE.search(text)
        or SECRET_PATH_COMPONENT_RE.search(text)
    )
    if looks_sensitive:
        return None, "unsafe_path"
    relative, why = lexical_rel(text)
    if relative is not None:
        return relative.as_posix(), None
    return None, why or "unsafe_path"
|
|
2013
|
+
|
|
2014
|
+
|
|
2015
|
+
def adaptive_lines(value: object) -> dict[str, int] | None:
    """Validate a ``{"start": ..., "end": ...}`` mapping of line numbers.

    Both entries are coerced with ``int``. Returns a fresh dict holding the
    coerced values when ``1 <= start <= end``, otherwise ``None`` (also for
    non-dict input and for values ``int`` cannot convert).
    """
    if not isinstance(value, dict):
        return None
    try:
        first = int(value.get("start"))
        last = int(value.get("end"))
    except (TypeError, ValueError, OverflowError):
        return None
    if 1 <= first <= last:
        return {"start": first, "end": last}
    return None
|
|
2026
|
+
|
|
2027
|
+
|
|
2028
|
+
def adaptive_retrieval_hint(item: dict[str, Any]) -> dict[str, Any]:
    """Describe whether a slice of *item* can be retrieved again.

    The hint is available only when the item has an actionable path, a valid
    line range (taken from ``lines``, ``included_lines`` or
    ``requested_lines``, in that order) and a truthy ``retrieval_cli``.
    Otherwise it carries a reason, preferring the item's own
    ``retrieval_omitted_reason`` for path and retrieval-CLI failures.
    """

    def unavailable(reason: object) -> dict[str, Any]:
        return {"type": "slice", "available": False, "reason": str(reason)}

    path, path_problem = actionable_adaptive_path(item.get("path"))
    span = adaptive_lines(item.get("lines") or item.get("included_lines") or item.get("requested_lines"))
    recorded_reason = item.get("retrieval_omitted_reason")
    if path_problem:
        return unavailable(recorded_reason or path_problem)
    if span is None:
        return unavailable("missing_lines")
    if not item.get("retrieval_cli"):
        return unavailable(recorded_reason or "missing_retrieval_hint")
    return {"type": "slice", "available": True, "path": path, "lines": span}
|
|
2039
|
+
|
|
2040
|
+
|
|
2041
|
+
def adaptive_selected_evidence(selected: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Summarize the leading selected items as ranked evidence records.

    At most ``MAX_ADAPTIVE_K_SELECTED_EVIDENCE`` items are reported. Each
    record holds a 1-based rank, a display-safe path label, a non-negative
    score (falling back to ``priority``), a capped reason and a retrieval
    hint; ``lines`` is added only when the item carries a valid range.
    """
    records: list[dict[str, Any]] = []
    leading = selected[:MAX_ADAPTIVE_K_SELECTED_EVIDENCE]
    for position, candidate in enumerate(leading, start=1):
        record: dict[str, Any] = {
            "rank": position,
            "path": adaptive_path_label(candidate.get("path")),
            "score": max(0, int(candidate.get("score", candidate.get("priority", 0)) or 0)),
            "reason": cap_label(candidate.get("reason"), default="local heuristic", limit=MAX_REASON_CHARS),
            "retrieval_hint": adaptive_retrieval_hint(candidate),
        }
        span = adaptive_lines(candidate.get("lines"))
        if span is not None:
            record["lines"] = span
        records.append(record)
    return records
|
|
2056
|
+
|
|
2057
|
+
|
|
2058
|
+
def adaptive_omitted_evidence(omitted: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarise omitted sources: per-reason counts plus a capped source list.

    Every omitted item is counted towards its reason, but only the first
    ``MAX_ADAPTIVE_K_OMITTED_EVIDENCE`` items are described in ``sources``.
    ``reason_counts`` is ordered by descending count, then by reason text,
    and truncated to ``MAX_ADAPTIVE_K_REASON_COUNTS`` entries.
    """
    # Unavailable hints are only worth reporting when they carry one of these reasons.
    reportable_reasons = {"redacted_path", "unsafe_root_path", "unsafe_path"}
    tally: dict[str, int] = {}
    listed: list[dict[str, Any]] = []

    for entry in omitted:
        label = cap_label(entry.get("reason"), default="unknown", limit=MAX_REASON_CHARS) or "unknown"
        tally[label] = tally.get(label, 0) + 1
        if len(listed) < MAX_ADAPTIVE_K_OMITTED_EVIDENCE:
            described: dict[str, Any] = {
                "path": adaptive_path_label(entry.get("path")),
                "reason": label,
                "priority": max(0, int(entry.get("priority", 0) or 0)),
            }
            line_range = adaptive_lines(entry.get("requested_lines") or entry.get("lines"))
            if line_range is not None:
                described["lines"] = line_range
            retrieval = adaptive_retrieval_hint(entry)
            if retrieval.get("available") or retrieval.get("reason") in reportable_reasons:
                described["retrieval_hint"] = retrieval
            listed.append(described)

    ranked = sorted(tally.items(), key=lambda pair: (-pair[1], pair[0]))
    top_reasons = [
        {"reason": name, "count": total}
        for name, total in ranked[:MAX_ADAPTIVE_K_REASON_COUNTS]
    ]
    return {
        "omitted_count": len(omitted),
        "sources_capped": len(omitted) > len(listed),
        "sources": listed,
        "reason_counts": top_reasons,
    }
|
|
2088
|
+
|
|
2089
|
+
|
|
2090
|
+
def adaptive_source_verification(selected: list[dict[str, Any]]) -> dict[str, Any]:
    """Collect retrieval hints for the selected sources as a verification summary.

    At most ``MAX_ADAPTIVE_K_VERIFICATION_HINTS`` sources are inspected. The
    result reports how many of the inspected hints are available.
    """
    inspected = selected[:MAX_ADAPTIVE_K_VERIFICATION_HINTS]
    records: list[dict[str, Any]] = []
    for position, source in enumerate(inspected, start=1):
        retrieval = adaptive_retrieval_hint(source)
        records.append(
            {
                "rank": position,
                "path": adaptive_path_label(source.get("path")),
                "retrieval_hint": retrieval,
            }
        )
    usable = sum(1 for record in records if record["retrieval_hint"].get("available"))
    return {
        "requires_exact_source_before_edits": True,
        "format": "structured_relative_slice_hints",
        "selected_count": len(selected),
        "hint_count": len(records),
        "hints_capped": len(selected) > len(records),
        "available_hint_count": usable,
        "omitted_hint_count": len(records) - usable,
        "hints": records,
    }
|
|
2113
|
+
|
|
2114
|
+
|
|
2115
|
+
def build_adaptive_k_advisory(
    *,
    candidates: list[SuggestCandidate],
    selected: list[dict[str, Any]],
    omitted: list[dict[str, Any]],
    requested_top: int,
    budget_bytes: int,
    estimated_pack_bytes: int,
    policy: str = "balanced",
    min_recall_proxy: float = 0.0,
    min_precision_proxy: float = 0.0,
) -> dict[str, Any]:
    """Build the advisory "adaptive k" metadata payload for a suggestion run.

    The payload recommends a top-k value from the candidate score
    distribution and the byte budget, and reports score-mass based
    recall/precision proxies with pass/fail gates. It is metadata only:
    the returned dict always carries ``"apply": False``.

    Args:
        candidates: Ranked candidates; each must expose a ``score`` attribute.
        selected: Source dicts that were selected for the pack.
        omitted: Source dicts that were left out; their ``reason`` is inspected.
        requested_top: The top-k value the caller asked for.
        budget_bytes: Byte budget for the pack.
        estimated_pack_bytes: Estimated size of the pack as selected.
        policy: Recommendation policy name; any value not in
            ``ADAPTIVE_K_POLICIES`` is replaced by ``"balanced"``.
        min_recall_proxy: Minimum recall proxy for the recall gate to pass.
        min_precision_proxy: Minimum precision proxy for the precision gate to pass.

    Returns:
        A dict containing the recommendation, score distribution, budget fit,
        regression gates, proxy measurements, evidence summaries and the
        claim-boundary flags.
    """
    if policy not in ADAPTIVE_K_POLICIES:
        policy = "balanced"
    # Only a capped prefix of the candidates feeds the score statistics.
    sampled_candidates = candidates[:MAX_ADAPTIVE_K_SCORE_SAMPLES]
    scores = [max(0, int(candidate.score)) for candidate in sampled_candidates]
    score_elbow_k, max_gap_details, reason_codes = score_gap_advice(scores, requested_top)
    selected_count = len(selected)
    selected_scores = [max(0, int(item.get("score", item.get("priority", 0)) or 0)) for item in selected]
    selected_score_mass = sum(selected_scores)
    analyzed_score_mass = sum(scores)
    budget_omitted_count = sum(1 for item in omitted if item.get("reason") == "budget_exhausted")
    # Budget-limited: something was dropped for budget, or the estimate exceeds the budget.
    budget_limited = bool(budget_omitted_count or estimated_pack_bytes > budget_bytes)
    remaining_bytes = budget_bytes - estimated_pack_bytes
    average_selected_bytes = int(estimated_pack_bytes / selected_count) if selected_count else 0
    if budget_limited:
        reason_codes.append("budget_limited")
    if len(candidates) > len(sampled_candidates):
        reason_codes.append("candidate_sample_capped")
    if selected_count < min(requested_top, len(candidates)):
        reason_codes.append("selected_below_requested_top")
    if selected_count == 0:
        budget_fit_k = 0
        if candidates:
            reason_codes.append("no_budget_fit" if budget_limited else "no_selected_sources")
    elif budget_limited:
        # Already at the budget: the current selection size is the most that fits.
        budget_fit_k = selected_count
    else:
        # Estimate how many more average-sized sources fit in the remaining budget.
        additional_by_budget = max(0, remaining_bytes // max(1, average_selected_bytes))
        budget_fit_k = min(MAX_SUGGEST_TOP, len(candidates), selected_count + additional_by_budget)
        if budget_fit_k > requested_top:
            reason_codes.append("budget_headroom_expand")
    if not candidates:
        recommended_k = 0
    else:
        recommended_k = adaptive_policy_recommended_k(
            policy=policy,
            requested_top=requested_top,
            score_elbow_k=score_elbow_k,
            budget_fit_k=budget_fit_k,
            candidate_count=len(candidates),
        )
    score_values_asc = sorted(scores)
    top_score = score_values_asc[-1] if score_values_asc else 0
    # Recall proxy: share of the analyzed score mass covered by the selection.
    recall_proxy = clamp_proxy(selected_score_mass / analyzed_score_mass) if analyzed_score_mass else 0.0
    # Precision proxy: mean selected score relative to the top analyzed score.
    precision_proxy = (
        clamp_proxy((selected_score_mass / max(1, selected_count)) / max(1, top_score))
        if selected_count
        else 0.0
    )
    recall_gate_passed = recall_proxy >= min_recall_proxy
    precision_gate_passed = precision_proxy >= min_precision_proxy
    gate_status = "pass" if recall_gate_passed and precision_gate_passed else "failed"
    return {
        "schema_version": ADAPTIVE_K_SCHEMA_VERSION,
        "mode": "advisory",
        "requested_top": requested_top,
        "recommended_k": recommended_k,
        "policy": {
            "name": policy,
            "available_policies": list(ADAPTIVE_K_POLICIES),
            "changes_manifest_or_pack": False,
            "measurement_basis": "current_selected_sources_not_policy_applied_rebuild",
            "status": "evaluated",
        },
        "recommendation": {
            "apply": False,
            # Reason codes are de-duplicated and sorted, so append order above does not matter.
            "reason_codes": sorted(set(reason_codes)),
            "next_step": "rerun with --top recommended_k if you accept this local proxy advisory",
        },
        "score_distribution": {
            "candidate_count": len(candidates),
            "analyzed_candidate_count": len(sampled_candidates),
            "sample_capped": len(candidates) > len(sampled_candidates),
            "top_score": top_score,
            "p50_score": percentile_int(score_values_asc, 1, 2),
            "p90_score": percentile_int(score_values_asc, 9, 10),
            "min_score": score_values_asc[0] if score_values_asc else 0,
            "max_gap_details": max_gap_details,
            "score_elbow_k": score_elbow_k,
        },
        "budget_fit": {
            "budget_bytes": budget_bytes,
            "estimated_pack_bytes": estimated_pack_bytes,
            "remaining_bytes": remaining_bytes,
            "selected_count": selected_count,
            "budget_omitted_count": budget_omitted_count,
            "budget_limited": budget_limited,
            "average_selected_bytes": average_selected_bytes,
            "budget_fit_k": budget_fit_k,
        },
        "regression_gates": {
            "status": gate_status,
            "measurement_basis": "current_selected_sources_not_policy_applied_rebuild",
            "comparison": "observed_greater_than_or_equal_threshold",
            "recall_proxy": {
                "observed": recall_proxy,
                "minimum": min_recall_proxy,
                "passed": recall_gate_passed,
            },
            "precision_proxy": {
                "observed": precision_proxy,
                "minimum": min_precision_proxy,
                "passed": precision_gate_passed,
            },
        },
        "recall_precision_proxy": {
            "measurement": "local_score_mass_proxy",
            "range": "clamped_0_1",
            "measurement_basis": "current_selected_sources_not_policy_applied_rebuild",
            "selected_score_mass": selected_score_mass,
            "analyzed_score_mass": analyzed_score_mass,
            "recall_proxy": recall_proxy,
            "precision_proxy": precision_proxy,
            "selected_count": selected_count,
            "candidate_count": len(candidates),
        },
        "selected_evidence": {
            "selected_count": selected_count,
            "items_capped": selected_count > MAX_ADAPTIVE_K_SELECTED_EVIDENCE,
            "items": adaptive_selected_evidence(selected),
        },
        "omitted_evidence": adaptive_omitted_evidence(omitted),
        "source_verification": adaptive_source_verification(selected),
        "claim_boundary": {
            "deterministic_local_only": True,
            "no_model_network_or_embedding": True,
            "token_counts_are_estimated_proxies": True,
            "provider_token_or_cost_savings_claim_allowed": False,
            "advisory_does_not_change_manifest_or_pack": True,
            "selectable_policy_changes_manifest_or_pack": False,
        },
    }
|
|
2258
|
+
|
|
2259
|
+
|
|
1641
2260
|
def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tuple[dict[str, Any], int]:
|
|
1642
2261
|
query_text, _query_redactions = sanitize_text(args.query or "")
|
|
1643
2262
|
query = " ".join(query_text.split())
|
|
@@ -1713,11 +2332,19 @@ def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tupl
|
|
|
1713
2332
|
})
|
|
1714
2333
|
continue
|
|
1715
2334
|
final_seen.add(final_identity)
|
|
1716
|
-
|
|
2335
|
+
line_prefixes = line_byte_prefixes(source.selected_lines)
|
|
2336
|
+
source_bytes = render_block_byte_len(
|
|
2337
|
+
source,
|
|
2338
|
+
len(source.selected_lines),
|
|
2339
|
+
line_prefixes,
|
|
2340
|
+
root_arg=root_arg,
|
|
2341
|
+
status="included",
|
|
2342
|
+
included=source_selected_range(source),
|
|
2343
|
+
)
|
|
1717
2344
|
remaining = budget - current_bytes
|
|
1718
2345
|
if source_bytes > remaining:
|
|
1719
2346
|
if not selected and remaining > 0:
|
|
1720
|
-
partial_lines, _partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg)
|
|
2347
|
+
partial_lines, _partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg, line_prefixes=line_prefixes)
|
|
1721
2348
|
if partial_range is not None and partial_lines:
|
|
1722
2349
|
partial_spec = SourceSpec(
|
|
1723
2350
|
path=candidate.path,
|
|
@@ -1734,7 +2361,15 @@ def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tupl
|
|
|
1734
2361
|
omitted.append(omitted_item)
|
|
1735
2362
|
continue
|
|
1736
2363
|
assert source is not None
|
|
1737
|
-
|
|
2364
|
+
partial_prefixes = line_byte_prefixes(source.selected_lines)
|
|
2365
|
+
source_bytes = render_block_byte_len(
|
|
2366
|
+
source,
|
|
2367
|
+
len(source.selected_lines),
|
|
2368
|
+
partial_prefixes,
|
|
2369
|
+
root_arg=root_arg,
|
|
2370
|
+
status="included",
|
|
2371
|
+
included=source_selected_range(source),
|
|
2372
|
+
)
|
|
1738
2373
|
else:
|
|
1739
2374
|
omitted.append({"path": source.display_path, "status": "omitted", "reason": "budget_exhausted", "priority": candidate.score})
|
|
1740
2375
|
continue
|
|
@@ -1780,6 +2415,18 @@ def suggest_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tupl
|
|
|
1780
2415
|
}
|
|
1781
2416
|
if build_hint_omitted_reason:
|
|
1782
2417
|
payload["build_hint_omitted_reason"] = build_hint_omitted_reason
|
|
2418
|
+
if getattr(args, "adaptive_k", False):
|
|
2419
|
+
payload["adaptive_k"] = build_adaptive_k_advisory(
|
|
2420
|
+
candidates=candidates,
|
|
2421
|
+
selected=selected,
|
|
2422
|
+
omitted=omitted,
|
|
2423
|
+
requested_top=top,
|
|
2424
|
+
budget_bytes=budget,
|
|
2425
|
+
estimated_pack_bytes=estimated_pack_bytes,
|
|
2426
|
+
policy=getattr(args, "adaptive_k_policy", "balanced"),
|
|
2427
|
+
min_recall_proxy=float(getattr(args, "adaptive_k_min_recall_proxy", 0.0) or 0.0),
|
|
2428
|
+
min_precision_proxy=float(getattr(args, "adaptive_k_min_precision_proxy", 0.0) or 0.0),
|
|
2429
|
+
)
|
|
1783
2430
|
return payload, 0
|
|
1784
2431
|
|
|
1785
2432
|
|
|
@@ -1893,20 +2540,53 @@ def read_repo_map_text(root: Path, rel_path: str) -> tuple[dict[str, Any] | None
|
|
|
1893
2540
|
}, None
|
|
1894
2541
|
|
|
1895
2542
|
|
|
1896
|
-
def
|
|
2543
|
+
def repo_map_path_scan_priority(rel_path: str, *, seed_paths: set[str], query_terms: set[str], input_index: int) -> tuple[int, int, str]:
    """Return the sort key that decides how early ``rel_path`` gets scanned.

    The key is ``(-score, input_index, display_label)``, so higher-scoring
    paths sort first and the original input position breaks ties.
    """
    normalized, problem = lexical_rel(rel_path)
    label = repo_map_safe_raw_path_label(rel_path)
    is_redacted = False
    if normalized is not None and not problem:
        # A clean relative path gets its display label (and redaction flag) from the helper.
        label, is_redacted = repo_map_display_rel_path(normalized.as_posix())

    seed_bonus = 1_000_000 if (not is_redacted and label in seed_paths) else 0
    text_bonus = 10_000 if is_repo_map_text_path(label) else 0
    query_bonus = suggest_score_path(label, query_terms)
    readme_bonus = 250 if Path(label).name.lower() in {"readme", "readme.md", "readme.mdx"} else 0

    total = seed_bonus + text_bonus + query_bonus + readme_bonus
    return (-total, input_index, label)
|
|
2558
|
+
|
|
2559
|
+
|
|
2560
|
+
def repo_map_scan_paths(paths: list[str], *, seed_paths: set[str], query_terms: set[str]) -> list[str]:
    """Choose which paths to scan, best scan priority first.

    Only the first ``MAX_REPO_MAP_FILES`` inputs are considered, and at most
    ``MAX_REPO_MAP_SCAN_FILES`` of them are returned.
    """
    considered = paths[:MAX_REPO_MAP_FILES]
    keyed = [
        (
            repo_map_path_scan_priority(
                path,
                seed_paths=seed_paths,
                query_terms=query_terms,
                input_index=position,
            ),
            path,
        )
        for position, path in enumerate(considered)
    ]
    # Sort on the priority key alone; it embeds the input index, so keys never tie.
    keyed.sort(key=lambda pair: pair[0])
    return [path for _priority, path in keyed[:MAX_REPO_MAP_SCAN_FILES]]
|
|
2566
|
+
|
|
2567
|
+
|
|
2568
|
+
def repo_map_records(root: Path, *, seed_paths: set[str], query_terms: set[str]) -> tuple[list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]:
|
|
1897
2569
|
paths = git_ls_files(root)
|
|
2570
|
+
candidate_paths = paths[:MAX_REPO_MAP_FILES]
|
|
1898
2571
|
path_cap_reached = len(paths) > MAX_REPO_MAP_FILES
|
|
2572
|
+
scan_paths = repo_map_scan_paths(candidate_paths, seed_paths=seed_paths, query_terms=query_terms)
|
|
2573
|
+
scan_cap_reached = len(candidate_paths) > len(scan_paths)
|
|
1899
2574
|
records: list[dict[str, Any]] = []
|
|
1900
2575
|
omitted: list[dict[str, Any]] = []
|
|
1901
|
-
for rel_path in
|
|
2576
|
+
for rel_path in scan_paths:
|
|
1902
2577
|
record, omission_item = read_repo_map_text(root, rel_path)
|
|
1903
2578
|
if record is not None:
|
|
1904
2579
|
records.append(record)
|
|
1905
2580
|
elif omission_item is not None and omission_item.get("reason") != "unsupported_file_type":
|
|
1906
2581
|
omitted.append({key: value for key, value in omission_item.items() if value is not None})
|
|
1907
2582
|
caps = {
|
|
1908
|
-
"max_files":
|
|
1909
|
-
"files_capped": path_cap_reached,
|
|
2583
|
+
"max_files": MAX_REPO_MAP_SCAN_FILES,
|
|
2584
|
+
"files_capped": path_cap_reached or scan_cap_reached,
|
|
2585
|
+
"max_candidate_files": MAX_REPO_MAP_FILES,
|
|
2586
|
+
"candidate_files": len(candidate_paths),
|
|
2587
|
+
"candidate_files_capped": path_cap_reached,
|
|
2588
|
+
"scan_files": len(scan_paths),
|
|
2589
|
+
"scan_files_capped": scan_cap_reached,
|
|
1910
2590
|
"max_bytes_per_file": MAX_REPO_MAP_BYTES_PER_FILE,
|
|
1911
2591
|
"bytes_per_file_capped_count": sum(1 for item in records if item.get("bytes_capped")),
|
|
1912
2592
|
"max_tree_entries": MAX_REPO_MAP_TREE_ENTRIES,
|
|
@@ -2256,18 +2936,19 @@ def build_repo_map_payload(
|
|
|
2256
2936
|
*,
|
|
2257
2937
|
root_arg: str,
|
|
2258
2938
|
) -> dict[str, Any]:
|
|
2259
|
-
|
|
2939
|
+
query_terms = suggest_tokens(str(suggest_payload.get("query", "")))
|
|
2940
|
+
seed_paths = repo_map_seed_paths(args, suggest_payload, build_payload)
|
|
2941
|
+
records, omitted, caps = repo_map_records(root, seed_paths=seed_paths, query_terms=query_terms)
|
|
2260
2942
|
record_by_path = {str(record["path"]): record for record in records}
|
|
2261
2943
|
signatures = extract_signatures(records)
|
|
2262
2944
|
secret_scan = build_secret_scan(records)
|
|
2263
2945
|
edges = collect_import_edges(records)
|
|
2264
|
-
query_terms = suggest_tokens(str(suggest_payload.get("query", "")))
|
|
2265
2946
|
graph_rank = build_graph_rank(
|
|
2266
2947
|
records,
|
|
2267
2948
|
signatures,
|
|
2268
2949
|
edges,
|
|
2269
2950
|
query_terms=query_terms,
|
|
2270
|
-
seed_paths=
|
|
2951
|
+
seed_paths=seed_paths,
|
|
2271
2952
|
secret_scan=secret_scan,
|
|
2272
2953
|
)
|
|
2273
2954
|
retrieval = repo_map_retrieval(record_by_path, signatures, graph_rank, root_arg=root_arg)
|
|
@@ -2312,6 +2993,90 @@ def build_repo_map_payload(
|
|
|
2312
2993
|
}
|
|
2313
2994
|
|
|
2314
2995
|
|
|
2996
|
+
def line_identity_from_dict(value: object) -> str:
    """Return a string identity for a line-range value.

    A dict yields ``"<start>:<end>"`` from its ``start`` and ``end`` entries
    (missing entries render as ``None``). Anything else yields ``"all"``.
    """
    if isinstance(value, dict):
        first, last = value.get("start"), value.get("end")
        return f"{first}:{last}"
    return "all"
|
|
3000
|
+
|
|
3001
|
+
|
|
3002
|
+
def build_symbol_memory_payload(repo_map: dict[str, Any]) -> dict[str, Any]:
    """Derive the advisory symbol-memory payload from a repo-map payload.

    Signatures from ``repo_map["signature_index"]`` become symbol records,
    each enriched with the retrieval commands of the retrieval entry that
    has the same path and line range. Entries of ``repo_map["graph_rank"]``
    become graph-context records. Both lists are capped.

    Args:
        repo_map: Repo-map payload. ``retrieval``, ``signature_index`` and
            ``graph_rank`` are read as lists; a missing key or a non-list
            value is treated as empty instead of raising.

    Returns:
        A dict with the symbols, the graph context, summary counts,
        source-verification counts and the claim-boundary flags.
    """

    def list_field(name: str) -> list[Any]:
        # dict.get's default only covers a missing key, so a present None
        # (or any other non-list value) must be normalised explicitly.
        value = repo_map.get(name)
        return value if isinstance(value, list) else []

    retrieval_items = list_field("retrieval")

    # Index retrieval entries by (path, line identity) for lookup per signature.
    retrieval_by_path_lines: dict[tuple[str, str], dict[str, Any]] = {}
    for item in retrieval_items:
        if not isinstance(item, dict):
            continue
        path = str(item.get("path", ""))
        retrieval_by_path_lines[(path, line_identity_from_dict(item.get("lines")))] = item

    symbols: list[dict[str, Any]] = []
    for signature in list_field("signature_index"):
        if not isinstance(signature, dict):
            continue
        path = str(signature.get("path", ""))
        # Deep-copied so the payload does not alias the repo-map's own range dict.
        lines = copy.deepcopy(signature.get("lines"))
        matched = retrieval_by_path_lines.get((path, line_identity_from_dict(lines)))
        symbol: dict[str, Any] = {
            "path": path,
            "kind": signature.get("kind"),
            "name": signature.get("name"),
            "signature": signature.get("signature"),
            "line": signature.get("line"),
            "lines": lines,
            "source": "repo_map.signature_index",
            "exact_source_verification_required": True,
        }
        if isinstance(matched, dict):
            for key in ("slice_cli", "symbol_cli", "retrieval_omitted_reason"):
                if matched.get(key):
                    symbol[key] = matched[key]
        # Keys whose value is None are dropped from the emitted record.
        symbols.append({key: value for key, value in symbol.items() if value is not None})
        if len(symbols) >= MAX_SYMBOL_MEMORY_ITEMS:
            break

    graph_context: list[dict[str, Any]] = []
    for item in list_field("graph_rank"):
        if not isinstance(item, dict):
            continue
        graph_context.append({
            "path": item.get("path"),
            "score": item.get("score"),
            "components": copy.deepcopy(item.get("components", {})),
            "line_count": item.get("line_count"),
            "exact_source_verification_required": True,
        })
        if len(graph_context) >= MAX_SYMBOL_MEMORY_GRAPH_ITEMS:
            break

    summary = repo_map.get("summary", {}) if isinstance(repo_map.get("summary"), dict) else {}
    with_hint = sum(1 for item in symbols if item.get("slice_cli") or item.get("symbol_cli"))
    return {
        "schema_version": SYMBOL_MEMORY_SCHEMA_VERSION,
        "mode": "advisory",
        "source": "contextguard.pack-repo-map.v1",
        "summary": {
            "symbols": len(symbols),
            "graph_context": len(graph_context),
            "files_scanned": int(summary.get("files_scanned", 0) or 0),
            "graph_edges": int(summary.get("graph_edges", 0) or 0),
            "retrieval_hints": len(retrieval_items),
        },
        "symbols": symbols,
        "graph_context": graph_context,
        "source_verification": {
            "requires_exact_source_before_edits": True,
            "verified_by": ["slice_cli", "symbol_cli"],
            "retrieval_hint_count": len(retrieval_items),
            "missing_retrieval_hint_count": max(0, len(symbols) - with_hint),
        },
        "claim_boundary": {
            "deterministic_local_only": True,
            "no_network_model_embedding_lsp_or_tree_sitter_dependency": True,
            "advisory_does_not_change_manifest_pack_or_receipt": True,
            "graph_rank_is_explain_only": True,
            "provider_token_or_cost_savings_claim_allowed": False,
        },
    }
|
|
3078
|
+
|
|
3079
|
+
|
|
2315
3080
|
def build_auto_explain_payload(
|
|
2316
3081
|
args: argparse.Namespace,
|
|
2317
3082
|
suggest_payload: dict[str, Any],
|
|
@@ -2320,6 +3085,7 @@ def build_auto_explain_payload(
|
|
|
2320
3085
|
*,
|
|
2321
3086
|
root: Path | None = None,
|
|
2322
3087
|
root_arg: str = ".",
|
|
3088
|
+
repo_map_payload: dict[str, Any] | None = None,
|
|
2323
3089
|
) -> dict[str, Any]:
|
|
2324
3090
|
build_sources = [
|
|
2325
3091
|
item
|
|
@@ -2447,7 +3213,9 @@ def build_auto_explain_payload(
|
|
|
2447
3213
|
"raw_test_output_embedded": False,
|
|
2448
3214
|
},
|
|
2449
3215
|
}
|
|
2450
|
-
if
|
|
3216
|
+
if repo_map_payload is not None:
|
|
3217
|
+
explain["repo_map"] = copy.deepcopy(repo_map_payload)
|
|
3218
|
+
elif root is not None:
|
|
2451
3219
|
explain["repo_map"] = build_repo_map_payload(root, args, suggest_payload, build_payload, root_arg=root_arg)
|
|
2452
3220
|
return explain
|
|
2453
3221
|
|
|
@@ -2534,11 +3302,74 @@ def auto_pack(root: Path, args: argparse.Namespace, *, root_arg: str) -> tuple[d
|
|
|
2534
3302
|
}
|
|
2535
3303
|
if build_hint_omitted_reason:
|
|
2536
3304
|
payload["build_hint_omitted_reason"] = build_hint_omitted_reason
|
|
3305
|
+
if getattr(args, "adaptive_k", False) and isinstance(suggest_payload.get("adaptive_k"), dict):
|
|
3306
|
+
payload["adaptive_k"] = copy.deepcopy(suggest_payload["adaptive_k"])
|
|
3307
|
+
repo_map_payload: dict[str, Any] | None = None
|
|
3308
|
+
if getattr(args, "symbol_memory", False) or args.explain:
|
|
3309
|
+
repo_map_payload = build_repo_map_payload(root, args, suggest_payload, build_payload, root_arg=root_arg)
|
|
3310
|
+
if getattr(args, "symbol_memory", False) and isinstance(repo_map_payload, dict):
|
|
3311
|
+
payload["symbol_memory"] = build_symbol_memory_payload(repo_map_payload)
|
|
2537
3312
|
if args.explain:
|
|
2538
|
-
payload["explain"] = build_auto_explain_payload(
|
|
3313
|
+
payload["explain"] = build_auto_explain_payload(
|
|
3314
|
+
args,
|
|
3315
|
+
suggest_payload,
|
|
3316
|
+
build_payload,
|
|
3317
|
+
payload,
|
|
3318
|
+
root=root,
|
|
3319
|
+
root_arg=root_arg,
|
|
3320
|
+
repo_map_payload=repo_map_payload,
|
|
3321
|
+
)
|
|
2539
3322
|
return payload, rc
|
|
2540
3323
|
|
|
2541
3324
|
|
|
3325
|
+
def print_adaptive_k_text(payload: dict[str, Any]) -> None:
    """Print a one-line text summary of ``payload["adaptive_k"]``.

    Prints nothing when the advisory is absent or is not a dict. Missing or
    malformed sub-sections fall back to default values in the output.
    """
    advisory = payload.get("adaptive_k")
    if not isinstance(advisory, dict):
        return

    def section(name: str) -> dict[str, Any]:
        # A sub-section that is missing or not a dict reads as empty.
        candidate = advisory.get(name)
        return candidate if isinstance(candidate, dict) else {}

    recommendation = section("recommendation")
    distribution = section("score_distribution")
    budget = section("budget_fit")
    chosen_policy = section("policy")
    gates = section("regression_gates")

    codes = recommendation.get("reason_codes", [])
    # Only the first five reason codes are shown.
    reasons = ",".join(str(code) for code in codes[:5]) if isinstance(codes, list) else str(codes)

    fields = [
        f"recommended={advisory.get('recommended_k', 0)}/{advisory.get('requested_top', 0)}",
        f"policy={chosen_policy.get('name', 'balanced')}",
        f"gates={gates.get('status', 'pass')}",
        f"candidates={distribution.get('candidate_count', 0)}",
        f"budget_limited={budget.get('budget_limited', False)}",
        f"apply=false reasons={reasons or 'none'}",
    ]
    print("adaptive-k: " + " ".join(fields))
|
|
3356
|
+
|
|
3357
|
+
|
|
3358
|
+
def print_symbol_memory_text(payload: dict[str, Any]) -> None:
    """Print a one-line text summary of ``payload["symbol_memory"]``.

    Prints nothing when the symbol-memory entry is absent or is not a dict.
    """
    memory = payload.get("symbol_memory")
    if not isinstance(memory, dict):
        return

    counts = memory.get("summary")
    if not isinstance(counts, dict):
        counts = {}
    checks = memory.get("source_verification")
    if not isinstance(checks, dict):
        checks = {}

    # The flag defaults to True and is rendered in lowercase.
    verify_flag = str(checks.get("requires_exact_source_before_edits", True)).lower()
    fields = [
        f"symbols={counts.get('symbols', 0)}",
        f"graph_context={counts.get('graph_context', 0)}",
        f"retrieval_hints={counts.get('retrieval_hints', 0)}",
        f"verify_before_edits={verify_flag}",
    ]
    print("symbol-memory: " + " ".join(fields))
|
|
3371
|
+
|
|
3372
|
+
|
|
2542
3373
|
def print_suggest_text(payload: dict[str, Any]) -> None:
|
|
2543
3374
|
print(
|
|
2544
3375
|
f"context-guard-pack suggest: {len(payload['sources'])} source(s), "
|
|
@@ -2554,6 +3385,7 @@ def print_suggest_text(payload: dict[str, Any]) -> None:
|
|
|
2554
3385
|
print(f"build: {payload['build_hint']}")
|
|
2555
3386
|
elif payload.get("build_hint_omitted_reason"):
|
|
2556
3387
|
print(f"build hint omitted: {payload['build_hint_omitted_reason']}")
|
|
3388
|
+
print_adaptive_k_text(payload)
|
|
2557
3389
|
|
|
2558
3390
|
|
|
2559
3391
|
def print_auto_text(payload: dict[str, Any]) -> None:
|
|
@@ -2598,6 +3430,8 @@ def print_auto_text(payload: dict[str, Any]) -> None:
|
|
|
2598
3430
|
reason_counts[reason] = reason_counts.get(reason, 0) + 1
|
|
2599
3431
|
reason_text = ", ".join(f"{reason}={count}" for reason, count in sorted(reason_counts.items()))
|
|
2600
3432
|
print(f"omitted reasons: {reason_text}")
|
|
3433
|
+
print_adaptive_k_text(payload)
|
|
3434
|
+
print_symbol_memory_text(payload)
|
|
2601
3435
|
if payload.get("manifest_path"):
|
|
2602
3436
|
print(f"manifest: {payload['manifest_path']}")
|
|
2603
3437
|
if payload.get("pack_path"):
|
|
@@ -2633,6 +3467,10 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
2633
3467
|
suggest.add_argument("--top", type=int, default=DEFAULT_SUGGEST_TOP, help="maximum suggested sources")
|
|
2634
3468
|
suggest.add_argument("--context-lines", type=int, default=DEFAULT_SUGGEST_CONTEXT_LINES, help="line context around diff/output hits")
|
|
2635
3469
|
suggest.add_argument("--manifest-out", help="write the suggested build manifest to this relative path under root")
|
|
3470
|
+
suggest.add_argument("--adaptive-k", action="store_true", help="include local score/budget top-k advisory metadata without changing the manifest")
|
|
3471
|
+
suggest.add_argument("--adaptive-k-policy", choices=ADAPTIVE_K_POLICIES, default="balanced", help="local adaptive-k recommendation policy used when --adaptive-k is set")
|
|
3472
|
+
suggest.add_argument("--adaptive-k-min-recall-proxy", type=adaptive_k_threshold, default=0.0, help="metadata-only minimum recall proxy gate for --adaptive-k")
|
|
3473
|
+
suggest.add_argument("--adaptive-k-min-precision-proxy", type=adaptive_k_threshold, default=0.0, help="metadata-only minimum precision proxy gate for --adaptive-k")
|
|
2636
3474
|
suggest.add_argument("--json", action="store_true", help="emit JSON payload")
|
|
2637
3475
|
auto = sub.add_parser("auto", help="suggest a context pack manifest and build the budgeted pack in one local step")
|
|
2638
3476
|
auto.add_argument("--root", default=".", help="project root; must not be a symlink")
|
|
@@ -2649,6 +3487,11 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
2649
3487
|
auto.add_argument("--json", action="store_true", help="emit JSON payload")
|
|
2650
3488
|
auto.add_argument("--no-artifact", action="store_true", help="do not write .context-guard/packs receipt")
|
|
2651
3489
|
auto.add_argument("--explain", action="store_true", help="include deterministic local selection/build explanation metadata")
|
|
3490
|
+
auto.add_argument("--adaptive-k", action="store_true", help="include local score/budget top-k advisory metadata without changing the manifest or pack")
|
|
3491
|
+
auto.add_argument("--adaptive-k-policy", choices=ADAPTIVE_K_POLICIES, default="balanced", help="local adaptive-k recommendation policy used when --adaptive-k is set")
|
|
3492
|
+
auto.add_argument("--adaptive-k-min-recall-proxy", type=adaptive_k_threshold, default=0.0, help="metadata-only minimum recall proxy gate for --adaptive-k")
|
|
3493
|
+
auto.add_argument("--adaptive-k-min-precision-proxy", type=adaptive_k_threshold, default=0.0, help="metadata-only minimum precision proxy gate for --adaptive-k")
|
|
3494
|
+
auto.add_argument("--symbol-memory", action="store_true", help="include repo-map derived symbol/graph advisory metadata with exact source verification hints")
|
|
2652
3495
|
return parser
|
|
2653
3496
|
|
|
2654
3497
|
|