claude-toolstack-cli 1.0.0 (claude_toolstack_cli-1.0.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. claude_toolstack_cli-1.0.0.dist-info/METADATA +354 -0
  2. claude_toolstack_cli-1.0.0.dist-info/RECORD +48 -0
  3. claude_toolstack_cli-1.0.0.dist-info/WHEEL +5 -0
  4. claude_toolstack_cli-1.0.0.dist-info/entry_points.txt +2 -0
  5. claude_toolstack_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
  6. claude_toolstack_cli-1.0.0.dist-info/top_level.txt +1 -0
  7. cts/__init__.py +3 -0
  8. cts/__main__.py +5 -0
  9. cts/autopilot.py +633 -0
  10. cts/bundle.py +958 -0
  11. cts/cli.py +2858 -0
  12. cts/confidence.py +218 -0
  13. cts/config.py +19 -0
  14. cts/corpus/__init__.py +139 -0
  15. cts/corpus/apply.py +305 -0
  16. cts/corpus/archive.py +309 -0
  17. cts/corpus/baseline.py +294 -0
  18. cts/corpus/evaluate.py +409 -0
  19. cts/corpus/experiment_eval.py +585 -0
  20. cts/corpus/experiment_schema.py +380 -0
  21. cts/corpus/extract.py +353 -0
  22. cts/corpus/load.py +44 -0
  23. cts/corpus/model.py +114 -0
  24. cts/corpus/patch.py +467 -0
  25. cts/corpus/registry.py +420 -0
  26. cts/corpus/report.py +745 -0
  27. cts/corpus/scan.py +87 -0
  28. cts/corpus/store.py +63 -0
  29. cts/corpus/trends.py +478 -0
  30. cts/corpus/tuning_schema.py +313 -0
  31. cts/corpus/variants.py +335 -0
  32. cts/ctags.py +133 -0
  33. cts/diff_context.py +92 -0
  34. cts/errors.py +109 -0
  35. cts/http.py +89 -0
  36. cts/ranking.py +466 -0
  37. cts/render.py +388 -0
  38. cts/schema.py +96 -0
  39. cts/semantic/__init__.py +47 -0
  40. cts/semantic/candidates.py +150 -0
  41. cts/semantic/chunker.py +184 -0
  42. cts/semantic/config.py +120 -0
  43. cts/semantic/embedder.py +151 -0
  44. cts/semantic/indexer.py +159 -0
  45. cts/semantic/search.py +252 -0
  46. cts/semantic/store.py +330 -0
  47. cts/sidecar.py +431 -0
  48. cts/structural.py +305 -0
cts/render.py ADDED
@@ -0,0 +1,388 @@
+"""Output renderers: --json, --text, --claude (v1 + v2 bundles)."""
+
+from __future__ import annotations
+
+import json
+import time
+from typing import Any, Dict, List
+
+
+def _strip_meta(data: Dict[str, Any]) -> Dict[str, Any]:
+    """Remove internal _fields from output."""
+    return {k: v for k, v in data.items() if not k.startswith("_")}
+
+
+def render_json(data: Dict[str, Any]) -> None:
+    print(json.dumps(_strip_meta(data), indent=2))
+
+
+def render_json_with_debug(data: Dict[str, Any]) -> None:
+    """Render full JSON including _debug key (for --debug-json)."""
+    print(json.dumps(data, indent=2, default=str))
+
+
+def render_text_status(data: Dict[str, Any]) -> None:
+    rid = data.get("_request_id", "")
+    print(f"Request-ID: {rid}")
+    print(f"Gateway v{data.get('version', '?')} ok={data.get('ok')}")
+    print(f" Repo root: {data.get('repo_root')}")
+    print(f" Cache root: {data.get('cache_root')}")
+    print(f" RG threads: {data.get('rg_threads')}")
+    print(f" RG concurrency: {data.get('rg_concurrency')}")
+    print(f" Job concurrency: {data.get('job_concurrency')}")
+    print(f" Max response: {data.get('max_response_bytes')} bytes")
+    print(f" Timeout: {data.get('timeout_sec')}s")
+    print(f" Docker: {data.get('docker_host')}")
+    print(f" Containers: {', '.join(data.get('allowed_containers', []))}")
+    print(f" Allowed repos: {', '.join(data.get('allowed_repos', []))}")
+
+
+def render_text_search(data: Dict[str, Any]) -> None:
+    rid = data.get("_request_id", "")
+    matches = data.get("matches", [])
+    print(f"Request-ID: {rid}")
+    repo = data.get("repo")
+    cnt = data.get("count")
+    print(f"Search: {data.get('query')!r} in {repo} ({cnt} matches)")
+    if data.get("truncated"):
+        print(" [truncated — output exceeded 512 KB]")
+    print()
+    for m in matches:
+        path = m.get("path", "?")
+        line = m.get("line", "?")
+        snippet = m.get("snippet", "").rstrip()
+        print(f" {path}:{line} {snippet}")
+
+
+def render_text_slice(data: Dict[str, Any]) -> None:
+    rid = data.get("_request_id", "")
+    print(f"Request-ID: {rid}")
+    path = data.get("path", "?")
+    start = data.get("start", "?")
+    print(f"File: {data.get('repo')}/{path} (from line {start})")
+    if data.get("truncated"):
+        print(" [truncated]")
+    print()
+    for i, line in enumerate(data.get("lines", []), start=int(start)):
+        print(f" {i:>6} {line}")
+
+
+def render_text_symbol(data: Dict[str, Any]) -> None:
+    rid = data.get("_request_id", "")
+    defs = data.get("defs", [])
+    print(f"Request-ID: {rid}")
+    repo = data.get("repo")
+    cnt = data.get("count")
+    print(f"Symbol: {data.get('symbol')!r} in {repo} ({cnt} defs)")
+    print()
+    for d in defs:
+        kind = d.get("kind", "?")
+        name = d.get("name", "?")
+        fpath = d.get("file", "?")
+        print(f" [{kind}] {name} {fpath}")
+
+
+def render_text_job(data: Dict[str, Any]) -> None:
+    rid = data.get("_request_id", "")
+    ok = data.get("ok", False)
+    tag = "PASS" if ok else "FAIL"
+    print(f"Request-ID: {rid}")
+    print(
+        f"Job: {data.get('job')} ({data.get('preset')}) on {data.get('repo')} "
+        f"[{tag}] exit={data.get('exit_code')} {data.get('duration_sec')}s"
+    )
+    if data.get("truncated"):
+        print(" [output truncated]")
+    stdout = data.get("stdout", "").rstrip()
+    stderr = data.get("stderr", "").rstrip()
+    if stdout:
+        print("\n--- stdout ---")
+        print(stdout)
+    if stderr:
+        print("\n--- stderr ---")
+        print(stderr)
+
+
+def render_text_index(data: Dict[str, Any]) -> None:
+    rid = data.get("_request_id", "")
+    ok = data.get("ok", False)
+    tag = "OK" if ok else "FAIL"
+    print(f"Request-ID: {rid}")
+    print(f"Index ctags: {data.get('repo')} [{tag}] {data.get('duration_sec')}s")
+    stderr = data.get("stderr", "").rstrip()
+    if stderr:
+        print(f" stderr: {stderr[:200]}")
+
+
+# ---------------------------------------------------------------------------
+# Claude evidence bundle renderers
+# ---------------------------------------------------------------------------
+
+
+def render_claude_search(
+    data: Dict[str, Any], slices: List[Dict[str, Any]] | None = None
+) -> None:
+    """Render a compact evidence bundle for Claude."""
+    rid = data.get("_request_id", "")
+    repo = data.get("repo", "?")
+    query = data.get("query", "?")
+    matches = data.get("matches", [])
+
+    lines: List[str] = []
+    lines.append("## Evidence Bundle")
+    lines.append(f"repo: {repo} query: {query!r} request_id: {rid}")
+    lines.append(f"matches: {data.get('count', 0)}")
+    if data.get("truncated"):
+        lines.append("[search results truncated at 512 KB]")
+    lines.append("")
+
+    # Match summary
+    lines.append("### Matches")
+    for m in matches:
+        path = m.get("path", "?")
+        line_no = m.get("line", "?")
+        snippet = m.get("snippet", "").rstrip()
+        if len(snippet) > 200:
+            snippet = snippet[:200] + "..."
+        lines.append(f" {path}:{line_no} {snippet}")
+    lines.append("")
+
+    # Inline slices (if provided)
+    if slices:
+        lines.append("### Context Slices")
+        for s in slices:
+            spath = s.get("path", "?")
+            sstart = s.get("start", 0)
+            lines.append(f"--- {s.get('repo', repo)}/{spath} (from line {sstart}) ---")
+            for i, sl in enumerate(s.get("lines", []), start=int(sstart)):
+                lines.append(f"{i:>6} {sl}")
+            lines.append("")
+
+    lines.append(
+        "If you need more: request wider slices, more matches, or specific files."
+    )
+    print("\n".join(lines))
+
+
+def render_claude_job(data: Dict[str, Any]) -> None:
+    """Render job result as a compact evidence bundle."""
+    rid = data.get("_request_id", "")
+    ok = data.get("ok", False)
+    tag = "PASS" if ok else "FAIL"
+
+    lines: List[str] = []
+    lines.append("## Job Result")
+    repo = data.get("repo")
+    job = data.get("job")
+    preset = data.get("preset")
+    ec = data.get("exit_code")
+    dur = data.get("duration_sec")
+    lines.append(
+        f"repo: {repo} job: {job} preset: {preset} "
+        f"result: {tag} exit: {ec} "
+        f"duration: {dur}s request_id: {rid}"
+    )
+
+    stdout = data.get("stdout", "").rstrip()
+    stderr = data.get("stderr", "").rstrip()
+
+    # Trim to last N lines for Claude
+    max_lines = 80
+    if stdout:
+        stdout_lines = stdout.splitlines()
+        if len(stdout_lines) > max_lines:
+            lines.append(
+                f"\n### stdout (last {max_lines} of {len(stdout_lines)} lines)"
+            )
+            lines.extend(stdout_lines[-max_lines:])
+        else:
+            lines.append("\n### stdout")
+            lines.append(stdout)
+
+    if stderr:
+        stderr_lines = stderr.splitlines()
+        if len(stderr_lines) > max_lines:
+            lines.append(
+                f"\n### stderr (last {max_lines} of {len(stderr_lines)} lines)"
+            )
+            lines.extend(stderr_lines[-max_lines:])
+        else:
+            lines.append("\n### stderr")
+            lines.append(stderr)
+
+    if data.get("truncated"):
+        lines.append("[output truncated at 512 KB]")
+
+    print("\n".join(lines))
+
+
+# ---------------------------------------------------------------------------
+# v2 structured bundle renderer
+# ---------------------------------------------------------------------------
+
+
+def render_bundle(bundle: Dict[str, Any]) -> None:
+    """Render a v2 structured evidence bundle."""
+    lines: List[str] = []
+
+    mode = bundle.get("mode", "default")
+    repo = bundle.get("repo", "?")
+    rid = bundle.get("request_id", "")
+    query = bundle.get("query", "")
+    ts = bundle.get("timestamp", 0)
+    ts_str = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(ts))
+
+    # Header
+    lines.append("# Evidence Bundle")
+    lines.append("")
+
+    # Metadata
+    lines.append("## Metadata")
+    lines.append(f" repo: {repo}")
+    lines.append(f" mode: {mode}")
+    lines.append(f" request_id: {rid}")
+    lines.append(f" timestamp: {ts_str}")
+    if bundle.get("truncated"):
+        lines.append(" [search results truncated at 512 KB]")
+    lines.append("")
+
+    # Query
+    if query:
+        lines.append("## Query")
+        lines.append(f" {query}")
+        lines.append("")
+
+    # Ranked evidence sources
+    sources = bundle.get("ranked_sources", [])
+    if sources:
+        lines.append(f"## Ranked Evidence Sources ({len(sources)})")
+        for s in sources:
+            path = s.get("path", "?")
+            line = s.get("line", 0)
+            score = s.get("score", 0.0)
+            extra = ""
+            if s.get("in_trace"):
+                extra = " [trace]"
+            lines.append(f" {score:+.2f} {path}:{line}{extra}")
+        lines.append("")
+
+    # Top matches
+    matches = bundle.get("matches", [])
+    if matches:
+        lines.append(f"## Top Matches ({len(matches)})")
+        for m in matches:
+            path = m.get("path", "?")
+            line = m.get("line", 0)
+            snippet = m.get("snippet", "")
+            lines.append(f" {path}:{line} {snippet}")
+        lines.append("")
+
+    # File slices
+    slices = bundle.get("slices", [])
+    if slices:
+        lines.append(f"## File Slices ({len(slices)})")
+        for s in slices:
+            spath = s.get("path", "?")
+            srepo = s.get("repo", repo)
+            sstart = s.get("start", 0)
+            lines.append(f"--- {srepo}/{spath} (from line {sstart}) ---")
+            for i, sl in enumerate(s.get("lines", []), start=int(sstart)):
+                lines.append(f"{i:>6} {sl}")
+            lines.append("")
+
+    # Symbols (symbol mode)
+    symbols = bundle.get("symbols", [])
+    if symbols:
+        lines.append(f"## Symbols ({len(symbols)})")
+        for sym in symbols:
+            kind = sym.get("kind", "?")
+            name = sym.get("name", "?")
+            fpath = sym.get("file", "?")
+            lines.append(f" [{kind}] {name} {fpath}")
+        lines.append("")
+
+    # Diff (change mode)
+    diff = bundle.get("diff", "")
+    if diff:
+        lines.append("## Diff")
+        # Trim to reasonable size
+        diff_lines = diff.splitlines()
+        if len(diff_lines) > 200:
+            lines.append(f"(showing last 200 of {len(diff_lines)} lines)")
+            lines.extend(diff_lines[-200:])
+        else:
+            lines.extend(diff_lines)
+        lines.append("")
+
+    # Suggested next commands
+    cmds = bundle.get("suggested_commands", [])
+    if cmds:
+        lines.append("## Suggested Next Commands")
+        for c in cmds:
+            lines.append(f" {c}")
+        lines.append("")
+
+    # Notes
+    notes = bundle.get("notes", [])
+    if notes:
+        lines.append("## Notes")
+        for n in notes:
+            lines.append(f" {n}")
+        lines.append("")
+
+    # Debug telemetry (when --debug-bundle)
+    debug = bundle.get("_debug")
+    if debug:
+        lines.append("## Debug Telemetry")
+        lines.append("")
+
+        # Timings
+        timings = debug.get("timings_ms", {})
+        if timings:
+            lines.append("### Timings (ms)")
+            for step, ms in timings.items():
+                lines.append(f" {step}: {ms:.2f}")
+            lines.append("")
+
+        # Bundle size
+        if "bundle_bytes" in debug:
+            kb = debug["bundle_bytes"] / 1024
+            lines.append(
+                f"### Bundle Size: {kb:.1f} KB ({debug['bundle_lines']} lines)"
+            )
+            lines.append("")
+
+        # Section sizes
+        sections = debug.get("sections", {})
+        if sections:
+            lines.append("### Section Sizes")
+            for name, info in sections.items():
+                skb = info.get("bytes", 0) / 1024
+                items = info.get("items", 0)
+                lines.append(f" {name}: {skb:.1f} KB ({items} items)")
+            lines.append("")
+
+        # Limits
+        limits = debug.get("limits", {})
+        if limits:
+            lines.append("### Limits")
+            for k, v in limits.items():
+                lines.append(f" {k}: {v}")
+            lines.append("")
+
+        # Score cards
+        cards = debug.get("score_cards", [])
+        if cards:
+            lines.append(f"### Score Cards (top {len(cards)})")
+            for card in cards:
+                cpath = card.get("path", "?")
+                ctotal = card.get("score_total", 0.0)
+                lines.append(f" {ctotal:+.2f} {cpath}")
+                signals = card.get("signals", {})
+                nonzero = {k: v for k, v in signals.items() if v != 0.0}
+                if nonzero:
+                    parts = [f"{k}={v:+.2f}" for k, v in nonzero.items()]
+                    lines.append(f" {', '.join(parts)}")
+            lines.append("")
+
+    print("\n".join(lines))
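For orientation, a minimal invocation sketch for render_bundle. The bundle values below are hypothetical (real bundles are produced by the builders in cts/bundle.py); the keys match what render_bundle reads above.

    from cts.render import render_bundle

    # Hypothetical bundle for illustration only.
    bundle = {
        "mode": "default",
        "repo": "myrepo",          # assumed repo name
        "request_id": "req-123",   # assumed request id
        "query": "where is retry configured",
        "timestamp": 1700000000,
        "matches": [
            {"path": "src/http.py", "line": 42, "snippet": "RETRY_LIMIT = 3"},
        ],
        "slices": [
            {"path": "src/http.py", "start": 41, "lines": ["RETRY_LIMIT = 3", "..."]},
        ],
    }
    render_bundle(bundle)  # prints "# Evidence Bundle" with Metadata, Query,
                           # Top Matches, and File Slices sections to stdout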
cts/schema.py ADDED
@@ -0,0 +1,96 @@
+"""Sidecar JSON schema wrapper for evidence bundles.
+
+Wraps raw bundles with stable metadata for downstream consumers:
+- bundle_schema_version: integer, bumped on breaking changes
+- tool info: CLI version, gateway version
+- inputs: the original query parameters
+- passes: list of refinement passes (for autopilot)
+- final: the final bundle
+
+Consumers check bundle_schema_version to decide if they can parse
+the payload. Non-breaking additions (new keys) don't bump the version.
+"""
+
+from __future__ import annotations
+
+import time
+from typing import Any, Dict, List, Optional
+
+BUNDLE_SCHEMA_VERSION = 1
+
+
+def wrap_bundle(
+    raw_bundle: Dict[str, Any],
+    *,
+    mode: str,
+    request_id: str = "",
+    cli_version: str = "",
+    gateway_version: Optional[str] = None,
+    repo: str = "",
+    query: Optional[str] = None,
+    created_at: Optional[float] = None,
+    inputs: Optional[Dict[str, Any]] = None,
+    debug: bool = False,
+    passes: Optional[List[Dict[str, Any]]] = None,
+) -> Dict[str, Any]:
+    """Wrap a raw evidence bundle in the sidecar schema envelope.
+
+    Args:
+        raw_bundle: The evidence bundle dict from bundle.py builders.
+        mode: Bundle mode (default, error, symbol, change).
+        request_id: Request identifier.
+        cli_version: cts CLI version string.
+        gateway_version: Gateway version string (None if not available).
+        repo: Repository identifier.
+        query: Original search query or symbol name.
+        created_at: Unix timestamp (defaults to now).
+        inputs: Dict of original CLI inputs / query parameters.
+        debug: Whether debug telemetry was enabled.
+        passes: List of refinement pass dicts (for autopilot).
+
+    Returns:
+        Sidecar-wrapped dict with stable schema version.
+    """
+    ts = created_at if created_at is not None else time.time()
+
+    sidecar: Dict[str, Any] = {
+        "bundle_schema_version": BUNDLE_SCHEMA_VERSION,
+        "created_at": ts,
+        "tool": {
+            "name": "cts",
+            "cli_version": cli_version,
+        },
+        "request_id": request_id,
+        "repo": repo,
+        "mode": mode,
+    }
+
+    if gateway_version is not None:
+        sidecar["tool"]["gateway_version"] = gateway_version
+
+    if query is not None:
+        sidecar["query"] = query
+
+    if inputs is not None:
+        sidecar["inputs"] = inputs
+
+    sidecar["debug"] = debug
+
+    # Refinement passes (autopilot stores intermediate bundles here)
+    sidecar["passes"] = passes or []
+
+    # The final bundle payload
+    sidecar["final"] = _strip_debug_if_needed(raw_bundle, debug)
+
+    return sidecar
+
+
+def _strip_debug_if_needed(bundle: Dict[str, Any], keep_debug: bool) -> Dict[str, Any]:
+    """Optionally strip _debug from the final bundle.
+
+    When debug=False, remove _debug to keep sidecar clean.
+    When debug=True, keep it in the final bundle for introspection.
+    """
+    if keep_debug or "_debug" not in bundle:
+        return bundle
+    return {k: v for k, v in bundle.items() if k != "_debug"}
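A short usage sketch of wrap_bundle, derived directly from the signature above; the raw bundle and identifiers are hypothetical placeholders.

    from cts.schema import wrap_bundle

    # Hypothetical raw bundle and identifiers for illustration.
    sidecar = wrap_bundle(
        {"mode": "default", "matches": [], "_debug": {"timings_ms": {}}},
        mode="default",
        request_id="req-123",
        cli_version="1.0.0",
        repo="myrepo",
        query="retry logic",
        debug=False,  # with debug off, _debug is stripped from the final payload
    )
    assert sidecar["bundle_schema_version"] == 1
    assert "_debug" not in sidecar["final"]
    assert sidecar["passes"] == []  # autopilot would append refinement passes here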
cts/semantic/__init__.py ADDED
@@ -0,0 +1,47 @@
+"""Semantic search augmentation for Claude Toolstack.
+
+Optional module — requires ``pip install .[semantic]`` for dependencies.
+
+Provides:
+- Chunking strategies for source files
+- SQLite-backed embedding persistence
+- Pluggable embedder (sentence-transformers default)
+- Pure Python cosine retrieval
+- Indexing pipeline (incremental + rebuild)
+"""
+
+from __future__ import annotations
+
+SEMANTIC_SCHEMA_VERSION = 1
+
+# Default knobs (workstation-safe)
+DEFAULTS = {
+    "chunk_lines": 180,
+    "overlap_lines": 30,
+    "topk_chunks": 8,
+    "max_slices": 4,
+    "max_seconds": 4,
+    "max_file_bytes": 512 * 1024,  # 512 KB
+    "confidence_gate": 0.45,
+    "match_gate": 5,
+}
+
+
+def _check_deps() -> None:
+    """Verify optional dependencies are installed."""
+    missing = []
+    try:
+        import numpy  # noqa: F401
+    except ImportError:
+        missing.append("numpy")
+
+    try:
+        import sentence_transformers  # noqa: F401
+    except ImportError:
+        missing.append("sentence-transformers")
+
+    if missing:
+        deps = ", ".join(missing)
+        raise ImportError(
+            f"Semantic search requires: {deps}. Install with: pip install .[semantic]"
+        )
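A hypothetical sketch of how a caller might consume these knobs. The gating decision itself lives elsewhere in the package (plausibly cts/semantic/search.py, not shown here), so should_run_semantic below is an assumption, not the package's actual logic; only DEFAULTS and _check_deps come from the module above.

    from cts import semantic

    def should_run_semantic(confidence: float, match_count: int) -> bool:
        # Hypothetical gate: only fall back to semantic search when
        # lexical evidence is weak (low confidence or too few matches).
        return (
            confidence < semantic.DEFAULTS["confidence_gate"]  # below 0.45
            or match_count < semantic.DEFAULTS["match_gate"]   # fewer than 5
        )

    if should_run_semantic(confidence=0.3, match_count=2):
        semantic._check_deps()  # raises ImportError with an install hint if deps are absent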
cts/semantic/candidates.py ADDED
@@ -0,0 +1,150 @@
+"""Candidate narrowing for semantic search.
+
+Selects which files semantic search should consider, based on
+lexical ranking signals. The goal: search only where lexical
+didn't already produce good evidence, reducing both latency
+and irrelevant noise.
+
+Strategies:
+- exclude_top_k: drop the top K lexically-ranked files and
+  take the next best candidates (default, conservative)
+- none: no narrowing — all files are candidates
+
+All strategies respect max_files and path prefer/avoid filters.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Set
+
+
+@dataclass
+class CandidateSelection:
+    """Result of candidate selection with debug metadata."""
+
+    strategy: str
+    allowed_paths: List[str]
+    excluded_top_k: int = 0
+    candidate_files: int = 0
+    excluded_files_sample: List[str] = field(default_factory=list)
+    candidate_rules_hit: List[str] = field(default_factory=list)
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "strategy": self.strategy,
+            "excluded_top_k": self.excluded_top_k,
+            "candidate_files": self.candidate_files,
+            "excluded_files_sample": self.excluded_files_sample,
+            "candidate_rules_hit": self.candidate_rules_hit,
+        }
+
+
+def _extract_paths(
+    ranked_sources: List[Dict[str, Any]],
+) -> List[str]:
+    """Extract unique file paths from ranked sources in order."""
+    seen: Set[str] = set()
+    paths: List[str] = []
+    for src in ranked_sources:
+        p = src.get("path", "")
+        if p and p not in seen:
+            seen.add(p)
+            paths.append(p)
+    return paths
+
+
+def _apply_path_filters(
+    paths: List[str],
+    *,
+    prefer_paths: Optional[List[str]] = None,
+    avoid_paths: Optional[List[str]] = None,
+    rules_hit: List[str],
+) -> List[str]:
+    """Filter paths by prefer/avoid patterns.
+
+    - prefer_paths: if set, only include paths containing at least
+      one preferred prefix (e.g. "src/", "app/")
+    - avoid_paths: exclude paths containing any avoided prefix
+      (e.g. "vendor/", "test/")
+    """
+    result = paths
+
+    if avoid_paths:
+        before = len(result)
+        result = [p for p in result if not any(avoid in p for avoid in avoid_paths)]
+        if len(result) < before:
+            rules_hit.append(f"avoid_paths removed {before - len(result)}")
+
+    if prefer_paths:
+        preferred = [p for p in result if any(pref in p for pref in prefer_paths)]
+        if preferred:
+            rules_hit.append(f"prefer_paths kept {len(preferred)} of {len(result)}")
+            result = preferred
+
+    return result
+
+
+def select_candidates(
+    ranked_sources: List[Dict[str, Any]],
+    *,
+    strategy: str = "exclude_top_k",
+    exclude_top_k: int = 10,
+    max_files: int = 200,
+    prefer_paths: Optional[List[str]] = None,
+    avoid_paths: Optional[List[str]] = None,
+) -> CandidateSelection:
+    """Select candidate files for semantic search.
+
+    Args:
+        ranked_sources: Lexically-ranked source list from the bundle.
+        strategy: Selection strategy ("exclude_top_k" or "none").
+        exclude_top_k: Number of top-ranked files to exclude.
+        max_files: Maximum candidate files to return.
+        prefer_paths: Path prefixes to prefer (e.g. ["src/", "app/"]).
+        avoid_paths: Path prefixes to avoid (e.g. ["vendor/", "test/"]).
+
+    Returns:
+        CandidateSelection with allowed_paths and debug metadata.
+    """
+    if strategy == "none":
+        return CandidateSelection(
+            strategy="none",
+            allowed_paths=[],  # empty = "search everything"
+            candidate_files=0,
+            candidate_rules_hit=["no narrowing"],
+        )
+
+    # Default: exclude_top_k
+    all_paths = _extract_paths(ranked_sources)
+    rules_hit: List[str] = []
+
+    # Split into excluded top-K and remaining candidates
+    k = min(exclude_top_k, len(all_paths))
+    excluded = all_paths[:k]
+    candidates = all_paths[k:]
+
+    if k > 0:
+        rules_hit.append(f"excluded top {k} lexical files")
+
+    # Apply path filters
+    candidates = _apply_path_filters(
+        candidates,
+        prefer_paths=prefer_paths,
+        avoid_paths=avoid_paths,
+        rules_hit=rules_hit,
+    )
+
+    # Cap at max_files
+    if len(candidates) > max_files:
+        rules_hit.append(f"capped at {max_files} files")
+        candidates = candidates[:max_files]
+
+    return CandidateSelection(
+        strategy="exclude_top_k",
+        allowed_paths=candidates,
+        excluded_top_k=k,
+        candidate_files=len(candidates),
+        excluded_files_sample=excluded[:10],
+        candidate_rules_hit=rules_hit,
+    )
+ )