agentpack-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. agentpack/__init__.py +3 -0
  2. agentpack/adapters/__init__.py +0 -0
  3. agentpack/adapters/base.py +22 -0
  4. agentpack/adapters/claude.py +32 -0
  5. agentpack/adapters/codex.py +26 -0
  6. agentpack/adapters/cursor.py +29 -0
  7. agentpack/adapters/generic.py +18 -0
  8. agentpack/adapters/windsurf.py +26 -0
  9. agentpack/analysis/__init__.py +0 -0
  10. agentpack/analysis/dependency_graph.py +80 -0
  11. agentpack/analysis/go_imports.py +32 -0
  12. agentpack/analysis/java_imports.py +19 -0
  13. agentpack/analysis/js_ts_imports.py +53 -0
  14. agentpack/analysis/python_imports.py +45 -0
  15. agentpack/analysis/ranking.py +400 -0
  16. agentpack/analysis/rust_imports.py +32 -0
  17. agentpack/analysis/symbols.py +154 -0
  18. agentpack/analysis/tests.py +30 -0
  19. agentpack/application/__init__.py +0 -0
  20. agentpack/application/pack_service.py +352 -0
  21. agentpack/cli.py +33 -0
  22. agentpack/commands/__init__.py +0 -0
  23. agentpack/commands/_shared.py +13 -0
  24. agentpack/commands/benchmark.py +302 -0
  25. agentpack/commands/claude_cmd.py +55 -0
  26. agentpack/commands/diff.py +46 -0
  27. agentpack/commands/doctor.py +185 -0
  28. agentpack/commands/explain.py +238 -0
  29. agentpack/commands/init.py +79 -0
  30. agentpack/commands/install.py +252 -0
  31. agentpack/commands/monitor.py +105 -0
  32. agentpack/commands/pack.py +188 -0
  33. agentpack/commands/scan.py +51 -0
  34. agentpack/commands/session.py +204 -0
  35. agentpack/commands/stats.py +138 -0
  36. agentpack/commands/status.py +37 -0
  37. agentpack/commands/summarize.py +64 -0
  38. agentpack/commands/watch.py +185 -0
  39. agentpack/core/__init__.py +0 -0
  40. agentpack/core/bootstrap.py +46 -0
  41. agentpack/core/cache.py +41 -0
  42. agentpack/core/config.py +101 -0
  43. agentpack/core/context_pack.py +222 -0
  44. agentpack/core/diff.py +40 -0
  45. agentpack/core/git.py +145 -0
  46. agentpack/core/git_hooks.py +8 -0
  47. agentpack/core/global_install.py +14 -0
  48. agentpack/core/ignore.py +66 -0
  49. agentpack/core/merkle.py +8 -0
  50. agentpack/core/models.py +115 -0
  51. agentpack/core/redactor.py +99 -0
  52. agentpack/core/scanner.py +150 -0
  53. agentpack/core/snapshot.py +60 -0
  54. agentpack/core/token_estimator.py +26 -0
  55. agentpack/core/vscode_tasks.py +5 -0
  56. agentpack/data/agentpack.md +160 -0
  57. agentpack/installers/__init__.py +0 -0
  58. agentpack/installers/claude.py +160 -0
  59. agentpack/installers/codex.py +54 -0
  60. agentpack/installers/cursor.py +76 -0
  61. agentpack/installers/windsurf.py +50 -0
  62. agentpack/integrations/__init__.py +0 -0
  63. agentpack/integrations/git_hooks.py +109 -0
  64. agentpack/integrations/global_install.py +221 -0
  65. agentpack/integrations/vscode_tasks.py +85 -0
  66. agentpack/renderers/__init__.py +3 -0
  67. agentpack/renderers/compact.py +75 -0
  68. agentpack/renderers/markdown.py +144 -0
  69. agentpack/renderers/receipts.py +10 -0
  70. agentpack/session/__init__.py +33 -0
  71. agentpack/session/state.py +105 -0
  72. agentpack/summaries/__init__.py +0 -0
  73. agentpack/summaries/base.py +42 -0
  74. agentpack/summaries/llm.py +100 -0
  75. agentpack/summaries/offline.py +97 -0
  76. agentpack_cli-0.1.0.dist-info/METADATA +1391 -0
  77. agentpack_cli-0.1.0.dist-info/RECORD +80 -0
  78. agentpack_cli-0.1.0.dist-info/WHEEL +4 -0
  79. agentpack_cli-0.1.0.dist-info/entry_points.txt +2 -0
  80. agentpack_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,352 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import time
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from agentpack.core.config import load_config
11
+ from agentpack.core.ignore import load_spec
12
+ from agentpack.core.scanner import scan
13
+ from agentpack.core.snapshot import build_snapshot, save_snapshot, load_snapshot
14
+ from agentpack.core.diff import diff_snapshots
15
+ from agentpack.core import git
16
+ from agentpack.core.context_pack import select_files, save_pack_metadata
17
+ from agentpack.core.models import ContextPack, DependencyGraph, FileInfo, ScanResult, SelectedFile, Receipt
18
+ from agentpack.core.token_estimator import estimate_tokens
19
+ from agentpack.analysis.ranking import score_files, extract_keywords, enrich_keywords_from_files
20
+ from agentpack.analysis.tests import find_related_tests
21
+ from agentpack.analysis import dependency_graph as dep_graph_mod
22
+ from agentpack.summaries.base import build_all_summaries
23
+
24
+
25
@dataclass
class PackRequest:
    """Caller-supplied inputs for one planning / packing run."""

    root: Path  # directory that config loading, scanning and snapshots operate on
    agent: str  # adapter name looked up through AdapterRegistry.get
    task: str  # task text fed to keyword extraction
    mode: str  # selection mode forwarded to select_files
    budget: int  # values <= 0 make the planner use cfg.context.default_budget
    since: str | None  # when truthy, passed to git.changed_files_since
    refresh: bool
    summary_provider: str  # forwarded to build_all_summaries
35
+
36
+
37
@dataclass
class PackResult:
    """Everything PackService.run hands back after writing a context file."""

    pack: ContextPack  # the pack object that was given to the adapter
    out_path: Path  # path returned by the adapter's write()
    phase_times: dict[str, float]  # seconds per phase, keyed by phase name
    packed_tokens: int  # token estimate summed over the selected files
    raw_tokens: int  # PackService.run fills this with the all-files token total
    saving_pct: float
    changed_files: list[str]  # sorted changed paths
    scan_result: ScanResult
47
+
48
+
49
@dataclass
class ChangeSet:
    """Outcome of change detection: snapshot diff merged with git information."""

    all_changed: set[str]  # union of snapshot-diff paths and git-reported paths
    git_staged: set[str]
    recently_modified: list[str]
    # Snapshot built during detection; kept so it can be persisted later.
    current_snap: dict[str, Any] = field(default_factory=dict)
56
+
57
+
58
@dataclass
class RankResult:
    """Keywords derived from the task plus the scoring output for the files."""

    keywords: set[str]
    # Scoring output as returned by score_files.
    scored: list[tuple[Any, float, list[str]]]
63
+
64
+
65
@dataclass
class PackPlan:
    """Planning output shared by the pack and explain flows."""

    task: str
    mode: str
    budget: int  # effective budget after falling back to the configured default
    scan_result: ScanResult
    summaries: dict[str, Any]  # per-path summary dumps
    dep_graph: DependencyGraph
    all_changed: set[str]
    git_staged: set[str]
    recently_modified: list[str]
    keywords: set[str]
    scored: list[tuple[Any, float, list[str]]]
    selected: list[SelectedFile]
    receipts: list[Receipt]
    phase_times: dict[str, float]  # seconds per planning phase
    # Snapshot captured while detecting changes.
    current_snap: dict[str, Any] = field(default_factory=dict)
83
+
84
+
85
class ChangeDetector:
    """Merges the snapshot diff with git information into a ChangeSet."""

    def detect(
        self,
        packable: list[FileInfo],
        root: Path,
        since: str | None,
    ) -> ChangeSet:
        """Return the changed paths for *packable* files under *root*.

        Paths added or modified relative to the stored snapshot are always
        included. Git data is consulted only when *root* is a git repository.
        """
        snapshot_now = build_snapshot(packable)
        delta = diff_snapshots(load_snapshot(root), snapshot_now)
        from_snapshot: set[str] = set(delta.added + delta.modified)

        from_git: set[str] = set()
        staged: set[str] = set()
        recent: list[str] = []

        if git.is_git_repo(root):
            if not since:
                from_git = git.changed_files(root)
                # NOTE(review): every git-changed path is treated as "staged"
                # here, and nothing is when `since` is given -- confirm intent.
                staged = from_git
            else:
                from_git = git.changed_files_since(root, since)
            recent = git.recently_modified_files(root)

        return ChangeSet(
            all_changed=from_snapshot | from_git,
            git_staged=staged,
            recently_modified=recent,
            current_snap=snapshot_now,
        )
117
+
118
+
119
class FileRanker:
    """Derives keywords from the task and scores every packable file."""

    def rank(
        self,
        packable: list[FileInfo],
        changes: ChangeSet,
        dep_graph: DependencyGraph,
        task: str,
        cfg: Any,
    ) -> RankResult:
        """Score *packable* against *task*.

        As a side effect, each file's node in *dep_graph* gets its ``tests``
        attribute set to the related tests found for that file.
        """
        task_keywords = enrich_keywords_from_files(
            extract_keywords(task), changes.all_changed, packable
        )
        known_paths = {info.path for info in packable}

        # Attach related tests to the graph before scoring reads it.
        for info in packable:
            dep_graph.nodes[info.path].tests = find_related_tests(info.path, known_paths)

        context_cfg = cfg.context
        ranking = score_files(
            packable,
            changed_paths=changes.all_changed,
            staged_paths=changes.git_staged,
            recently_modified=changes.recently_modified,
            dep_graph=dep_graph,
            keywords=task_keywords,
            include_tests=context_cfg.include_tests,
            include_configs=context_cfg.include_configs,
            weights=cfg.scoring,
        )
        return RankResult(keywords=task_keywords, scored=ranking)
150
+
151
+
152
class PackPlanner:
    """Runs scan, summarize, graph, change detection, rank and select in order."""

    def plan(self, request: PackRequest) -> PackPlan:
        """Build a PackPlan for *request*, timing each phase along the way."""
        project_root = request.root
        cfg = load_config(project_root)
        context_cfg = cfg.context
        # A non-positive requested budget means "use the configured default".
        if request.budget > 0:
            budget = request.budget
        else:
            budget = context_cfg.default_budget
        spec = load_spec(project_root / cfg.project.ignore_file)
        timings: dict[str, float] = {}

        started = time.perf_counter()
        scanned = scan(project_root, spec, context_cfg.max_file_tokens)
        timings["scan"] = time.perf_counter() - started

        candidates = scanned.packable

        started = time.perf_counter()
        summary_objects = build_all_summaries(candidates, project_root, request.summary_provider)
        summaries = {path: summary.model_dump() for path, summary in summary_objects.items()}
        timings["summarize"] = time.perf_counter() - started

        started = time.perf_counter()
        graph = dep_graph_mod.build(candidates, project_root, summaries=summaries)
        timings["deps"] = time.perf_counter() - started

        started = time.perf_counter()
        changes = ChangeDetector().detect(candidates, project_root, request.since)
        timings["changes"] = time.perf_counter() - started

        started = time.perf_counter()
        ranking = FileRanker().rank(candidates, changes, graph, request.task, cfg)
        timings["rank"] = time.perf_counter() - started

        started = time.perf_counter()
        chosen, receipts = select_files(
            files=candidates,
            scored=ranking.scored,
            changed_paths=changes.all_changed,
            summaries=summaries,
            mode=request.mode,  # type: ignore[arg-type]
            budget=budget,
            max_file_tokens=context_cfg.max_file_tokens,
            keywords=ranking.keywords,
        )
        timings["select"] = time.perf_counter() - started

        return PackPlan(
            task=request.task,
            mode=request.mode,
            budget=budget,
            scan_result=scanned,
            summaries=summaries,
            dep_graph=graph,
            all_changed=changes.all_changed,
            git_staged=changes.git_staged,
            recently_modified=changes.recently_modified,
            keywords=ranking.keywords,
            scored=ranking.scored,
            selected=chosen,
            receipts=receipts,
            phase_times=timings,
            current_snap=changes.current_snap,
        )
215
+
216
+
217
class AdapterRegistry:
    """Resolves an agent name to an adapter instance."""

    @staticmethod
    def get(agent: str, cfg: Any) -> Any:
        """Return the adapter for *agent*; unknown names get GenericAdapter."""
        from agentpack.adapters.claude import ClaudeAdapter
        from agentpack.adapters.codex import CodexAdapter
        from agentpack.adapters.cursor import CursorAdapter
        from agentpack.adapters.windsurf import WindsurfAdapter
        from agentpack.adapters.generic import GenericAdapter

        if agent == "claude":
            return ClaudeAdapter(cfg.agents.claude.output)

        # NOTE(review): every non-claude adapter is given the *generic* output
        # setting -- confirm whether per-agent outputs were intended.
        shared_output = cfg.agents.generic.output
        adapter_types = {
            "cursor": CursorAdapter,
            "windsurf": WindsurfAdapter,
            "codex": CodexAdapter,
        }
        return adapter_types.get(agent, GenericAdapter)(shared_output)
235
+
236
+
237
class PackService:
    """Turns a PackPlanner plan into a written context file."""

    def run(self, request: PackRequest) -> PackResult:
        """Plan, render through the agent's adapter, then persist bookkeeping.

        After the adapter writes the pack, the snapshot, pack metadata and a
        metrics record are saved under *request.root*.
        """
        project_root = request.root
        cfg = load_config(project_root)

        plan = PackPlanner().plan(request)

        scanned = plan.scan_result
        repo_tokens = sum(info.estimated_tokens for info in scanned.all_files)
        after_ignore = sum(info.estimated_tokens for info in scanned.packable)
        packed_tokens = sum(_sf_tokens(chosen) for chosen in plan.selected)
        # Savings are measured against the whole repository, not the packable subset.
        if repo_tokens > 0:
            saving_pct = (1 - packed_tokens / repo_tokens) * 100
        else:
            saving_pct = 0.0

        warnings = [
            warning
            for chosen in plan.selected
            for warning in chosen.redaction_warnings
        ]

        pack_obj = ContextPack(
            task=request.task,
            agent=request.agent,
            mode=request.mode,  # type: ignore[arg-type]
            budget=plan.budget,
            token_estimate=packed_tokens,
            raw_repo_tokens=repo_tokens,
            after_ignore_tokens=after_ignore,
            estimated_savings_percent=saving_pct,
            changed_files=sorted(plan.all_changed),
            selected_files=plan.selected,
            receipts=plan.receipts if cfg.context.include_receipts else [],
            redaction_warnings=warnings,
            stale=False,
        )

        adapter = AdapterRegistry.get(request.agent, cfg)

        render_started = time.perf_counter()
        out_path = adapter.write(pack_obj, project_root)
        plan.phase_times["render"] = time.perf_counter() - render_started

        snapshot = plan.current_snap
        save_snapshot(snapshot, project_root)
        save_pack_metadata(
            project_root,
            context_path=str(out_path.relative_to(project_root)),
            snapshot_root_hash=snapshot["root_hash"],
            task=request.task,
            agent=request.agent,
            mode=request.mode,
            budget=plan.budget,
            token_estimate=packed_tokens,
        )
        _record_metrics(
            project_root,
            task=request.task,
            mode=request.mode,
            phase_times=plan.phase_times,
            packed_tokens=packed_tokens,
            raw_tokens=repo_tokens,
            saving_pct=saving_pct,
            selected_count=len(plan.selected),
            changed_count=len(plan.all_changed),
        )

        return PackResult(
            pack=pack_obj,
            out_path=out_path,
            phase_times=plan.phase_times,
            packed_tokens=packed_tokens,
            raw_tokens=repo_tokens,
            saving_pct=saving_pct,
            changed_files=sorted(plan.all_changed),
            scan_result=scanned,
        )
309
+
310
+
311
+ def _sf_tokens(sf: SelectedFile) -> int:
312
+ if sf.content:
313
+ return estimate_tokens(sf.content)
314
+ parts: list[str] = []
315
+ if sf.summary:
316
+ parts.append(sf.summary)
317
+ for sym in sf.symbols:
318
+ if sym.signature:
319
+ parts.append(sym.signature)
320
+ return estimate_tokens("\n".join(parts)) if parts else 50
321
+
322
+
323
+ def _record_metrics(
324
+ root: Path,
325
+ *,
326
+ task: str,
327
+ mode: str,
328
+ phase_times: dict[str, float],
329
+ packed_tokens: int,
330
+ raw_tokens: int,
331
+ saving_pct: float,
332
+ selected_count: int,
333
+ changed_count: int,
334
+ ) -> None:
335
+ metrics_path = root / ".agentpack" / "metrics.jsonl"
336
+ record = {
337
+ "ts": datetime.now(timezone.utc).isoformat(),
338
+ "task": task,
339
+ "mode": mode,
340
+ "packed_tokens": packed_tokens,
341
+ "raw_tokens": raw_tokens,
342
+ "saving_pct": round(saving_pct, 1),
343
+ "selected_files": selected_count,
344
+ "changed_files": changed_count,
345
+ "phases": {k: round(v, 3) for k, v in phase_times.items()},
346
+ "total_s": round(sum(phase_times.values()), 3),
347
+ }
348
+ try:
349
+ with metrics_path.open("a") as fh:
350
+ fh.write(json.dumps(record) + "\n")
351
+ except Exception:
352
+ pass
agentpack/cli.py ADDED
@@ -0,0 +1,33 @@
1
+ from __future__ import annotations
2
+
3
+ import typer
4
+ from agentpack.commands import init, scan, diff, status, stats, summarize, pack, install, monitor, explain, doctor, session, watch, claude_cmd, benchmark
5
+ from agentpack import __version__
6
+
7
+
8
def _version_callback(value: bool) -> None:
    """Echo the package version and exit when the flag value is truthy."""
    if not value:
        return
    typer.echo(__version__)
    raise typer.Exit()
12
+
13
+
14
app = typer.Typer(help="AgentPack — token-aware context packing for AI coding agents.")


# Root callback: it exists only to host the eager --version / -v option.
# (No docstring on purpose: Typer can surface callback docstrings as help text.)
@app.callback()
def _main(
    version: bool = typer.Option(
        False,
        "--version",
        "-v",
        callback=_version_callback,
        is_eager=True,
        help="Show version and exit.",
    ),
) -> None:
    pass
22
+
23
+
24
# Each command module exposes register(app); registration happens in this order.
for mod in (
    init,
    scan,
    diff,
    status,
    stats,
    summarize,
    pack,
    install,
    monitor,
    explain,
    doctor,
    session,
    watch,
    claude_cmd,
    benchmark,
):
    mod.register(app)
26
+
27
+
28
def main() -> None:
    """Entry point: invoke the Typer application."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from rich.console import Console
6
+
7
# Console instance imported by the command modules.
console = Console()

# Root path handed out by _root(); a relative "." path.
_ROOT = Path(".")


def _root() -> Path:
    """Return the module-level root path."""
    return _ROOT
@@ -0,0 +1,302 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import typer
9
+ from rich.table import Table
10
+ from rich import box
11
+
12
+ from agentpack.commands._shared import console, _root
13
+ from agentpack.commands.pack import _resolve_task
14
+
15
+
16
@dataclass
class BenchmarkCase:
    """One benchmark scenario: a task, a selection mode, optional ground truth."""

    task: str
    mode: str = "balanced"
    # Paths expected to be selected; precision/recall is reported only when non-empty.
    expected_files: list[str] = field(default_factory=list)
21
+
22
+
23
@dataclass
class CaseResult:
    """Measurements collected from running one BenchmarkCase."""

    case: BenchmarkCase
    packed_tokens: int
    raw_tokens: int
    saving_pct: float
    selected_paths: list[str]
    changed_covered: int  # number of changed files that were selected
    changed_total: int  # total changed files detected
    total_s: float  # wall-clock seconds spent planning
    phase_times: dict[str, float]
34
+
35
+
36
def _load_cases(path: Path) -> list[BenchmarkCase]:
    """Parse the TOML file at *path* into benchmark cases.

    Each entry of the top-level ``cases`` array must carry ``task``; ``mode``
    defaults to ``"balanced"`` and ``expected_files`` to an empty list.
    """
    try:
        import tomllib
    except ImportError:
        # Fall back to the tomli package when tomllib cannot be imported.
        import tomli as tomllib  # type: ignore[no-redef]

    document = tomllib.loads(path.read_text(encoding="utf-8"))
    return [
        BenchmarkCase(
            task=entry["task"],
            mode=entry.get("mode", "balanced"),
            expected_files=entry.get("expected_files", []),
        )
        for entry in document.get("cases", [])
    ]
51
+
52
+
53
+ def _scaffold_cases(root: Path) -> Path:
54
+ out = root / ".agentpack" / "benchmark.toml"
55
+ if out.exists():
56
+ return out
57
+ out.parent.mkdir(parents=True, exist_ok=True)
58
+ out.write_text(
59
+ '# AgentPack benchmark cases\n'
60
+ '# Each case runs a pack and measures token savings, speed, and\n'
61
+ '# selection quality. Add expected_files for precision/recall scoring.\n\n'
62
+ '[[cases]]\n'
63
+ 'task = "fix auth token expiry"\n'
64
+ 'mode = "balanced"\n'
65
+ '# expected_files = [\n'
66
+ '# "src/auth/token.py",\n'
67
+ '# "src/auth/session.py",\n'
68
+ '# ]\n\n'
69
+ '[[cases]]\n'
70
+ 'task = "add rate limiting to API endpoints"\n'
71
+ 'mode = "balanced"\n',
72
+ encoding="utf-8",
73
+ )
74
+ return out
75
+
76
+
77
def _run_case(root: Path, case: BenchmarkCase) -> CaseResult:
    """Plan a pack for *case* under *root* and collect its measurements.

    Only planning runs (nothing is rendered). The request uses the generic
    agent, the offline summary provider and a budget of 0, which makes the
    planner use the configured default budget.
    """
    # Imported here rather than at module level, as in the original.
    # The previously imported-but-unused estimate_tokens has been dropped.
    from agentpack.application.pack_service import PackPlanner, PackRequest, _sf_tokens

    request = PackRequest(
        root=root,
        agent="generic",
        task=case.task,
        mode=case.mode,
        budget=0,
        since=None,
        refresh=False,
        summary_provider="offline",
    )

    t0 = time.perf_counter()
    plan = PackPlanner().plan(request)
    total_s = time.perf_counter() - t0

    packed_tokens = sum(_sf_tokens(sf) for sf in plan.selected)
    raw_tokens = sum(f.estimated_tokens for f in plan.scan_result.all_files)
    # Guard against an empty repository to avoid dividing by zero.
    saving_pct = (1 - packed_tokens / raw_tokens) * 100 if raw_tokens > 0 else 0.0

    selected_paths = [sf.path for sf in plan.selected]
    selected_set = set(selected_paths)

    return CaseResult(
        case=case,
        packed_tokens=packed_tokens,
        raw_tokens=raw_tokens,
        saving_pct=saving_pct,
        selected_paths=selected_paths,
        changed_covered=len(plan.all_changed & selected_set),
        changed_total=len(plan.all_changed),
        total_s=total_s,
        phase_times=plan.phase_times,
    )
117
+
118
+
119
+ def _precision_recall(result: CaseResult) -> tuple[float, float, float]:
120
+ """Returns (precision, recall, f1). Requires expected_files on the case."""
121
+ expected = set(result.case.expected_files)
122
+ if not expected:
123
+ return 0.0, 0.0, 0.0
124
+ selected = set(result.selected_paths)
125
+ tp = len(selected & expected)
126
+ p = tp / len(selected) if selected else 0.0
127
+ r = tp / len(expected)
128
+ f1 = 2 * p * r / (p + r) if (p + r) > 0 else 0.0
129
+ return p, r, f1
130
+
131
+
132
def _print_case_detail(result: CaseResult) -> None:
    """Print the full report for one case.

    Shows a metrics table, a per-phase timing table, precision/recall with
    hits and misses when expected files exist, and the first five selected paths.
    """
    has_gt = bool(result.case.expected_files)
    if has_gt:
        p, r, f1 = _precision_recall(result)
    else:
        p, r, f1 = 0.0, 0.0, 0.0

    console.print(f"\n[bold cyan]{result.case.task}[/] [dim]mode={result.case.mode}[/]")

    rows: list[tuple[str, str]] = [
        ("packed tokens", f"{result.packed_tokens:,}"),
        ("raw tokens", f"{result.raw_tokens:,}"),
        ("saving", f"[green]{result.saving_pct:.1f}%[/]"),
        ("files selected", str(len(result.selected_paths))),
    ]
    if result.changed_total > 0:
        cov_pct = result.changed_covered / result.changed_total * 100
        rows.append(
            ("changed files covered", f"{result.changed_covered}/{result.changed_total} ({cov_pct:.0f}%)")
        )
    rows.append(("total time", f"{result.total_s:.2f}s"))

    metrics = Table(box=box.SIMPLE, show_header=False, padding=(0, 2))
    metrics.add_column(style="dim")
    metrics.add_column(justify="right", style="bold")
    for label, value in rows:
        metrics.add_row(label, value)
    console.print(metrics)

    if result.phase_times:
        timing = Table(box=box.SIMPLE, show_header=True, padding=(0, 2))
        timing.add_column("phase", style="dim")
        timing.add_column("time", justify="right")
        for name, seconds in result.phase_times.items():
            timing.add_row(name, f"{seconds:.3f}s")
        console.print(timing)

    if has_gt:
        console.print(
            f" precision [bold]{p:.1%}[/] "
            f"recall [bold]{r:.1%}[/] "
            f"F1 [bold]{f1:.1%}[/]"
        )
        wanted = set(result.case.expected_files)
        picked = set(result.selected_paths)
        found = wanted & picked
        missing = wanted - picked
        if found:
            console.print(" [green]hit:[/] " + ", ".join(sorted(found)))
        if missing:
            console.print(" [red]miss:[/] " + ", ".join(sorted(missing)))

    console.print(" [dim]top files:[/] " + ", ".join(result.selected_paths[:5]))
175
+
176
+
177
def _print_summary_table(results: list[CaseResult]) -> None:
    """Print one row per result.

    P/R/F1 columns are added only when at least one case has expected files;
    cases without them show an em dash in those columns.
    """
    has_gt = any(res.case.expected_files for res in results)

    tbl = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
    tbl.add_column("task", max_width=40)
    tbl.add_column("mode", width=9)
    for heading in ("tokens", "saving", "files", "time"):
        tbl.add_column(heading, justify="right")
    if has_gt:
        for heading in ("P", "R", "F1"):
            tbl.add_column(heading, justify="right")

    for res in results:
        cells = [
            res.case.task[:38],
            res.case.mode,
            f"{res.packed_tokens:,}",
            f"{res.saving_pct:.1f}%",
            str(len(res.selected_paths)),
            f"{res.total_s:.2f}s",
        ]
        if has_gt:
            if res.case.expected_files:
                p, rec, f1 = _precision_recall(res)
                cells += [f"{p:.1%}", f"{rec:.1%}", f"{f1:.1%}"]
            else:
                cells += ["—", "—", "—"]
        tbl.add_row(*cells)

    console.print()
    console.print(tbl)
212
+
213
+
214
def _print_compare_table(task: str, results: list[CaseResult]) -> None:
    """Print a per-mode comparison table for the results of a single task."""
    console.print(f"\n[bold]Mode comparison:[/] [cyan]{task}[/]\n")

    comparison = Table(box=box.SIMPLE, show_header=True, padding=(0, 2))
    comparison.add_column("mode", width=10)
    for heading in ("tokens", "saving", "files", "time"):
        comparison.add_column(heading, justify="right")

    for res in results:
        cells = (
            res.case.mode,
            f"{res.packed_tokens:,}",
            f"{res.saving_pct:.1f}%",
            str(len(res.selected_paths)),
            f"{res.total_s:.2f}s",
        )
        comparison.add_row(*cells)
    console.print(comparison)
234
+
235
+
236
def register(app: typer.Typer) -> None:
    """Attach the ``benchmark`` command to *app*."""

    @app.command()
    def benchmark(
        task: str = typer.Option("", "--task", help="Single task to benchmark (skips cases file)."),
        mode: str = typer.Option("balanced", "--mode", help="Mode for single-task run (minimal|balanced|deep)."),
        cases: str = typer.Option("", "--cases", help="Path to TOML cases file (default: .agentpack/benchmark.toml)."),
        compare: bool = typer.Option(False, "--compare", is_flag=True, help="Compare minimal/balanced/deep for each task."),
        init: bool = typer.Option(False, "--init", is_flag=True, help="Scaffold a benchmark.toml and exit."),
    ) -> None:
        """Benchmark file selection quality and token efficiency across tasks."""
        root = _root()

        if init:
            # _scaffold_cases leaves an existing file untouched, so check first
            # and avoid claiming a file was created when it was not.
            already_present = (root / ".agentpack" / "benchmark.toml").exists()
            out = _scaffold_cases(root)
            if already_present:
                console.print(f"[yellow]![/] [bold]{out}[/] already exists; left unchanged.")
            else:
                console.print(f"[green]✓[/] Created [bold]{out}[/]")
            console.print(" Edit the file to add your tasks and expected files, then run [bold]agentpack benchmark[/].")
            return

        # Build case list
        if task:
            # The literal task "auto" is resolved through the pack command's helper.
            resolved = _resolve_task(task) if task == "auto" else task
            bench_cases = [BenchmarkCase(task=resolved, mode=mode)]
        else:
            cases_path = Path(cases) if cases else root / ".agentpack" / "benchmark.toml"
            if not cases_path.exists():
                console.print(f"[yellow]No cases file found at {cases_path}[/]")
                console.print(" Run [bold]agentpack benchmark --init[/] to scaffold one, or use [bold]--task \"...\"[/]")
                raise typer.Exit(1)
            bench_cases = _load_cases(cases_path)
            if not bench_cases:
                console.print("[yellow]No cases defined in benchmark file.[/]")
                raise typer.Exit(1)

        # Expand for compare mode: every case is run once per mode.
        if compare:
            bench_cases = [
                BenchmarkCase(task=c.task, mode=m, expected_files=c.expected_files)
                for c in bench_cases
                for m in ("minimal", "balanced", "deep")
            ]

        console.print(f"\n[bold]Running {len(bench_cases)} benchmark case(s)...[/]\n")

        results: list[CaseResult] = []
        for i, c in enumerate(bench_cases, 1):
            label = f"[{i}/{len(bench_cases)}] {c.task[:50]} mode={c.mode}"
            with console.status(f"[dim]{label}[/]"):
                try:
                    results.append(_run_case(root, c))
                except Exception as e:
                    # Best-effort per case: report the failure and keep going.
                    console.print(f"[red]Error on case '{c.task}': {e}[/]")

        if not results:
            raise typer.Exit(1)

        # Output
        if compare and len({r.case.task for r in results}) == 1:
            _print_compare_table(results[0].case.task, results)
        elif len(results) == 1:
            _print_case_detail(results[0])
        else:
            if not compare:
                for r in results:
                    _print_case_detail(r)
            console.print("\n[bold]Summary[/]")
            _print_summary_table(results)