agentpack-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentpack/__init__.py +3 -0
- agentpack/adapters/__init__.py +0 -0
- agentpack/adapters/base.py +22 -0
- agentpack/adapters/claude.py +32 -0
- agentpack/adapters/codex.py +26 -0
- agentpack/adapters/cursor.py +29 -0
- agentpack/adapters/generic.py +18 -0
- agentpack/adapters/windsurf.py +26 -0
- agentpack/analysis/__init__.py +0 -0
- agentpack/analysis/dependency_graph.py +80 -0
- agentpack/analysis/go_imports.py +32 -0
- agentpack/analysis/java_imports.py +19 -0
- agentpack/analysis/js_ts_imports.py +53 -0
- agentpack/analysis/python_imports.py +45 -0
- agentpack/analysis/ranking.py +400 -0
- agentpack/analysis/rust_imports.py +32 -0
- agentpack/analysis/symbols.py +154 -0
- agentpack/analysis/tests.py +30 -0
- agentpack/application/__init__.py +0 -0
- agentpack/application/pack_service.py +352 -0
- agentpack/cli.py +33 -0
- agentpack/commands/__init__.py +0 -0
- agentpack/commands/_shared.py +13 -0
- agentpack/commands/benchmark.py +302 -0
- agentpack/commands/claude_cmd.py +55 -0
- agentpack/commands/diff.py +46 -0
- agentpack/commands/doctor.py +185 -0
- agentpack/commands/explain.py +238 -0
- agentpack/commands/init.py +79 -0
- agentpack/commands/install.py +252 -0
- agentpack/commands/monitor.py +105 -0
- agentpack/commands/pack.py +188 -0
- agentpack/commands/scan.py +51 -0
- agentpack/commands/session.py +204 -0
- agentpack/commands/stats.py +138 -0
- agentpack/commands/status.py +37 -0
- agentpack/commands/summarize.py +64 -0
- agentpack/commands/watch.py +185 -0
- agentpack/core/__init__.py +0 -0
- agentpack/core/bootstrap.py +46 -0
- agentpack/core/cache.py +41 -0
- agentpack/core/config.py +101 -0
- agentpack/core/context_pack.py +222 -0
- agentpack/core/diff.py +40 -0
- agentpack/core/git.py +145 -0
- agentpack/core/git_hooks.py +8 -0
- agentpack/core/global_install.py +14 -0
- agentpack/core/ignore.py +66 -0
- agentpack/core/merkle.py +8 -0
- agentpack/core/models.py +115 -0
- agentpack/core/redactor.py +99 -0
- agentpack/core/scanner.py +150 -0
- agentpack/core/snapshot.py +60 -0
- agentpack/core/token_estimator.py +26 -0
- agentpack/core/vscode_tasks.py +5 -0
- agentpack/data/agentpack.md +160 -0
- agentpack/installers/__init__.py +0 -0
- agentpack/installers/claude.py +160 -0
- agentpack/installers/codex.py +54 -0
- agentpack/installers/cursor.py +76 -0
- agentpack/installers/windsurf.py +50 -0
- agentpack/integrations/__init__.py +0 -0
- agentpack/integrations/git_hooks.py +109 -0
- agentpack/integrations/global_install.py +221 -0
- agentpack/integrations/vscode_tasks.py +85 -0
- agentpack/renderers/__init__.py +3 -0
- agentpack/renderers/compact.py +75 -0
- agentpack/renderers/markdown.py +144 -0
- agentpack/renderers/receipts.py +10 -0
- agentpack/session/__init__.py +33 -0
- agentpack/session/state.py +105 -0
- agentpack/summaries/__init__.py +0 -0
- agentpack/summaries/base.py +42 -0
- agentpack/summaries/llm.py +100 -0
- agentpack/summaries/offline.py +97 -0
- agentpack_cli-0.1.0.dist-info/METADATA +1391 -0
- agentpack_cli-0.1.0.dist-info/RECORD +80 -0
- agentpack_cli-0.1.0.dist-info/WHEEL +4 -0
- agentpack_cli-0.1.0.dist-info/entry_points.txt +2 -0
- agentpack_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from agentpack.core.config import load_config
|
|
11
|
+
from agentpack.core.ignore import load_spec
|
|
12
|
+
from agentpack.core.scanner import scan
|
|
13
|
+
from agentpack.core.snapshot import build_snapshot, save_snapshot, load_snapshot
|
|
14
|
+
from agentpack.core.diff import diff_snapshots
|
|
15
|
+
from agentpack.core import git
|
|
16
|
+
from agentpack.core.context_pack import select_files, save_pack_metadata
|
|
17
|
+
from agentpack.core.models import ContextPack, DependencyGraph, FileInfo, ScanResult, SelectedFile, Receipt
|
|
18
|
+
from agentpack.core.token_estimator import estimate_tokens
|
|
19
|
+
from agentpack.analysis.ranking import score_files, extract_keywords, enrich_keywords_from_files
|
|
20
|
+
from agentpack.analysis.tests import find_related_tests
|
|
21
|
+
from agentpack.analysis import dependency_graph as dep_graph_mod
|
|
22
|
+
from agentpack.summaries.base import build_all_summaries
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class PackRequest:
    """Input parameters for one pack run (shared by pack, explain, benchmark)."""

    root: Path  # repository root to scan
    agent: str  # target agent name, e.g. "claude" / "generic"
    task: str  # free-text task description; drives keyword ranking
    mode: str  # selection mode string (minimal | balanced | deep)
    budget: int  # token budget; values <= 0 fall back to the config default
    since: str | None  # optional git ref to diff against instead of the working tree
    refresh: bool  # NOTE(review): not read by PackPlanner/PackService in this module — confirm consumer
    summary_provider: str  # summary backend name, e.g. "offline"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class PackResult:
    """Outcome of PackService.run: the rendered pack plus run statistics."""

    pack: ContextPack  # the fully-built pack object that was rendered
    out_path: Path  # path of the context file the adapter wrote
    phase_times: dict[str, float]  # per-phase wall-clock seconds (scan, summarize, ..., render)
    packed_tokens: int  # estimated tokens of the selected content
    raw_tokens: int  # estimated tokens of ALL scanned files (before ignores)
    saving_pct: float  # percentage saved relative to raw_tokens
    changed_files: list[str]  # sorted paths detected as changed this run
    scan_result: ScanResult  # raw scan output, for callers needing file details
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class ChangeSet:
    """Result of change detection: snapshot diff combined with git diff."""

    all_changed: set[str]  # union of snapshot added/modified and git-changed paths
    git_staged: set[str]  # git working-tree changes (left empty when a --since ref is given)
    recently_modified: list[str]  # paths from recent git history
    current_snap: dict[str, Any] = field(default_factory=dict)  # snapshot built during this detection
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class RankResult:
    """Result of keyword extraction and file scoring."""

    keywords: set[str]  # task keywords, enriched from the changed files
    scored: list[tuple[Any, float, list[str]]]  # (file, score, reasons) triples from score_files
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class PackPlan:
    """Shared planning output used by both pack and explain."""

    task: str  # task text the plan was built for
    mode: str  # selection mode used
    budget: int  # effective token budget (request or config default)
    scan_result: ScanResult  # full scan output
    summaries: dict[str, Any]  # path -> summary dict (model_dump of summary objects)
    dep_graph: DependencyGraph  # import/dependency graph over packable files
    all_changed: set[str]  # union of snapshot- and git-detected changes
    git_staged: set[str]  # git working-tree changes
    recently_modified: list[str]  # paths from recent git history
    keywords: set[str]  # enriched ranking keywords
    scored: list[tuple[Any, float, list[str]]]  # ranked (file, score, reasons) triples
    selected: list[SelectedFile]  # files chosen under the budget
    receipts: list[Receipt]  # selection receipts explaining each choice
    phase_times: dict[str, float]  # per-phase wall-clock seconds
    current_snap: dict[str, Any] = field(default_factory=dict)  # snapshot to persist after a successful pack
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ChangeDetector:
    """Combines snapshot diff + git diff → ChangeSet of changed paths."""

    def detect(
        self,
        packable: list[FileInfo],
        root: Path,
        since: str | None,
    ) -> ChangeSet:
        """Merge the on-disk snapshot diff with git state into one ChangeSet."""
        snapshot_now = build_snapshot(packable)
        delta = diff_snapshots(load_snapshot(root), snapshot_now)
        snapshot_changed: set[str] = {*delta.added, *delta.modified}

        from_git: set[str] = set()
        staged: set[str] = set()
        recent: list[str] = []

        if git.is_git_repo(root):
            if since:
                # Explicit ref given: diff against it; nothing counts as "staged".
                from_git = git.changed_files_since(root, since)
            else:
                # Working-tree changes double as the staged set.
                staged = from_git = git.changed_files(root)
            recent = git.recently_modified_files(root)

        return ChangeSet(
            all_changed=snapshot_changed | from_git,
            git_staged=staged,
            recently_modified=recent,
            current_snap=snapshot_now,
        )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class FileRanker:
    """Extracts keywords from the task and scores files against them."""

    def rank(
        self,
        packable: list[FileInfo],
        changes: ChangeSet,
        dep_graph: DependencyGraph,
        task: str,
        cfg: Any,
    ) -> RankResult:
        """Score every packable file for relevance to *task* and the change set."""
        # Task keywords, widened with tokens drawn from the changed files.
        kw = enrich_keywords_from_files(extract_keywords(task), changes.all_changed, packable)

        # Attach related-test paths onto the dependency-graph nodes in place.
        known_paths = {info.path for info in packable}
        for info in packable:
            dep_graph.nodes[info.path].tests = find_related_tests(info.path, known_paths)

        ranked = score_files(
            packable,
            changed_paths=changes.all_changed,
            staged_paths=changes.git_staged,
            recently_modified=changes.recently_modified,
            dep_graph=dep_graph,
            keywords=kw,
            include_tests=cfg.context.include_tests,
            include_configs=cfg.context.include_configs,
            weights=cfg.scoring,
        )
        return RankResult(keywords=kw, scored=ranked)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class PackPlanner:
    """Runs scan → summarize → graph → rank → select; shared by pack and explain."""

    def plan(self, request: PackRequest) -> PackPlan:
        """Execute every planning phase for *request*, timing each one.

        Returns a PackPlan; nothing is written to disk here — persistence
        happens in PackService.run.
        """
        root = request.root
        cfg = load_config(root)
        # A non-positive requested budget means "use the configured default".
        effective_budget = request.budget if request.budget > 0 else cfg.context.default_budget
        ignore_spec = load_spec(root / cfg.project.ignore_file)
        phase_times: dict[str, float] = {}

        # Phase 1: walk the tree and classify files.
        t0 = time.perf_counter()
        scan_result = scan(root, ignore_spec, cfg.context.max_file_tokens)
        phase_times["scan"] = time.perf_counter() - t0

        packable = scan_result.packable

        # Phase 2: per-file summaries (provider chosen by the request).
        t0 = time.perf_counter()
        summaries_objs = build_all_summaries(packable, root, request.summary_provider)
        summaries = {p: s.model_dump() for p, s in summaries_objs.items()}
        phase_times["summarize"] = time.perf_counter() - t0

        # Phase 3: dependency graph over the packable files.
        t0 = time.perf_counter()
        dep_graph = dep_graph_mod.build(packable, root, summaries=summaries)
        phase_times["deps"] = time.perf_counter() - t0

        # Phase 4: change detection (snapshot diff + git).
        t0 = time.perf_counter()
        changes = ChangeDetector().detect(packable, root, request.since)
        phase_times["changes"] = time.perf_counter() - t0

        # Phase 5: keyword extraction and relevance scoring.
        t0 = time.perf_counter()
        rank_result = FileRanker().rank(packable, changes, dep_graph, request.task, cfg)
        phase_times["rank"] = time.perf_counter() - t0

        # Phase 6: budgeted file selection with receipts.
        t0 = time.perf_counter()
        selected, receipts = select_files(
            files=packable,
            scored=rank_result.scored,
            changed_paths=changes.all_changed,
            summaries=summaries,
            mode=request.mode,  # type: ignore[arg-type]
            budget=effective_budget,
            max_file_tokens=cfg.context.max_file_tokens,
            keywords=rank_result.keywords,
        )
        phase_times["select"] = time.perf_counter() - t0

        return PackPlan(
            task=request.task,
            mode=request.mode,
            budget=effective_budget,
            scan_result=scan_result,
            summaries=summaries,
            dep_graph=dep_graph,
            all_changed=changes.all_changed,
            git_staged=changes.git_staged,
            recently_modified=changes.recently_modified,
            keywords=rank_result.keywords,
            scored=rank_result.scored,
            selected=selected,
            receipts=receipts,
            phase_times=phase_times,
            current_snap=changes.current_snap,
        )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class AdapterRegistry:
    """Maps agent names to adapter instances; extensible without touching PackService."""

    @staticmethod
    def get(agent: str, cfg: Any) -> Any:
        """Return the adapter for *agent*; unknown names get the generic adapter."""
        # Imported lazily so adapter modules load only when actually needed.
        from agentpack.adapters.claude import ClaudeAdapter
        from agentpack.adapters.codex import CodexAdapter
        from agentpack.adapters.cursor import CursorAdapter
        from agentpack.adapters.windsurf import WindsurfAdapter
        from agentpack.adapters.generic import GenericAdapter

        if agent == "claude":
            return ClaudeAdapter(cfg.agents.claude.output)
        if agent == "cursor":
            return CursorAdapter(cfg.agents.generic.output)
        if agent == "windsurf":
            return WindsurfAdapter(cfg.agents.generic.output)
        if agent == "codex":
            return CodexAdapter(cfg.agents.generic.output)
        return GenericAdapter(cfg.agents.generic.output)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
class PackService:
    """Materializes a plan from PackPlanner into a written context file."""

    def run(self, request: PackRequest) -> PackResult:
        """Plan, render, and persist a context pack; returns run statistics."""
        root = request.root
        cfg = load_config(root)

        plan = PackPlanner().plan(request)

        # Token accounting: all_files = whole scanned repo, packable = after
        # ignores, selected = what actually goes into the pack. Savings are
        # quoted against the full repo size.
        packable = plan.scan_result.packable
        all_tokens = sum(f.estimated_tokens for f in plan.scan_result.all_files)
        raw_tokens = sum(f.estimated_tokens for f in packable)
        packed_tokens = sum(_sf_tokens(sf) for sf in plan.selected)
        saving_pct = (1 - packed_tokens / all_tokens) * 100 if all_tokens > 0 else 0.0

        # Surface every per-file redaction warning at the pack level.
        all_redaction_warnings = [w for sf in plan.selected for w in sf.redaction_warnings]

        pack_obj = ContextPack(
            task=request.task,
            agent=request.agent,
            mode=request.mode,  # type: ignore[arg-type]
            budget=plan.budget,
            token_estimate=packed_tokens,
            raw_repo_tokens=all_tokens,
            after_ignore_tokens=raw_tokens,
            estimated_savings_percent=saving_pct,
            changed_files=sorted(plan.all_changed),
            selected_files=plan.selected,
            receipts=plan.receipts if cfg.context.include_receipts else [],
            redaction_warnings=all_redaction_warnings,
            stale=False,
        )

        adapter = AdapterRegistry.get(request.agent, cfg)

        # Rendering is timed as its own phase alongside the planner's phases.
        t0 = time.perf_counter()
        out_path = adapter.write(pack_obj, root)
        plan.phase_times["render"] = time.perf_counter() - t0

        # Persist state for subsequent diff/status runs, then record metrics.
        save_snapshot(plan.current_snap, root)
        save_pack_metadata(
            root,
            context_path=str(out_path.relative_to(root)),
            snapshot_root_hash=plan.current_snap["root_hash"],
            task=request.task,
            agent=request.agent,
            mode=request.mode,
            budget=plan.budget,
            token_estimate=packed_tokens,
        )
        _record_metrics(
            root,
            task=request.task,
            mode=request.mode,
            phase_times=plan.phase_times,
            packed_tokens=packed_tokens,
            raw_tokens=all_tokens,
            saving_pct=saving_pct,
            selected_count=len(plan.selected),
            changed_count=len(plan.all_changed),
        )

        return PackResult(
            pack=pack_obj,
            out_path=out_path,
            phase_times=plan.phase_times,
            packed_tokens=packed_tokens,
            raw_tokens=all_tokens,
            saving_pct=saving_pct,
            changed_files=sorted(plan.all_changed),
            scan_result=plan.scan_result,
        )
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _sf_tokens(sf: SelectedFile) -> int:
    """Estimate the token footprint a selected file contributes to the pack."""
    if sf.content:
        # Full content is included verbatim — estimate it directly.
        return estimate_tokens(sf.content)
    # Summary-only entry: count the summary text plus every symbol signature.
    pieces = [sf.summary] if sf.summary else []
    pieces.extend(sym.signature for sym in sf.symbols if sym.signature)
    if not pieces:
        return 50  # flat fallback when no textual representation survives
    return estimate_tokens("\n".join(pieces))
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _record_metrics(
|
|
324
|
+
root: Path,
|
|
325
|
+
*,
|
|
326
|
+
task: str,
|
|
327
|
+
mode: str,
|
|
328
|
+
phase_times: dict[str, float],
|
|
329
|
+
packed_tokens: int,
|
|
330
|
+
raw_tokens: int,
|
|
331
|
+
saving_pct: float,
|
|
332
|
+
selected_count: int,
|
|
333
|
+
changed_count: int,
|
|
334
|
+
) -> None:
|
|
335
|
+
metrics_path = root / ".agentpack" / "metrics.jsonl"
|
|
336
|
+
record = {
|
|
337
|
+
"ts": datetime.now(timezone.utc).isoformat(),
|
|
338
|
+
"task": task,
|
|
339
|
+
"mode": mode,
|
|
340
|
+
"packed_tokens": packed_tokens,
|
|
341
|
+
"raw_tokens": raw_tokens,
|
|
342
|
+
"saving_pct": round(saving_pct, 1),
|
|
343
|
+
"selected_files": selected_count,
|
|
344
|
+
"changed_files": changed_count,
|
|
345
|
+
"phases": {k: round(v, 3) for k, v in phase_times.items()},
|
|
346
|
+
"total_s": round(sum(phase_times.values()), 3),
|
|
347
|
+
}
|
|
348
|
+
try:
|
|
349
|
+
with metrics_path.open("a") as fh:
|
|
350
|
+
fh.write(json.dumps(record) + "\n")
|
|
351
|
+
except Exception:
|
|
352
|
+
pass
|
agentpack/cli.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from agentpack.commands import init, scan, diff, status, stats, summarize, pack, install, monitor, explain, doctor, session, watch, claude_cmd, benchmark
|
|
5
|
+
from agentpack import __version__
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _version_callback(value: bool) -> None:
    """Eager ``--version`` handler: print the version and stop processing."""
    if not value:
        return
    typer.echo(__version__)
    raise typer.Exit()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Top-level Typer application; subcommands are attached by the loop below.
app = typer.Typer(help="AgentPack — token-aware context packing for AI coding agents.")


@app.callback()
def _main(
    version: bool = typer.Option(False, "--version", "-v", callback=_version_callback, is_eager=True, help="Show version and exit."),
) -> None:
    # No-op callback: exists only so the eager --version option hangs off the
    # top-level app rather than any individual subcommand.
    pass


# Each command module exposes a register(app) hook that adds its commands.
for mod in [init, scan, diff, status, stats, summarize, pack, install, monitor, explain, doctor, session, watch, claude_cmd, benchmark]:
    mod.register(app)


def main() -> None:
    """Console-script entry point (wired via the wheel's entry_points)."""
    app()


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.table import Table
|
|
10
|
+
from rich import box
|
|
11
|
+
|
|
12
|
+
from agentpack.commands._shared import console, _root
|
|
13
|
+
from agentpack.commands.pack import _resolve_task
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class BenchmarkCase:
    """One benchmark scenario: a task to plan, plus optional ground truth."""

    task: str  # task description fed to the planner
    mode: str = "balanced"  # selection mode: minimal | balanced | deep
    expected_files: list[str] = field(default_factory=list)  # ground-truth paths enabling P/R/F1 scoring
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class CaseResult:
    """Measured outcome of running one BenchmarkCase through the planner."""

    case: BenchmarkCase  # the case that produced this result
    packed_tokens: int  # estimated tokens of the selected content
    raw_tokens: int  # estimated tokens across all scanned files
    saving_pct: float  # savings relative to raw_tokens
    selected_paths: list[str]  # paths chosen by the planner, in selection order
    changed_covered: int  # changed files that were selected
    changed_total: int  # total changed files detected
    total_s: float  # wall-clock seconds for the whole plan
    phase_times: dict[str, float]  # per-phase seconds reported by the planner
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _load_cases(path: Path) -> list[BenchmarkCase]:
    """Parse benchmark cases from the ``[[cases]]`` tables of a TOML file."""
    try:
        import tomllib  # stdlib on Python 3.11+
    except ImportError:
        import tomli as tomllib  # type: ignore[no-redef]

    parsed = tomllib.loads(path.read_text(encoding="utf-8"))
    return [
        BenchmarkCase(
            task=entry["task"],
            mode=entry.get("mode", "balanced"),
            expected_files=entry.get("expected_files", []),
        )
        for entry in parsed.get("cases", [])
    ]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _scaffold_cases(root: Path) -> Path:
|
|
54
|
+
out = root / ".agentpack" / "benchmark.toml"
|
|
55
|
+
if out.exists():
|
|
56
|
+
return out
|
|
57
|
+
out.parent.mkdir(parents=True, exist_ok=True)
|
|
58
|
+
out.write_text(
|
|
59
|
+
'# AgentPack benchmark cases\n'
|
|
60
|
+
'# Each case runs a pack and measures token savings, speed, and\n'
|
|
61
|
+
'# selection quality. Add expected_files for precision/recall scoring.\n\n'
|
|
62
|
+
'[[cases]]\n'
|
|
63
|
+
'task = "fix auth token expiry"\n'
|
|
64
|
+
'mode = "balanced"\n'
|
|
65
|
+
'# expected_files = [\n'
|
|
66
|
+
'# "src/auth/token.py",\n'
|
|
67
|
+
'# "src/auth/session.py",\n'
|
|
68
|
+
'# ]\n\n'
|
|
69
|
+
'[[cases]]\n'
|
|
70
|
+
'task = "add rate limiting to API endpoints"\n'
|
|
71
|
+
'mode = "balanced"\n',
|
|
72
|
+
encoding="utf-8",
|
|
73
|
+
)
|
|
74
|
+
return out
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _run_case(root: Path, case: BenchmarkCase) -> CaseResult:
    """Plan (but do not write) a pack for *case* and collect quality metrics.

    Uses the offline summary provider and the generic agent so benchmarks
    are deterministic and need no network.
    """
    # Imported lazily so lightweight subcommands avoid the planner import cost.
    # Fix: removed the unused `estimate_tokens` import that was also pulled in here.
    from agentpack.application.pack_service import PackPlanner, PackRequest, _sf_tokens

    request = PackRequest(
        root=root,
        agent="generic",
        task=case.task,
        mode=case.mode,
        budget=0,  # 0 → planner falls back to the configured default budget
        since=None,
        refresh=False,
        summary_provider="offline",
    )

    t0 = time.perf_counter()
    plan = PackPlanner().plan(request)
    total_s = time.perf_counter() - t0

    # Token efficiency: selected content vs. everything that was scanned.
    packed_tokens = sum(_sf_tokens(sf) for sf in plan.selected)
    raw_tokens = sum(f.estimated_tokens for f in plan.scan_result.all_files)
    saving_pct = (1 - packed_tokens / raw_tokens) * 100 if raw_tokens > 0 else 0.0

    selected_paths = [sf.path for sf in plan.selected]
    selected_set = set(selected_paths)

    # Coverage: how many detected-changed files made it into the selection.
    changed_covered = len(plan.all_changed & selected_set)
    changed_total = len(plan.all_changed)

    return CaseResult(
        case=case,
        packed_tokens=packed_tokens,
        raw_tokens=raw_tokens,
        saving_pct=saving_pct,
        selected_paths=selected_paths,
        changed_covered=changed_covered,
        changed_total=changed_total,
        total_s=total_s,
        phase_times=plan.phase_times,
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _precision_recall(result: CaseResult) -> tuple[float, float, float]:
|
|
120
|
+
"""Returns (precision, recall, f1). Requires expected_files on the case."""
|
|
121
|
+
expected = set(result.case.expected_files)
|
|
122
|
+
if not expected:
|
|
123
|
+
return 0.0, 0.0, 0.0
|
|
124
|
+
selected = set(result.selected_paths)
|
|
125
|
+
tp = len(selected & expected)
|
|
126
|
+
p = tp / len(selected) if selected else 0.0
|
|
127
|
+
r = tp / len(expected)
|
|
128
|
+
f1 = 2 * p * r / (p + r) if (p + r) > 0 else 0.0
|
|
129
|
+
return p, r, f1
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _print_case_detail(result: CaseResult) -> None:
    """Print a full per-case report: stats table, phase timings, and P/R/F1."""
    # Ground truth is optional; P/R/F1 only shown when expected_files is set.
    has_gt = bool(result.case.expected_files)
    p, r, f1 = _precision_recall(result) if has_gt else (0.0, 0.0, 0.0)

    console.print(f"\n[bold cyan]{result.case.task}[/] [dim]mode={result.case.mode}[/]")

    # Headline stats as a two-column label/value table.
    tbl = Table(box=box.SIMPLE, show_header=False, padding=(0, 2))
    tbl.add_column(style="dim")
    tbl.add_column(justify="right", style="bold")
    tbl.add_row("packed tokens", f"{result.packed_tokens:,}")
    tbl.add_row("raw tokens", f"{result.raw_tokens:,}")
    tbl.add_row("saving", f"[green]{result.saving_pct:.1f}%[/]")
    tbl.add_row("files selected", str(len(result.selected_paths)))
    if result.changed_total > 0:
        cov_pct = result.changed_covered / result.changed_total * 100
        tbl.add_row("changed files covered", f"{result.changed_covered}/{result.changed_total} ({cov_pct:.0f}%)")
    tbl.add_row("total time", f"{result.total_s:.2f}s")
    console.print(tbl)

    # Per-phase timing breakdown, when the planner reported any.
    if result.phase_times:
        phases = Table(box=box.SIMPLE, show_header=True, padding=(0, 2))
        phases.add_column("phase", style="dim")
        phases.add_column("time", justify="right")
        for phase, t in result.phase_times.items():
            phases.add_row(phase, f"{t:.3f}s")
        console.print(phases)

    # Selection-quality section: scores plus explicit hit/miss path lists.
    if has_gt:
        console.print(
            f" precision [bold]{p:.1%}[/] "
            f"recall [bold]{r:.1%}[/] "
            f"F1 [bold]{f1:.1%}[/]"
        )
        expected_set = set(result.case.expected_files)
        selected_set = set(result.selected_paths)
        hits = expected_set & selected_set
        misses = expected_set - selected_set
        if hits:
            console.print(f" [green]hit:[/] " + ", ".join(sorted(hits)))
        if misses:
            console.print(f" [red]miss:[/] " + ", ".join(sorted(misses)))

    console.print(f" [dim]top files:[/] " + ", ".join(result.selected_paths[:5]))
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _print_summary_table(results: list[CaseResult]) -> None:
    """Print one summary row per case; P/R/F1 columns appear only when any case has ground truth."""
    has_gt = any(r.case.expected_files for r in results)

    tbl = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
    tbl.add_column("task", max_width=40)
    tbl.add_column("mode", width=9)
    tbl.add_column("tokens", justify="right")
    tbl.add_column("saving", justify="right")
    tbl.add_column("files", justify="right")
    tbl.add_column("time", justify="right")
    if has_gt:
        tbl.add_column("P", justify="right")
        tbl.add_column("R", justify="right")
        tbl.add_column("F1", justify="right")

    for r in results:
        p, rec, f1 = _precision_recall(r) if r.case.expected_files else (0.0, 0.0, 0.0)
        row = [
            r.case.task[:38],
            r.case.mode,
            f"{r.packed_tokens:,}",
            f"{r.saving_pct:.1f}%",
            str(len(r.selected_paths)),
            f"{r.total_s:.2f}s",
        ]
        if has_gt:
            # Cases without their own ground truth show an em dash placeholder.
            row += [
                f"{p:.1%}" if r.case.expected_files else "—",
                f"{rec:.1%}" if r.case.expected_files else "—",
                f"{f1:.1%}" if r.case.expected_files else "—",
            ]
        tbl.add_row(*row)

    console.print()
    console.print(tbl)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _print_compare_table(task: str, results: list[CaseResult]) -> None:
    """Side-by-side mode comparison for a single task."""
    console.print(f"\n[bold]Mode comparison:[/] [cyan]{task}[/]\n")

    table = Table(box=box.SIMPLE, show_header=True, padding=(0, 2))
    table.add_column("mode", width=10)
    for heading in ("tokens", "saving", "files", "time"):
        table.add_column(heading, justify="right")

    for res in results:
        table.add_row(
            res.case.mode,
            f"{res.packed_tokens:,}",
            f"{res.saving_pct:.1f}%",
            str(len(res.selected_paths)),
            f"{res.total_s:.2f}s",
        )
    console.print(table)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def register(app: typer.Typer) -> None:
    """Attach the ``benchmark`` command to the CLI app."""

    @app.command()
    def benchmark(
        task: str = typer.Option("", "--task", help="Single task to benchmark (skips cases file)."),
        mode: str = typer.Option("balanced", "--mode", help="Mode for single-task run (minimal|balanced|deep)."),
        cases: str = typer.Option("", "--cases", help="Path to TOML cases file (default: .agentpack/benchmark.toml)."),
        compare: bool = typer.Option(False, "--compare", is_flag=True, help="Compare minimal/balanced/deep for each task."),
        init: bool = typer.Option(False, "--init", is_flag=True, help="Scaffold a benchmark.toml and exit."),
    ) -> None:
        """Benchmark file selection quality and token efficiency across tasks."""
        root = _root()

        # --init: write the starter cases file and stop.
        if init:
            out = _scaffold_cases(root)
            console.print(f"[green]✓[/] Created [bold]{out}[/]")
            console.print(" Edit the file to add your tasks and expected files, then run [bold]agentpack benchmark[/].")
            return

        # Build case list
        if task:
            # "auto" defers to the shared task-resolution used by `pack`.
            resolved = _resolve_task(task) if task == "auto" else task
            bench_cases = [BenchmarkCase(task=resolved, mode=mode)]
        else:
            cases_path = Path(cases) if cases else root / ".agentpack" / "benchmark.toml"
            if not cases_path.exists():
                console.print(f"[yellow]No cases file found at {cases_path}[/]")
                console.print(" Run [bold]agentpack benchmark --init[/] to scaffold one, or use [bold]--task \"...\"[/]")
                raise typer.Exit(1)
            bench_cases = _load_cases(cases_path)
            if not bench_cases:
                console.print("[yellow]No cases defined in benchmark file.[/]")
                raise typer.Exit(1)

        # Expand for compare mode
        if compare:
            # Every case is cloned once per mode, keeping its ground truth.
            expanded: list[BenchmarkCase] = []
            for c in bench_cases:
                for m in ("minimal", "balanced", "deep"):
                    expanded.append(BenchmarkCase(task=c.task, mode=m, expected_files=c.expected_files))
            bench_cases = expanded

        console.print(f"\n[bold]Running {len(bench_cases)} benchmark case(s)...[/]\n")

        results: list[CaseResult] = []
        for i, c in enumerate(bench_cases, 1):
            label = f"[{i}/{len(bench_cases)}] {c.task[:50]} mode={c.mode}"
            with console.status(f"[dim]{label}[/]"):
                # A failing case is reported but does not abort the rest.
                try:
                    r = _run_case(root, c)
                    results.append(r)
                except Exception as e:
                    console.print(f"[red]Error on case '{c.task}': {e}[/]")

        if not results:
            raise typer.Exit(1)

        # Output
        if compare and len(set(r.case.task for r in results)) == 1:
            # Single task compared across modes → compact side-by-side table.
            _print_compare_table(results[0].case.task, results)
        elif len(results) == 1:
            _print_case_detail(results[0])
        else:
            if not compare:
                for r in results:
                    _print_case_detail(r)
            console.print("\n[bold]Summary[/]")
            _print_summary_table(results)
|