agentpack-cli 0.1.30__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/PKG-INFO +244 -442
  2. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/README.md +243 -441
  3. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/pyproject.toml +1 -1
  4. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/__init__.py +1 -1
  5. agentpack_cli-0.2.1/src/agentpack/analysis/monorepo.py +181 -0
  6. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/ranking.py +164 -2
  7. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/application/pack_service.py +302 -20
  8. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/cli.py +2 -0
  9. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/benchmark.py +159 -8
  10. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/doctor.py +23 -1
  11. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/hook_cmd.py +7 -2
  12. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/install.py +2 -2
  13. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/pack.py +18 -4
  14. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/scan.py +45 -5
  15. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/stats.py +158 -18
  16. agentpack_cli-0.2.1/src/agentpack/commands/tune.py +158 -0
  17. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/config.py +8 -0
  18. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/context_pack.py +24 -0
  19. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/git.py +39 -0
  20. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/data/agentpack.md +6 -7
  21. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/installers/claude.py +2 -1
  22. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/installers/cursor.py +2 -2
  23. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/installers/windsurf.py +1 -1
  24. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/renderers/markdown.py +8 -1
  25. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/.gitignore +0 -0
  26. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/LICENSE +0 -0
  27. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/__init__.py +0 -0
  28. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/antigravity.py +0 -0
  29. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/base.py +0 -0
  30. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/claude.py +0 -0
  31. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/codex.py +0 -0
  32. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/cursor.py +0 -0
  33. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/detect.py +0 -0
  34. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/generic.py +0 -0
  35. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/adapters/windsurf.py +0 -0
  36. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/__init__.py +0 -0
  37. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/dependency_graph.py +0 -0
  38. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/go_imports.py +0 -0
  39. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/java_imports.py +0 -0
  40. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/js_ts_imports.py +0 -0
  41. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/python_imports.py +0 -0
  42. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/repo_map.py +0 -0
  43. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/rust_imports.py +0 -0
  44. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/symbols.py +0 -0
  45. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/task_classifier.py +0 -0
  46. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/analysis/tests.py +0 -0
  47. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/application/__init__.py +0 -0
  48. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/__init__.py +0 -0
  49. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/_shared.py +0 -0
  50. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/claude_cmd.py +0 -0
  51. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/diff.py +0 -0
  52. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/explain.py +0 -0
  53. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/init.py +0 -0
  54. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/mcp_cmd.py +0 -0
  55. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/monitor.py +0 -0
  56. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/quickstart.py +0 -0
  57. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/repair.py +0 -0
  58. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/status.py +0 -0
  59. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/summarize.py +0 -0
  60. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/commands/watch.py +0 -0
  61. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/__init__.py +0 -0
  62. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/bootstrap.py +0 -0
  63. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/cache.py +0 -0
  64. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/diff.py +0 -0
  65. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/git_hooks.py +0 -0
  66. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/global_install.py +0 -0
  67. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/ignore.py +0 -0
  68. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/merkle.py +0 -0
  69. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/models.py +0 -0
  70. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/redactor.py +0 -0
  71. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/scanner.py +0 -0
  72. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/snapshot.py +0 -0
  73. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/token_estimator.py +0 -0
  74. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/core/vscode_tasks.py +0 -0
  75. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/installers/__init__.py +0 -0
  76. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/installers/antigravity.py +0 -0
  77. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/installers/codex.py +0 -0
  78. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/integrations/__init__.py +0 -0
  79. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/integrations/agents.py +0 -0
  80. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/integrations/git_hooks.py +0 -0
  81. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/integrations/global_install.py +0 -0
  82. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/integrations/vscode_tasks.py +0 -0
  83. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/mcp_server.py +0 -0
  84. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/renderers/__init__.py +0 -0
  85. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/renderers/compact.py +0 -0
  86. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/renderers/receipts.py +0 -0
  87. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/session/__init__.py +0 -0
  88. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/session/state.py +0 -0
  89. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/summaries/__init__.py +0 -0
  90. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/summaries/base.py +0 -0
  91. {agentpack_cli-0.1.30 → agentpack_cli-0.2.1}/src/agentpack/summaries/offline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentpack-cli
3
- Version: 0.1.30
3
+ Version: 0.2.1
4
4
  Summary: Task-aware context packing for AI coding agents — Claude, Cursor, Windsurf, Codex, and Antigravity
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -44,159 +44,131 @@ Description-Content-Type: text/markdown
44
44
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
45
45
  [![CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml/badge.svg)](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
46
46
 
47
- > **Status: alpha (v0.1.30).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
47
+ > **Status: alpha (v0.2.1).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
48
48
  >
49
49
  > **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
50
50
 
51
- **Task-aware context packing for AI coding agents.**
51
+ **Local context engine for AI coding agents.**
52
52
 
53
- AgentPack scans a repository, ranks files for the task you are working on, and writes a compact markdown context pack for Claude Code, Cursor, Windsurf, Codex, Antigravity, CI jobs, or any LLM workflow.
53
+ AgentPack builds task-focused context packs for Claude Code, Cursor, Windsurf, Codex, Antigravity, CI jobs, and any LLM workflow that can read markdown. It scans your repo locally, ranks files for the task, compresses the result into a token budget, and keeps the pack fresh through CLI commands, MCP tools, hooks, and agent integrations.
54
54
 
55
- It is useful when the repo is too large to paste, but you still want the agent to start with more than a blank slate.
55
+ AgentPack is useful when a repo is too large to paste, but a blank agent session wastes time rediscovering the same code structure. It is a context preparation tool, not a coding agent.
56
56
 
57
- **What it is**
58
- - A local CLI for building task-focused context packs
59
- - A summary cache, import graph, ranking engine, and token-budget selector
60
- - Optional integrations for popular coding agents
61
- - An eval harness for measuring whether selected files match files you actually changed
57
+ ## Contents
62
58
 
63
- **What it is not**
64
- - Not a coding agent
65
- - Not a semantic code search engine
66
- - Not a replacement for manual inspection on high-stakes changes
67
- - Not yet proven across a large public benchmark suite
59
+ - [Features](#features)
60
+ - [Install](#install)
61
+ - [Quickstart](#quickstart)
62
+ - [Quality Bar](#quality-bar)
63
+ - [Debugging Selection](#debugging-selection)
64
+ - [Supported Integrations](#supported-integrations)
65
+ - [Commands](#commands)
66
+ - [Architecture](#architecture)
67
+ - [Known Limitations](#known-limitations)
68
+ - [Roadmap](#roadmap)
69
+ - [Development](#development)
68
70
 
69
- ---
70
-
71
- ## The problem
71
+ ## Features
72
72
 
73
- Every time you start a task with an AI coding agent, it has no idea what's in your repo. It either:
73
+ - **Task-focused packing**: ranks files from git changes, task terms, symbols, imports, related tests, configs, churn, and repo history.
74
+ - **Budget-aware compression**: emits `full`, `diff`, `symbols`, `skeleton`, or `summary` views instead of all-or-nothing file dumps.
75
+ - **Semantic repo map**: adds a compact module-level map before file context so agents orient faster.
76
+ - **Freshness and deltas**: records task source, git state, snapshot hashes, selected-file deltas, and stale-context warnings.
77
+ - **Agent integrations**: installs Claude Code, Cursor, Windsurf, Codex, Antigravity, VS Code tasks, git hooks, and MCP configuration.
78
+ - **Local and measurable**: no API calls for scan, summarize, rank, pack, stats, or benchmark; quality is measured with expected-file evals.
74
79
 
75
- 1. **Reads files on demand** (Claude Code, Cursor, Windsurf) — dozens of tool calls, paying exploration cost every session, every turn, forever.
76
- 2. **Gets the whole repo dumped in** (repomix, gitingest) — 50k–500k tokens of noise, most of it irrelevant to the task at hand.
77
- 3. **Gets nothing** — you hand-copy the 5 files you think matter and hope you got it right.
78
-
79
- None of these scale. On a 200-file codebase, option 1 wastes 5–10 turns just orienting. Option 2 degrades output quality (LLMs perform worse on long noisy context). Option 3 misses critical dependencies and configs constantly.
80
+ ## Install
80
81
 
81
- **The root cause:** agents don't know *what's relevant to your current task* without doing the work to figure that out — which costs tokens, time, and money on every session.
82
+ ```bash
83
+ pip install agentpack-cli
84
+ agentpack --version
85
+ ```
82
86
 
83
- ---
87
+ Requires Python 3.10+. The PyPI package is `agentpack-cli`; the command is `agentpack`.
84
88
 
85
- ## The solution
89
+ JavaScript-heavy teams can install the npm wrapper:
86
90
 
87
- AgentPack solves this with a one-time offline analysis pass:
91
+ ```bash
92
+ npm install -g @vishal2612200/agentpack
93
+ agentpack --version
94
+ ```
88
95
 
89
- 1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
90
- 2. **On each task** — classifies the task, builds a compact semantic repo map, then uses git diff, import graph traversal, keyword/concept expansion, implementation-role boosts, and cross-layer relatedness to rank every file.
91
- 3. **Packs a tight context document** — changed files get full content when cheap, relevant diff hunks when large, symbol bodies or interface skeletons when tighter, and summaries only when useful.
92
- 4. **Explains pack quality** — noisy-pack diagnostics, score receipts, token-precision metrics, mode-mix reports, delta summaries, and benchmark miss reports show when the pack is broad or missing expected files.
93
- 5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
96
+ The npm package is a Node launcher around the Python implementation. It installs the matching `agentpack-cli` package into a per-version virtual environment on first run.
94
97
 
95
- The result: your agent starts with a focused map of the relevant code. It should reduce blind exploration, not replace the agent's own file reads or your judgment.
98
+ ## Quickstart
96
99
 
97
100
  ```bash
98
- pip install agentpack-cli
99
-
100
- # Show the fastest path for your repo
101
- agentpack quickstart --task "fix auth token expiry"
102
-
103
- # One-time setup per project
104
101
  cd your-project
105
- agentpack init # creates config/session/task.md + detected agent integration
106
-
107
- # Every terminal session
108
- agentpack watch # keeps context fresh automatically — that's it
102
+ agentpack init --agent codex # or claude, cursor, windsurf, antigravity
103
+ printf '%s\n' "fix auth token expiry" > .agentpack/task.md
104
+ agentpack pack
109
105
  ```
110
106
 
111
- Then open Claude Code / Cursor / Windsurf / Codex / Antigravity and write your task normally. AgentPack keeps `.agentpack/context.md` current.
107
+ This creates `.agentpack/` state, installs the requested agent integration, generates a ranked context pack, and writes the adapter output for that agent. For active local work, keep context fresh with:
108
+
109
+ ```bash
110
+ agentpack watch
111
+ ```
112
112
 
113
- For power users who want background repacking on every commit and cd:
113
+ For a guided setup that explains each next step:
114
114
 
115
115
  ```bash
116
- # Advanced: global automation (opt-in repos only — never touches repos without .agentpack/)
117
- agentpack global-install --dry-run # preview first
118
- agentpack global-install
116
+ agentpack quickstart --task "fix auth token expiry"
119
117
  ```
120
118
 
121
- Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM that can read markdown.
119
+ ## Project Scope
122
120
 
123
- ---
121
+ **AgentPack is:**
124
122
 
125
- ## What to expect
123
+ - A local context engine for building task-focused packs for AI coding agents.
124
+ - A CLI, MCP server, hook runner, and integration layer.
125
+ - A summary cache, import graph, ranking engine, semantic repo map, and token-budget selector.
126
+ - An eval harness for measuring whether selected files match files you actually changed.
126
127
 
127
- AgentPack's strongest value is repeatable orientation: it gives the agent a compact first-pass map before tool calls begin.
128
+ **AgentPack is not:**
128
129
 
129
- Typical results on large repos:
130
+ - A coding agent.
131
+ - A hosted service.
132
+ - A semantic code search engine.
133
+ - A replacement for normal source inspection on critical changes.
134
+ - Yet proven across a large public benchmark suite.
135
+
136
+ ## Quality Bar
137
+
138
+ AgentPack is best treated as a **ranked starting map**. It should reduce repeated orientation work, but the agent and reviewer still own correctness.
130
139
 
131
140
  | Signal | What good looks like |
132
141
  |---|---|
133
- | Token reduction | 90-99% smaller than raw repo text |
142
+ | Token reduction | 90-99% smaller than raw repo text on large repos |
134
143
  | Pack size | Usually 8k-25k tokens for a specific task |
135
- | Pack time | Seconds on warm cache; first summarize pass is slower |
136
- | Recall | Should be high for files you later edit; validate with `agentpack benchmark` |
137
- | Precision | Often modest; summaries are cheap but can still add noise |
138
- | Delta overhead | Hooks can emit a tiny changed-file delta instead of the full pack |
144
+ | Pack time | Seconds on a warm cache; first summarize pass is slower |
145
+ | Recall | Expected files appear near the top; validate with `agentpack benchmark --misses` |
146
+ | Precision | Good enough to reduce exploration; summaries and repo maps may still include noise |
147
+ | Freshness | Stale packs are clearly marked by task, git, and snapshot checks |
139
148
 
140
- The compression number is easy to verify, but it is not the same as usefulness. The important question is: **did AgentPack include the files you actually needed?**
141
-
142
- Use the built-in eval flow:
149
+ Use real repo evals instead of trusting compression numbers:
143
150
 
144
151
  ```bash
145
152
  agentpack benchmark --init
146
- # add real historical tasks and files you actually changed
153
+ # add historical tasks and files actually changed
147
154
  agentpack benchmark --compare --misses
155
+ agentpack benchmark --results-template
148
156
  ```
149
157
 
150
- For source checkouts, there is also a small smoke suite:
158
+ ## Debugging Selection
159
+
160
+ When AgentPack misses a file, the next command should explain the miss:
151
161
 
152
162
  ```bash
153
- agentpack benchmark --sample-fixtures --misses
163
+ agentpack benchmark --misses
164
+ agentpack explain --task "fix billing webhook" --file lib/billing/webhook.ts
165
+ agentpack explain --task "fix billing webhook" --omitted
166
+ agentpack explain --task "fix billing webhook" --budget-plan
154
167
  ```
155
168
 
156
- This runs FastAPI, Next.js, and mixed Python/TypeScript fixture tasks. It is a sanity check, not a substitute for real repo evals.
157
-
158
- ### Current quality bar
159
-
160
- AgentPack is best described as a **map, not a compass**. It is already good at token reduction, changed-file inclusion, related tests, imports, configs, and common concepts like auth/cache/rate limiting. Recent ranking work also improves full-stack tasks by pulling service/controller/schema/handler files when UI routes or pages match the same domain.
161
-
162
- The pack now includes a semantic repo map, task class, and "delta since last pack" section. Diff mode scores individual hunks against task keywords, so large dirty files spend tokens on the changed areas most likely to matter. Metrics also learn from previous noisy selections and gently downrank paths that repeatedly failed to predict later edits.
169
+ `benchmark --misses` reports each expected file that was not selected, including whether it was ignored, scored too low, excluded by summary floor, cut by budget, or absent from the scan. `explain --file` shows the exact score signals for one file. `explain --budget-plan` shows how the token budget was spent across full, diff, symbols, skeleton, and summary modes.
163
170
 
164
- Known weak spot: recall can still be low on unfamiliar product domains or cross-language flows. Use `benchmark --misses` and `agentpack explain` when an expected file is absent. Those commands show whether the miss was caused by ignore rules, low score, summary floor, budget cutoff, or missing task signal.
165
-
166
- ### Observed author-run numbers
167
-
168
- These are local author-session numbers, included as anecdotal context rather than a benchmark claim.
169
-
170
- #### Token Compression
171
-
172
- | Metric | Value |
173
- |--------|-------|
174
- | Sessions | 21 |
175
- | Avg compression | 99.3% |
176
- | Min / Max | 98.7% → 99.9% |
177
- | Total raw (theoretical) | 116.9M tokens |
178
- | Total packed (actual) | 683K tokens |
179
-
180
- Per session: ~4.1M raw repo → ~35K packed context.
181
-
182
- #### Cost (Sonnet 4.6, input tokens only)
183
-
184
- | Scenario | Cost |
185
- |----------|------|
186
- | Full repo dumped each run | ~$350 |
187
- | With AgentPack | ~$2.05 |
188
- | **Realistic** (10% manual cherry-pick baseline) | **~$33 saved** |
189
-
190
- > Honest note: raw_tokens = full repo estimate. Real savings depend on how much context you'd pass manually. Compression ratio (99%+) is verifiable; dollar figure is scenario-dependent.
191
-
192
- #### Quality Signal
193
-
194
- - 42 commits in 7 days (~6/day) vs 4.9/day before
195
- - Shift from single-file fixes → multi-system coordinated fixes
196
- - AgentPack provides dependent files + callers in context → fixes root cause, not symptom
197
- - Correlation observed, causation not isolated
198
-
199
- ---
171
+ This is the core reliability loop: pack, measure recall, inspect misses, then tune task wording, `.agentignore`, or scoring weights.
200
172
 
201
173
  ## When it helps
202
174
 
@@ -244,7 +216,7 @@ These tools have native file access via tool calls. Claude reads exactly the fil
244
216
 
245
217
  AgentPack's value here is different: `agentpack init --agent <x>` configures your agent to read or inject a ranked context pack and auto-repack when the repo changes. On large repos where tool-call exploration piles up across turns, this front-loads the cost once instead of paying per-turn.
246
218
 
247
- ### Where agentpack genuinely wins
219
+ ### Where AgentPack Wins
248
220
 
249
221
  | Scenario | repomix | gitingest | code2prompt | aider | agentpack |
250
222
  |---|---|---|---|---|---|
@@ -253,6 +225,7 @@ AgentPack's value here is different: `agentpack init --agent <x>` configures you
253
225
  | Auto task inference from git | ✗ | ✗ | ✗ | partial | ✓ |
254
226
  | Relevance ranking by task | ✗ | ✗ | ✗ | ✗ | ✓ |
255
227
  | Import graph traversal | ✗ | ✗ | ✗ | ✓ | ✓ |
228
+ | Monorepo workspace hints | ✗ | ✗ | ✗ | manual | ✓ |
256
229
  | Token budget enforcement | manual | manual | manual | ✓ | ✓ |
257
230
  | Cursor / Windsurf / Codex / Antigravity install | ✗ | ✗ | ✗ | ✗ | ✓ |
258
231
  | Zero API calls | ✓ | ✓ | ✓ | ✗ | ✓ |
@@ -261,64 +234,17 @@ AgentPack's value here is different: `agentpack init --agent <x>` configures you
261
234
 
262
235
  _*`--agent generic` outputs standard markdown. Claude adapter has richer instructions._
263
236
 
264
- ### What agentpack does NOT do well
237
+ ### What AgentPack Does Not Do Well
265
238
 
266
239
  - **Interactive sessions on small repos**: if your whole repo is <20k tokens, a simple repo dump may be enough
267
240
  - **One-shot public repo questions**: gitingest's "replace hub with ingest" is faster for quick read-only exploration
268
241
  - **Guaranteed source-of-truth selection**: AgentPack ranks likely files; it can miss task-critical files. Use `agentpack benchmark --misses`, `agentpack explain`, and normal `rg`/agent file reads for correctness.
269
242
  - **Deep semantic understanding**: keyword/concept scoring, imports, symbols, and path roles help, but they are not an LLM-level code understanding system
243
+ - **Public proof without real cases**: bundled fixtures are smoke tests. Strong claims need historical tasks from real repos and published results.
270
244
 
271
245
  ---
272
246
 
273
- ## Install
274
-
275
- ```bash
276
- pip install agentpack-cli
277
- ```
278
-
279
- Requires Python 3.10+.
280
-
281
- > **PyPI note:** The package is `agentpack-cli` (the name `agentpack` was already taken). The CLI command is still `agentpack`.
282
-
283
- ### npm wrapper
284
-
285
- AgentPack can also be installed from npm:
286
-
287
- ```bash
288
- npm install -g @vishal2612200/agentpack
289
- agentpack --version
290
- ```
291
-
292
- The npm package is a thin Node.js wrapper around the Python CLI. It requires Node.js 18+ and Python 3.10+, then installs the matching `agentpack-cli` PyPI package into a per-version virtual environment on first run. This keeps the implementation single-source while giving JavaScript-heavy teams a familiar install path.
293
-
294
- ---
295
-
296
- ## Start Once, Then Work Normally
297
-
298
- For a guided two-minute path in any repo:
299
-
300
- ```bash
301
- agentpack quickstart --task "fix auth token expiry"
302
- ```
303
-
304
- It shows the exact commands to initialize, set task text, generate a first pack, inspect stats, start watch mode, and scaffold a small benchmark file for your own tasks.
305
-
306
- The full workflow:
307
-
308
- ```bash
309
- # One-time project setup
310
- agentpack init # creates config/session/task.md + detected agent integration
311
-
312
- # Every terminal session — just one command
313
- agentpack watch # auto-resumes session, refreshes context on file/task changes
314
- ```
315
-
316
- Then open Claude Code / Cursor / Codex and write your coding task normally.
317
-
318
- - AgentPack keeps `.agentpack/context.md` and `.agentpack/context.claude.md` fresh while `watch` is running.
319
- - To change the task: edit `.agentpack/task.md` directly, or tell Claude — it updates the file itself. `watch` picks up the change automatically.
320
-
321
- ### Agent integration matrix
247
+ ## Supported Integrations
322
248
 
323
249
  | Agent | Automation level | Method |
324
250
  |---|---|---|
@@ -329,7 +255,7 @@ Then open Claude Code / Cursor / Codex and write your coding task normally.
329
255
  | Antigravity | Medium | `init` writes `GEMINI.md`, VS Code task + git hooks |
330
256
  | Generic | Basic | `watch` mode + read `context.md` |
331
257
 
332
- ### Honest limitations
258
+ ### Integration limitations
333
259
 
334
260
  - AgentPack cannot intercept prompts inside IDEs — Cursor/Windsurf rely on rules being followed.
335
261
  - Claude wrapper (`agentpack claude`) is the most deterministic integration.
@@ -338,29 +264,6 @@ Then open Claude Code / Cursor / Codex and write your coding task normally.
338
264
 
339
265
  ---
340
266
 
341
- ## Quickstart
342
-
343
- ```bash
344
- pip install agentpack-cli
345
- cd your-project
346
- agentpack init # one-time setup: config/session/task.md + detected agent integration
347
- agentpack watch # in another terminal — keeps context fresh automatically
348
- ```
349
-
350
- Then open your agent and write your task normally.
351
-
352
- **Power users (global automation):**
353
-
354
- ```bash
355
- agentpack global-install --dry-run # preview
356
- agentpack global-install # apply
357
- source ~/.zshrc
358
- ```
359
-
360
- Then opt each project in: `cd your-project && agentpack init`. After that repo hooks or shell hooks keep context fresh, and Claude Code gets prompt-time context hints — no manual steps.
361
-
362
- ---
363
-
364
267
  ## Agent setup
365
268
 
366
269
  `agentpack init` is the normal one-command project setup. It creates `.agentpack/` state and installs the detected agent integration. Re-run it any time; integration writes are idempotent and never clobber unrelated config.
@@ -477,7 +380,7 @@ Token counts use tiktoken `cl100k_base` — a close approximation to Claude's ac
477
380
 
478
381
  ## CI/CD: pack per PR
479
382
 
480
- ### agentpack's own CI
383
+ ### AgentPack's Own CI
481
384
 
482
385
  AgentPack uses two workflows:
483
386
 
@@ -531,6 +434,35 @@ Reviewers download the artifact and open it in their agent of choice. No repo cl
531
434
 
532
435
  ## Commands
533
436
 
437
+ Most users only need four AgentPack commands, plus one line that writes the task file:
438
+
439
+ ```bash
440
+ agentpack init --agent codex
441
+ printf '%s\n' "describe the change" > .agentpack/task.md
442
+ agentpack pack
443
+ agentpack watch
444
+ agentpack doctor --agent all
445
+ ```
446
+
447
+ Command map:
448
+
449
+ | Command | Use when |
450
+ |---|---|
451
+ | `agentpack init` | Set up `.agentpack/` and install one agent integration for a repo |
452
+ | `agentpack install` | Refresh or add an agent integration without changing project state |
453
+ | `agentpack repair` | Restore missing or drifted integration files |
454
+ | `agentpack pack` | Generate a ranked context pack for one task |
455
+ | `agentpack watch` | Keep the context pack fresh while you work |
456
+ | `agentpack doctor` | Audit hooks, agent files, CLI path, and repo health |
457
+ | `agentpack explain` | Understand why a file was selected or omitted |
458
+ | `agentpack benchmark` | Measure recall, precision, and misses against real tasks |
459
+ | `agentpack tune` | Suggest fixes from recent pack metrics and benchmark misses |
460
+ | `agentpack status` | Inspect current pack freshness and metadata |
461
+ | `agentpack diff` | Show what changed between context snapshots |
462
+ | `agentpack monitor` | Review recent pack runs and quality signals |
463
+ | `agentpack scan` | Inspect packable, ignored, binary, and largest files |
464
+ | `agentpack global-install` | Install opt-in global hooks for initialized repos |
465
+
534
466
  ### `agentpack global-install`
535
467
 
536
468
  Install once — works in every repo from that point on. The recommended first step.
@@ -595,7 +527,7 @@ Example output:
595
527
 
596
528
  ```
597
529
  CLI
598
- ✓ agentpack found at /usr/local/bin/agentpack (0.1.0)
530
+ ✓ agentpack found at /usr/local/bin/agentpack (0.2.x)
599
531
 
600
532
  Git template hooks (~/.git-templates/hooks/)
601
533
  ✓ post-commit
@@ -710,17 +642,21 @@ Summaries are built with parallel AST/regex analysis — no network, no tokens s
710
642
 
711
643
  ### `agentpack pack`
712
644
 
713
- Generate a context pack.
645
+ Generate a context pack. Task text lives in `.agentpack/task.md`; inline task strings are no longer supported on `pack`. `--task auto` remains for old hooks and scripts, and is the default when the flag is omitted.
714
646
 
715
647
  ```bash
716
- agentpack pack --task "fix auth session bug" # auto-detects your IDE
717
- agentpack pack --agent claude --task "fix auth bug" # explicit agent
648
+ printf '%s\n' "fix auth session bug" > .agentpack/task.md
649
+ agentpack pack # auto-detects your IDE
650
+ agentpack pack --agent claude # explicit agent
651
+ agentpack pack --workspace apps/web
718
652
 
719
653
  # Only include changes since a git ref
720
- agentpack pack --task "review these changes" --since main
654
+ printf '%s\n' "review these changes" > .agentpack/task.md
655
+ agentpack pack --since main
721
656
 
722
657
  # Watch mode — re-packs on every file change
723
- agentpack pack --task "refactor auth" --session
658
+ printf '%s\n' "refactor auth" > .agentpack/task.md
659
+ agentpack pack --session
724
660
  ```
725
661
 
726
662
  Options:
@@ -728,9 +664,10 @@ Options:
728
664
  | Flag | Default | Description |
729
665
  |------|---------|-------------|
730
666
  | `--agent` | `auto` | Target agent (`auto` \| `claude` \| `cursor` \| `windsurf` \| `codex` \| `antigravity` \| `generic`). `auto` detects the active IDE from env and project files. |
731
- | `--task` | `auto` | Task description, or `auto` to infer from git |
667
+ | `--task` | `auto` | Backward-compatible task source. Only `auto` is supported; write task text to `.agentpack/task.md`. |
732
668
  | `--mode` | `balanced` | Budget mode: `minimal`, `balanced`, `deep` |
733
669
  | `--budget` | 0 (uses config default 25000) | Token budget |
670
+ | `--workspace` | — | Restrict packing to a monorepo workspace and write `.agentpack/workspaces/<workspace>/context.md` |
734
671
  | `--since` | — | Only include files changed since this git ref |
735
672
  | `--session` | off | Re-pack on every file change (watch mode) |
736
673
  | `--refresh` | off | Force rebuild summaries before packing |
@@ -773,18 +710,12 @@ agentpack quickstart --task "fix auth token expiry" --write
773
710
 
774
711
  ---
775
712
 
776
- ### `agentpack session` _(removed)_
777
-
778
- Session management was removed in v0.1.12. `agentpack init` bootstraps the session automatically. Use `agentpack watch` to keep context current. To change the task, edit `.agentpack/task.md`.
779
-
780
- ---
781
-
782
713
  ### `agentpack watch`
783
714
 
784
715
  Watch for file and task changes, refresh context automatically.
785
716
 
786
717
  ```bash
787
- agentpack watch # uses session agent/mode if session active
718
+ agentpack watch # refresh context on source/task changes
788
719
  agentpack watch --debounce 3.0 # wait 3s after last change before refresh
789
720
  ```
790
721
 
@@ -838,6 +769,10 @@ Register in Claude Code settings (`~/.claude/settings.json`):
838
769
  | `pack_context(task, mode, budget, max_tokens)` | Generate a ranked context pack for a task. Returns packed markdown, truncated to `max_tokens` (default 20,000). |
839
770
  | `get_context()` | Return the latest pre-built pack instantly (no repack). Prepends a freshness/staleness header so you know if it's stale. |
840
771
  | `refresh()` | Refresh using the current `task.md` or git-inferred task. |
772
+ | `explain_file(path, task)` | Show score, inclusion mode, reasons, symbols, imports, and importers for one file. |
773
+ | `get_related_files(path, depth)` | Return import-graph neighbours and related tests for a file. |
774
+ | `get_delta_context(max_files)` | Return the latest selected-file delta plus top current selected files. Useful for cheap prompt-time refresh checks. |
775
+ | `get_stats()` | Return latest pack stats, savings, selection quality, excluded files, and benchmark-style signals. |
841
776
 
842
777
  **Staleness detection:** `get_context()` compares the snapshot hash from when the pack was built against the current repo snapshot. If files changed since last pack, it prepends:
843
778
  ```
@@ -893,9 +828,11 @@ Measure token efficiency, file selection quality, and speed across tasks.
893
828
  agentpack benchmark --task "fix auth token expiry" # single task
894
829
  agentpack benchmark --task "fix auth bug" --compare # compare minimal/balanced/deep
895
830
  agentpack benchmark --init # scaffold .agentpack/benchmark.toml
831
+ agentpack benchmark --results-template # scaffold publishable results note
896
832
  agentpack benchmark # run all cases in benchmark.toml
897
833
  agentpack benchmark --sample-fixtures # source checkout demo evals
898
834
  agentpack benchmark --misses # explain expected-file misses
835
+ agentpack benchmark --prove-targets # fail if recall/token precision targets miss
899
836
  ```
900
837
 
901
838
  Output per case:
@@ -936,6 +873,7 @@ Mode comparison: fix auth token expiry
936
873
  task = "fix auth token expiry"
937
874
  mode = "balanced"
938
875
  task_type = "backend-api"
876
+ workspace = "apps/api" # optional, for monorepos
939
877
  expected_files = [
940
878
  "src/auth/token.py",
941
879
  "src/auth/session.py",
@@ -949,6 +887,8 @@ expected_files = [
949
887
 
950
888
  Use `--misses` when recall is low. It prints each expected file that was not selected with status, rank, score, and scoring reasons, which helps separate ignored files, budget cuts, low scores, and missing dependency signals.
951
889
 
890
+ Use `--prove-targets` in CI or release prep when benchmark cases have `expected_files`. By default it requires average recall >=60% and token precision >=50%; tune with `--min-recall` and `--min-token-precision`.
891
+
952
892
  Add `task_type` to group results by workflow area. Benchmark summaries report average precision, recall, F1, and token noise by type, so a repo can show "backend-api is good, frontend-web is noisy" instead of hiding that under one aggregate.
953
893
 
954
894
  ---
@@ -957,6 +897,12 @@ Add `task_type` to group results by workflow area. Benchmark summaries report av
957
897
 
958
898
  Scan the repo and report file statistics.
959
899
 
900
+ ```bash
901
+ agentpack scan
902
+ agentpack scan --largest 20
903
+ agentpack scan --ignored-summary
904
+ ```
905
+
960
906
  ```
961
907
  Files discovered: 1,248
962
908
  Files ignored/binary: 230
@@ -965,6 +911,8 @@ Raw estimated tokens: 940,000
965
911
  Tokens after ignore: 210,000
966
912
  ```
967
913
 
914
+ Use `--largest` to find high-token files still entering packs. Use `--ignored-summary` when repo counts look surprising; it groups ignored and binary files by common directories or file extensions.
915
+
968
916
  ---
969
917
 
970
918
  ### `agentpack stats`
@@ -977,7 +925,7 @@ agentpack stats
977
925
 
978
926
  When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files from the latest pack and avg recall/precision/F1 over the last 10 runs.
979
927
 
980
- Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
928
+ Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." Context precision also credits obvious read-only support context, such as paired tests beside changed source files. `stats` breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible. In monorepos, it also reports selected-file distribution by workspace when workspace metadata exists.
981
929
 
982
930
  To build a real usefulness signal for your repo:
983
931
 
@@ -986,13 +934,31 @@ agentpack benchmark --sample-fixtures
986
934
 
987
935
  agentpack benchmark --init
988
936
  # edit .agentpack/benchmark.toml with real tasks + files you actually changed
989
- agentpack benchmark --compare --misses
937
+ agentpack benchmark --compare --misses --prove-targets
990
938
  ```
991
939
 
992
- `--sample-fixtures` runs bundled FastAPI, Next.js, and mixed Python/TypeScript fixture evals from an AgentPack source checkout. It is a smoke test, not a claim about your repo.
940
+ `--sample-fixtures` runs bundled FastAPI, Next.js, mixed Python/TypeScript, Django REST-style, Go service, and Rails-style fixture evals from an AgentPack source checkout. It is a smoke test, not a claim about your repo.
993
941
 
994
942
  For an 8+ usefulness signal, use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, `task_type` labels, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
995
943
 
944
+ See [benchmarks/README.md](benchmarks/README.md) for the public smoke-suite fixtures, quality gates, and the recommended miss-debugging workflow.
945
+
946
+ ---
947
+
948
+ ### `agentpack tune`
949
+
950
+ Turn noisy `stats` and `benchmark --misses` output into next actions.
951
+
952
+ ```bash
953
+ agentpack tune
954
+ agentpack tune --write
955
+ agentpack tune --no-benchmark
956
+ ```
957
+
958
+ `tune` reads `.agentpack/metrics.jsonl` and, when present, `.agentpack/benchmark_results.jsonl`. It flags low token precision, zero-value summaries, repeated noisy paths, support-context gaps, and benchmark miss patterns. `--write` saves the same guidance to `.agentpack/tuning.md`.
959
+
960
+ This command does not pretend a pack is correct. It gives the next thing to inspect: lower mode, explain noisy files, adjust `.agentignore`, add benchmark cases, or inspect budget/score misses.
961
+
996
962
  ---
997
963
 
998
964
  ### `agentpack status`
@@ -1039,23 +1005,20 @@ agentpack monitor --clear
1039
1005
  ## How it works
1040
1006
 
1041
1007
  ```
1042
- 1. Scan repo → apply .agentignore → hash every file
1043
- 2. Build current snapshot → diff against previous snapshot
1044
- 3. Get git changed/staged files (+ --since <ref> if specified)
1045
- 4. Build import dependency graph (Python/JS/TS: full; Go/Rust/Java: best-effort)
1046
- 5. Detect related test files
1047
- 6. Extract task keywords + concept synonym expansion
1048
- 7. Enrich keywords from changed file content (high-frequency identifiers)
1049
- 8. Score every file, rank by score
1050
- 9. Select within token budget
1051
- 10. For each selected file:
1052
- changed + small → full content
1053
- changed + large → symbol bodies (ast.get_source_segment)
1054
- unchanged dep → summary + signatures
1055
- low-score file → summary only
1056
- 11. Generate context receipts (why each file included/excluded)
1057
- 12. Render markdown for target agent → save context pack
1058
- 13. Save snapshot + metadata + metrics
1008
+ 1. Scan repo → apply .agentignore → skip generated AgentPack outputs → hash files
1009
+ 2. Build offline summaries → role, imports, symbols, side effects, public API, errors, test hints
1010
+ 3. Build import dependency graph → Python/JS/TS full, Go/Rust/Java/Kotlin best-effort
1011
+ 4. Detect changed files → snapshot diff + git working tree + staged + optional --since ref
1012
+ 5. Classify task → bugfix / feature / docs / release / infra / audit / test / ui / refactor
1013
+ 6. Extract weighted task terms → literals, variants, concept synonyms, changed-file identifiers
1014
+ 7. Score every file → changes, task terms, symbols, content, deps, tests, configs, churn
1015
+ 8. Apply history learning → gently downrank files that were repeatedly selected as noise
1016
+ 9. Build semantic repo map → compact module/group map reserved inside the token budget
1017
+ 10. Select by value per token → full / diff / symbols / skeleton / summary / omit
1018
+ 11. For large diffs → score hunks against task keywords and keep the most relevant hunks
1019
+ 12. Redact secrets at materialization → before content reaches any renderer or adapter
1020
+ 13. Render context → freshness, task class, repo map, delta since last pack, receipts, files
1021
+ 14. Persist state → adapter output, canonical .agentpack/context.md, snapshot, metadata, metrics
1059
1022
  ```
1060
1023
 
1061
1024
  ---
@@ -1188,7 +1151,11 @@ Works like `.gitignore`. Default rules exclude:
1188
1151
  └────────────────────┬────────────────────┘
1189
1152
 
1190
1153
  ┌────────────────────▼────────────────────┐
1191
- │ ANALYSIS LAYER │
1154
+ │ SUMMARY + ANALYSIS LAYER │
1155
+ │ │
1156
+ │ Summary cache ── role, imports, │
1157
+ │ (offline) symbols, side effects, │
1158
+ │ public API, errors │
1192
1159
  │ │
1193
1160
  │ Import graph ── Python AST │
1194
1161
  │ (6 languages) ─ JS/TS regex │
@@ -1205,16 +1172,7 @@ Works like `.gitignore`. Default rules exclude:
1205
1172
  │ Task keywords ── stopwords + variants│
1206
1173
  │ ── concept synonyms │
1207
1174
  │ ── content enrichment │
1208
- └────────────────────┬────────────────────┘
1209
-
1210
- ┌────────────────────▼────────────────────┐
1211
- │ SUMMARY CACHE (offline, local) │
1212
- │ │
1213
- │ key: path + hash + provider + schema │
1214
- │ hit → instant, zero I/O │
1215
- │ miss → build from AST/regex, cache it │
1216
- │ │
1217
- │ offline ── AST / regex extract │
1175
+ │ Task class ── bugfix/docs/release │
1218
1176
  └────────────────────┬────────────────────┘
1219
1177
 
1220
1178
  ┌────────────────────▼────────────────────┐
@@ -1236,17 +1194,27 @@ Works like `.gitignore`. Default rules exclude:
1236
1194
  │ +50 dep +40 rev-dep │
1237
1195
  │ +35 test +25 config +20 recent │
1238
1196
  │ -50 large unrelated │
1197
+ │ History noise penalty from metrics │
1198
+ └────────────────────┬────────────────────┘
1199
+
1200
+ ┌────────────────────▼────────────────────┐
1201
+ │ REPO MAP │
1202
+ │ │
1203
+ │ Compact semantic map grouped by module │
1204
+ │ Reserved inside the context budget │
1239
1205
  └────────────────────┬────────────────────┘
1240
1206
 
1241
1207
  ┌────────────────────▼────────────────────┐
1242
1208
  │ BUDGET SELECTION │
1243
1209
  │ │
1244
- │ Sort by score, consume until budget │
1210
+ │ Sort by changed/task/value-per-token │
1245
1211
  │ │
1246
1212
  │ changed + small ──▶ full content │
1247
- │ changed + large ──▶ symbol bodies │
1248
- │ unchanged dep ──▶ summary + sigs │
1249
- │ low score ──▶ summary only │
1213
+ │ changed + large ──▶ task-scored diff │
1214
+ │ task symbols ──▶ symbol bodies │
1215
+ │ interface view ──▶ skeleton │
1216
+ │ low context ──▶ summary/omit │
1217
+ │ budget fallback ──▶ downgrade first │
1250
1218
  └────────────────────┬────────────────────┘
1251
1219
 
1252
1220
  ┌────────────────────▼────────────────────┐
@@ -1259,6 +1227,8 @@ Works like `.gitignore`. Default rules exclude:
1259
1227
  │ Antigravity adapter ──▶ .agent/skills/agentpack/SKILL.md │
1260
1228
  │ Generic adapter ──▶ context.md │
1261
1229
  │ │
1230
+ │ Freshness + task class + repo map │
1231
+ │ Delta since last pack │
1262
1232
  │ Context receipts (why each file in/out)│
1263
1233
  │ Secret redaction (AWS/GH/OpenAI tokens)│
1264
1234
  └─────────────────────────────────────────┘
@@ -1274,16 +1244,16 @@ src/agentpack/
1274
1244
  agentpack.md # bundled /agentpack slash command for Claude CLI
1275
1245
 
1276
1246
  application/
1277
- pack_service.py # PackPlanner: shared scan→rank→select pipeline
1247
+ pack_service.py # PackPlanner: shared scan→summarize→graph→rank→repo_map→select pipeline
1278
1248
  # PackService: materializes plan → writes context file
1279
1249
  # AdapterRegistry: maps agent names to adapter instances
1280
1250
  # PackRequest / PackResult / PackPlan DTOs
1281
1251
 
1282
1252
  domain/ (via core/models.py)
1283
1253
  FileInfo, ScanResult # scan output (packable / ignored / binary)
1284
- Symbol, FileSummary # summary cache objects
1254
+ Symbol, FileSummary # summary cache objects (role, side_effects, public_api, errors, tests)
1285
1255
  SelectedFile, Receipt # selection output with redaction_warnings
1286
- ContextPack # final artifact with redaction_warnings
1256
+ ContextPack # final artifact with freshness, repo_map, delta_summary, redaction_warnings
1287
1257
  DependencyNode # typed graph node (path, imports, imported_by, tests)
1288
1258
  DependencyGraph # typed graph container (nodes dict + dict-like accessors)
1289
1259
 
@@ -1297,7 +1267,7 @@ src/agentpack/
1297
1267
  git.py # subprocess git + task inference from branch/commits
1298
1268
  merkle.py # root hash: sort(path:hash) → sha256
1299
1269
  cache.py # summary cache keyed path+hash+provider+version
1300
- context_pack.py # select_files: budget selection + secret redaction
1270
+ context_pack.py # select_files: full/diff/symbols/skeleton/summary + hunk scoring + redaction
1301
1271
  token_estimator.py # tiktoken cl100k_base (approximate)
1302
1272
  redactor.py # redact_secrets: fires at content materialization
1303
1273
  bootstrap.py # is_initialized, bootstrap_if_needed
@@ -1312,9 +1282,12 @@ src/agentpack/
1312
1282
  symbols.py # AST symbols + body via ast.get_source_segment
1313
1283
  tests.py # source → test file mapping heuristics
1314
1284
  ranking.py # keyword extraction, concept synonyms, scoring
1285
+ monorepo.py # workspace detection + workspace ownership helpers
1286
+ repo_map.py # compact semantic repo map reserved inside token budget
1287
+ task_classifier.py # coarse task class for freshness/rendering/scoring context
1315
1288
 
1316
1289
  summaries/
1317
- offline.py # zero-API: AST/regex → imports, symbols, summary
1290
+ offline.py # zero-API: AST/regex → imports, symbols, role, side effects, API, errors
1318
1291
  base.py # cache-or-build orchestration (parallel, ThreadPool+ProcessPool)
1319
1292
 
1320
1293
  adapters/ # context rendering only — no installation logic
@@ -1335,16 +1308,18 @@ src/agentpack/
1335
1308
  antigravity.py # AntigravityInstaller: GEMINI.md + auto-repack
1336
1309
 
1337
1310
  integrations/ # system/tool integration (not core domain)
1338
- agents.py # shared agent install/check contract
1311
+ agents.py # shared agent install/check/repair contract for all supported agents
1339
1312
  git_hooks.py # install/remove .git/hooks post-commit/merge/checkout
1340
1313
  vscode_tasks.py # install/remove .vscode/tasks.json entries
1341
1314
  global_install.py # global: git template hooks + shell rc hook
1342
1315
 
1343
1316
  renderers/
1344
- markdown.py # renders pre-redacted ContextPack to markdown
1317
+ markdown.py # renders pre-redacted ContextPack to markdown, including freshness/map/delta
1345
1318
  compact.py # compact protocol format for session context files
1346
1319
  receipts.py # context receipt formatter
1347
1320
 
1321
+ mcp_server.py # MCP tools: pack_context, get_context, explain, related, stats, delta
1322
+
1348
1323
  session/
1349
1324
  state.py # SessionState dataclass + load/save/create/stop helpers
1350
1325
  __init__.py # re-exports from state.py
@@ -1363,6 +1338,7 @@ src/agentpack/
1363
1338
  monitor.py # agentpack monitor
1364
1339
  explain.py # agentpack explain
1365
1340
  doctor.py # agentpack doctor
1341
+ tune.py # agentpack tune — tuning suggestions from metrics + benchmark misses
1366
1342
  hook_cmd.py # agentpack hook — Claude prompt hook + stale detection
1367
1343
  mcp_cmd.py # agentpack mcp — MCP server entrypoint
1368
1344
  watch.py # agentpack watch — file watcher with debounce
@@ -1374,212 +1350,21 @@ src/agentpack/
1374
1350
 
1375
1351
  - **Redaction at materialization**: secrets are stripped inside `select_files()` before content reaches any renderer or adapter. Every output format gets redacted content automatically — no per-renderer redaction needed.
1376
1352
  - **`ScanResult` splits cleanly**: `scan()` returns `ScanResult(packable, ignored, binary)` — downstream code only processes `packable` files, eliminating `if f.ignored or f.binary` guards throughout.
1377
- - **`PackPlanner` owns shared planning**: `PackPlanner.plan()` runs scan → summarize → graph → rank → select and returns a `PackPlan`. Both `pack` and `explain` use the same planner — no duplicated pipeline logic, no drift.
1378
- - **`PackService` materializes a plan**: takes a `PackPlan`, builds the `ContextPack` artifact, delegates rendering to `AdapterRegistry`, persists snapshot + metadata + metrics.
1353
+ - **`PackPlanner` owns shared planning**: `PackPlanner.plan()` runs scan → summarize → graph → changes → rank → repo map → select and returns a `PackPlan`. Both `pack` and `explain` use the same planner — no duplicated pipeline logic, no drift.
1354
+ - **`PackService` materializes a plan**: takes a `PackPlan`, computes delta since the previous pack, builds the `ContextPack` artifact, delegates rendering to `AdapterRegistry`, persists snapshot + metadata + metrics.
1355
+ - **Mode selection is value-aware**: changed files can be `full`, `diff`, `symbols`, `skeleton`, or `summary`. Large diffs keep task-relevant hunks first, and tight budgets downgrade files before dropping them.
1356
+ - **Repo maps are first-class context**: `analysis/repo_map.py` builds a compact semantic map before file context, and its token cost is reserved before file selection.
1357
+ - **Metrics feed history learning**: selection accuracy records hit/noise paths, token precision, mode counts, and mode tokens. Later packs gently penalize repeated noisy paths unless they are currently changed.
1358
+ - **Git history feeds recall**: files that historically changed in the same commits as live changed files receive a small boost, helping related tests, schemas, services, and configs surface without forcing full-content inclusion.
1359
+ - **Co-change is guarded by precision history**: one-off co-change neighbors are ignored, and paths repeatedly measured as noise do not get revived by history boosts.
1360
+ - **Precision guardrails adapt to bad history**: when summary token precision stays near zero, later packs raise the summary score floor, cap summaries more aggressively, and suppress summaries entirely for no-live-change packs. Weak filename-only matches are also damped unless other signals confirm them.
1379
1361
  - **`AdapterRegistry` maps agent → adapter**: adding a new agent output format requires one entry in `AdapterRegistry.get()`, not changes to `PackService`.
1380
1362
  - **`detect_agent()` runs at invocation time**: `--agent auto` (the default) calls `detect_agent()` fresh on every `pack` run and git hook execution — so context is always written for the active IDE, even when switching between agents or running in CI.
1381
1363
  - **`DependencyGraph` is typed**: `dependency_graph.build()` returns `DependencyGraph(nodes: dict[str, DependencyNode])` — no more `dict[str, dict]` with stringly-typed keys like `"imported_by"`. Typos are caught at the model layer.
1382
1364
  - **`integrations/` vs `core/`**: git hooks, shell rc patching, and VS Code tasks are infrastructure concerns — they live in `integrations/`, not `core/`. `core/` is pure domain logic.
1383
1365
  - **Adapters render; installers configure**: `adapters/` knows how to write a context file for an agent. `installers/` knows how to configure the agent's tool (CLAUDE.md, .cursorrules, settings.json). They are separate concerns and separate classes.
1384
-
1385
- ---
1386
-
1387
- ## Practical examples
1388
-
1389
- ### Bug fix: "I have a failing test, help me fix it"
1390
-
1391
- ```bash
1392
- # You're debugging a test failure in the auth module
1393
- agentpack pack --task "fix failing test in auth token validation"
1394
- ```
1395
-
1396
- AgentPack selects: the failing test file (modified), `auth/token.py` (dep), `auth/session.py` (dep), `config/settings.py` (config), skips 180 unrelated files. Your agent gets 12k tokens of precisely relevant context and starts debugging immediately.
1397
-
1398
- ---
1399
-
1400
- ### Feature: "Add rate limiting to the API"
1401
-
1402
- ```bash
1403
- # On a feature branch, nothing modified yet
1404
- agentpack pack --task "add rate limiting to REST API endpoints"
1405
- ```
1406
-
1407
- Keyword expansion activates: "rate limiting" → `throttle`, `leaky`, `bucket`, `quota`. AgentPack scores: `middleware/` directory (path keyword `api`), existing `throttle.py` or `leaky_bucket.py` (content keyword), `routes/*.py` (deps). Your agent gets the full middleware stack and starts implementing, not exploring.
1408
-
1409
- ---
1410
-
1411
- ### Code review: "Review my PR before I push"
1412
-
1413
- ```bash
1414
- # Review only what changed vs main
1415
- agentpack pack --task "code review auth refactor" --since main
1416
- ```
1417
-
1418
- Only files touched in this branch are included (full content). Everything else is summaries or omitted. Your agent reviews exactly the diff-visible code, not the whole codebase.
1419
-
1420
- ---
1421
-
1422
- ### Refactor: "Help me refactor the database layer"
1423
-
1424
- ```bash
1425
- agentpack pack --task "refactor database connection pooling" --mode deep
1426
- ```
1427
-
1428
- `--mode deep` adds: related docs, more full-content files, broader dep tree. Use when the task touches many files and you want your agent to see more context upfront.
1429
-
1430
- ---
1431
-
1432
- ### CI: automated context on every PR
1433
-
1434
- Add to `.github/workflows/agentpack-context.yml` — see the full example in [CI/CD: pack per PR](#cicd-pack-per-pr). Reviewers and CI bots get focused context without cloning the repo.
1435
-
1436
- ---
1437
-
1438
- ### Session mode: keep context fresh while you work
1439
-
1440
- ```bash
1441
- # One-time project setup
1442
- agentpack init # creates config/session/task.md + detected agent integration
1443
- # Edit .agentpack/task.md to set your task
1444
-
1445
- # Every terminal session — just one command
1446
- agentpack watch # keeps context fresh automatically
1447
-
1448
- # Change task mid-session: edit .agentpack/task.md directly
1449
- # watch detects the change and refreshes automatically
1450
- ```
1451
-
1452
- ---
1453
-
1454
- ### Debug why a file isn't showing up
1455
-
1456
- ```bash
1457
- agentpack explain --task "fix rate limiting in auth middleware"
1458
- # Top selected files:
1459
- # 1. src/auth/middleware.py score=180 [full] modified, filename keyword match
1460
- # 2. src/auth/limiter.py score=130 [symbols] dep + content keyword "throttle"
1461
- # ...
1462
- # Excluded:
1463
- # - src/payments/billing.py score=8 score too low
1464
- ```
1465
-
1466
- ---
1467
-
1468
- ## Tips & tricks
1469
-
1470
- ### Let `--task auto` do the work
1471
-
1472
- Skip writing a task description — agentpack infers it from your branch name, changed files, and recent commits:
1473
-
1474
- ```bash
1475
- agentpack pack --task auto
1476
- ```
1477
-
1478
- Priority order (strongest → weakest):
1479
-
1480
- | Source | Example output |
1481
- |--------|---------------|
1482
- | `task.md` (explicit) | `"migrate DB schema to multi-tenant"` |
1483
- | branch + staged files | `"feat add-rate-limiting: payments, throttle"` |
1484
- | staged files only | `"payments, throttle"` |
1485
- | branch + unstaged | `"feat add-rate-limiting: session, token"` |
1486
- | branch + latest commit | `"feat add-rate-limiting: fix token expiry check"` |
1487
- | branch name alone | `"feat add-rate-limiting"` |
1488
- | unstaged files | `"session, token"` |
1489
- | recent commit messages | `"fix token expiry check; add pagination"` |
1490
- | recently modified files | `"session, payments"` (noisy — last resort) |
1491
-
1492
- The heuristic that fired is logged: `Auto task (branch+staged): feat add-rate-limiting: payments`.
1493
-
1494
- The more descriptive your branch names (`feat/add-rate-limiting` beats `dev`) and the more you stage before running, the more accurate the inference.
1495
-
1496
- ### Concept synonym expansion
1497
-
1498
- AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl`; domain terms such as `kundali` expand toward astrology/chart/compatibility terms. Files that implement a concept but don't use its exact name can still rank.
1499
-
1500
- ### Full-stack role boosts
1501
-
1502
- When a task points at a page, route, or API surface, AgentPack also gives a controlled boost to related implementation roles such as `service`, `controller`, `schema`, `handler`, `repository`, and `client`. This helps full-stack tasks pull backend implementation files instead of only frontend entrypoints.
1503
-
1504
- This is still heuristic. If a service should have appeared and did not, add it as an `expected_files` entry in `benchmark.toml` and run:
1505
-
1506
- ```bash
1507
- agentpack benchmark --compare --misses
1508
- ```
1509
-
1510
- ### Content-based keyword enrichment
1511
-
1512
- When you run `agentpack pack`, changed file content is scanned for high-frequency identifiers. If you're editing `session_manager.py` that mentions `validate_token` 30 times, `validate` and `token` are added as keywords — related files that use the same terms get a score boost even if your task string didn't mention them.
1513
-
1514
- ### Commit the summary cache for instant team packs
1515
-
1516
- ```bash
1517
- agentpack init --share-cache
1518
- git add .agentpack/cache/
1519
- git commit -m "chore: add agentpack summary cache"
1520
- ```
1521
-
1522
- Every teammate and CI job skips the summarize step. `agentpack pack` is significantly faster from a warm cache.
1523
-
1524
- ### Use `--since` for PR reviews
1525
-
1526
- ```bash
1527
- agentpack pack --task "review auth changes" --since main
1528
- ```
1529
-
1530
- Only includes files changed since `main`. Cuts out noise from unrelated work in long-running branches.
1531
-
1532
- ### Tune the budget for your use case
1533
-
1534
- ```bash
1535
- agentpack pack --task "fix bug" --mode minimal # changed files only, fewest tokens
1536
- agentpack pack --task "refactor" --mode deep # everything including docs
1537
- agentpack pack --task "fix bug" --budget 40000 # explicit token cap
1538
- ```
1539
-
1540
- `balanced` (default) is right for most tasks. Use `minimal` for quick fixes, `deep` when architectural context matters.
1541
-
1542
- ### Watch mode for active sessions
1543
-
1544
- ```bash
1545
- agentpack init # one-time setup (creates session/task.md + detected agent integration)
1546
- agentpack watch # in another terminal — auto-resumes each time
1547
- ```
1548
-
1549
- Refreshes `.agentpack/context.md` every time you save a file. Change the task by editing `.agentpack/task.md` directly — or tell Claude and it writes the file itself. `watch` picks up the change automatically.
1550
-
1551
- ### Debug file selection with `explain`
1552
-
1553
- ```bash
1554
- agentpack explain --task "fix auth session bug"
1555
- ```
1556
-
1557
- Shows ranked scores and reasons before committing to a pack. Use when a file you expect isn't appearing.
1558
-
1559
- For repeatable evals, prefer `benchmark --misses` because it compares selected files against the files you actually changed for historical tasks.
1560
-
1561
- ### Check what got included and why
1562
-
1563
- Every pack includes a context receipt explaining each file's inclusion or exclusion:
1564
-
1565
- ```
1566
- - `src/auth.py` included because modified, filename keyword match
1567
- - `tests/test_auth.py` summarized because test for src/auth.py
1568
- - `src/unrelated_big.py` excluded because score too low
1569
- ```
1570
-
1571
- Use this to tune your `.agentignore` or scoring weights when irrelevant files keep appearing.
1572
-
1573
- ### Tune scoring weights per project
1574
-
1575
- If tests are always irrelevant to your tasks, drop their weight. If config files are critical, raise them:
1576
-
1577
- ```toml
1578
- # .agentpack/config.toml
1579
- [scoring]
1580
- related_test = 5 # was 35 — tests rarely relevant
1581
- config_file = 60 # was 25 — configs always matter here
1582
- ```
1366
+ - **Agent integration contract is shared**: `integrations/agents.py` defines install, audit, and repair behavior for Claude, Cursor, Windsurf, Codex, Antigravity, and Generic. `install`, `repair`, `doctor --agent all`, and release verification use the same contract.
1367
+ - **MCP and hooks use deltas when possible**: MCP exposes `get_delta_context()`, and prompt hooks can emit task/top-file/delta hints instead of injecting the full context every time.
1583
1368
 
1584
1369
  ---
1585
1370
 
@@ -1597,7 +1382,8 @@ config_file = 60 # was 25 — configs always matter here
1597
1382
  ## Known limitations
1598
1383
 
1599
1384
  - **Windows**: not supported. Git hooks use POSIX shell (`#!/bin/sh`, `>/dev/null 2>&1 &`). The Claude Code session hooks use `python3` and `rm -f`. Contributions welcome.
1600
- - **Monorepos**: single-root repos only. If you `agentpack pack` from a monorepo root, all packages are scanned together with no workspace awareness. Workaround: `cd packages/my-pkg && agentpack init && agentpack pack`.
1385
+ - **Monorepos**: workspace-aware ranking supports npm/pnpm, Cargo, and `go.work` layouts. `--workspace` creates filtered per-workspace outputs. Package dependency hints currently come from npm/pnpm `package.json`; Cargo/Go workspace membership is detected, but package-manager dependency edges for Cargo/Go are not yet modeled.
1386
+ - **Public benchmark proof**: source-checkout fixture results are useful regressions, not market proof. Use `agentpack benchmark --results-template` to publish real historical task results.
1601
1387
  - **Symbol extraction**: Python (AST, full) and JavaScript/TypeScript (regex, arrow functions + classes) are well supported. Go, Rust, Java, and Kotlin have import graph traversal but no symbol extraction — they fall back to file-level summaries.
1602
1388
  - **Selection recall**: ranking is heuristic. It can miss files when task language differs from code language, when repos have unusual architecture, or when important files are only connected at runtime.
1603
1389
  - **Secret redaction**: covers AWS keys, GitHub tokens, OpenAI/Anthropic keys, JWTs, and private key blocks. Not a substitute for a dedicated secrets scanner on sensitive repos.
@@ -1606,6 +1392,18 @@ config_file = 60 # was 25 — configs always matter here
1606
1392
 
1607
1393
  ---
1608
1394
 
1395
+ ## Roadmap
1396
+
1397
+ Next release target: **0.3.0 = public benchmark expansion + npm publish hardening**.
1398
+
1399
+ - Expand public source-checkout fixtures and publish reproducible `benchmark --sample-fixtures --compare --misses` output.
1400
+ - Raise recall on real historical tasks while keeping token precision healthy; target 60%+ recall, 50%+ token precision, and balanced packs under 25k tokens.
1401
+ - Extend second-pass expansion beyond the current signals (imports, reverse imports, related tests, historical co-change, and workspace hints) by adding framework route/service/schema pairs.
1402
+ - Make MCP pull flows more prominent so agents can ask for `explain_file`, `get_related_files`, and `get_delta_context` instead of relying only on a static startup pack.
1403
+ - Keep integration contracts stable across Claude, Cursor, Windsurf, Codex, Antigravity, and Generic before any 1.0 work.
1404
+
1405
+ ---
1406
+
1609
1407
  ## Optional dependencies
1610
1408
 
1611
1409
  ```bash
@@ -1635,9 +1433,13 @@ python -m ruff check src tests
1635
1433
  python -m build
1636
1434
  npm test --prefix npm
1637
1435
  (cd npm && npm pack --dry-run)
1436
+ pytest tests/test_agent_integration_matrix.py -q
1638
1437
  agentpack benchmark --sample-fixtures --misses
1438
+ agentpack doctor
1639
1439
  ```
1640
1440
 
1441
+ To publish to npm, configure the GitHub secret `NPM_TOKEN`. `agentpack doctor` warns locally when neither `NPM_TOKEN` nor `NODE_AUTH_TOKEN` is present, and the npm publish workflow fails early with a clear error if the secret is missing.
1442
+
1641
1443
  Good contribution areas:
1642
1444
 
1643
1445
  - More real-world benchmark fixtures and public repo eval cases