agentpack-cli 0.1.21__tar.gz → 0.1.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/PKG-INFO +167 -20
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/README.md +165 -18
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/pyproject.toml +2 -2
- agentpack_cli-0.1.22/src/agentpack/__init__.py +3 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/ranking.py +89 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/application/pack_service.py +2 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/cli.py +2 -2
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/benchmark.py +231 -3
- agentpack_cli-0.1.22/src/agentpack/commands/quickstart.py +112 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/config.py +4 -0
- agentpack_cli-0.1.21/src/agentpack/__init__.py +0 -3
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/.gitignore +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/LICENSE +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/antigravity.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/base.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/claude.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/codex.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/cursor.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/detect.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/generic.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/windsurf.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/dependency_graph.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/go_imports.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/java_imports.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/js_ts_imports.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/python_imports.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/rust_imports.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/symbols.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/tests.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/application/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/_shared.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/claude_cmd.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/diff.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/doctor.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/explain.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/hook_cmd.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/init.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/install.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/mcp_cmd.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/monitor.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/pack.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/scan.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/stats.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/status.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/summarize.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/watch.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/bootstrap.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/cache.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/context_pack.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/diff.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/git.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/git_hooks.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/global_install.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/ignore.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/merkle.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/models.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/redactor.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/scanner.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/snapshot.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/token_estimator.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/vscode_tasks.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/data/agentpack.md +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/antigravity.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/claude.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/codex.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/cursor.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/windsurf.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/integrations/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/integrations/git_hooks.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/integrations/global_install.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/integrations/vscode_tasks.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/mcp_server.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/renderers/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/renderers/compact.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/renderers/markdown.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/renderers/receipts.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/session/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/session/state.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/summaries/__init__.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/summaries/base.py +0 -0
- {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/summaries/offline.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentpack-cli
|
|
3
|
-
Version: 0.1.21
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.1.22
|
|
4
|
+
Summary: Task-aware context packing for AI coding agents — Claude, Cursor, Windsurf, Codex, and Antigravity
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Keywords: ai,claude,codex,coding-agent,context,cursor,llm,packing,windsurf
|
|
@@ -44,11 +44,27 @@ Description-Content-Type: text/markdown
|
|
|
44
44
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
45
45
|
[CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
46
46
|
|
|
47
|
-
> **Status: alpha (v0.1.
|
|
47
|
+
> **Status: alpha (v0.1.22).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
|
|
48
48
|
>
|
|
49
49
|
> **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
|
|
50
50
|
|
|
51
|
-
**
|
|
51
|
+
**Task-aware context packing for AI coding agents.**
|
|
52
|
+
|
|
53
|
+
AgentPack scans a repository, ranks files for the task you are working on, and writes a compact markdown context pack for Claude Code, Cursor, Windsurf, Codex, Antigravity, CI jobs, or any LLM workflow.
|
|
54
|
+
|
|
55
|
+
It is useful when the repo is too large to paste, but you still want the agent to start with more than a blank slate.
|
|
56
|
+
|
|
57
|
+
**What it is**
|
|
58
|
+
- A local CLI for building task-focused context packs
|
|
59
|
+
- A summary cache, import graph, ranking engine, and token-budget selector
|
|
60
|
+
- Optional integrations for popular coding agents
|
|
61
|
+
- An eval harness for measuring whether selected files match files you actually changed
|
|
62
|
+
|
|
63
|
+
**What it is not**
|
|
64
|
+
- Not a coding agent
|
|
65
|
+
- Not a semantic code search engine
|
|
66
|
+
- Not a replacement for manual inspection on high-stakes changes
|
|
67
|
+
- Not yet proven across a large public benchmark suite
|
|
52
68
|
|
|
53
69
|
---
|
|
54
70
|
|
|
@@ -71,16 +87,19 @@ None of these scale. On a 200-file codebase, option 1 wastes 5–10 turns just o
|
|
|
71
87
|
AgentPack solves this with a one-time offline analysis pass:
|
|
72
88
|
|
|
73
89
|
1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
|
|
74
|
-
2. **On each task** — uses git diff
|
|
75
|
-
3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies get summaries, everything else gets dropped.
|
|
76
|
-
4. **Explains pack quality** — noisy-pack diagnostics, score receipts,
|
|
90
|
+
2. **On each task** — uses git diff, import graph traversal, keyword/concept expansion, implementation-role boosts, and cross-layer relatedness to rank every file.
|
|
91
|
+
3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies and likely implementation files get summaries, everything else gets dropped.
|
|
92
|
+
4. **Explains pack quality** — noisy-pack diagnostics, score receipts, token-precision metrics, and benchmark miss reports show when the pack is broad or missing expected files.
|
|
77
93
|
5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
|
|
78
94
|
|
|
79
|
-
The result: your agent starts
|
|
95
|
+
The result: your agent starts with a focused map of the relevant code. It should reduce blind exploration, not replace the agent's own file reads or your judgment.
|
|
80
96
|
|
|
81
97
|
```bash
|
|
82
98
|
pip install agentpack-cli
|
|
83
99
|
|
|
100
|
+
# Show the fastest path for your repo
|
|
101
|
+
agentpack quickstart --task "fix auth token expiry"
|
|
102
|
+
|
|
84
103
|
# One-time setup per project
|
|
85
104
|
cd your-project
|
|
86
105
|
agentpack init # creates config, session, task.md — nothing else needed
|
|
@@ -100,13 +119,53 @@ agentpack global-install --dry-run # preview first
|
|
|
100
119
|
agentpack global-install
|
|
101
120
|
```
|
|
102
121
|
|
|
103
|
-
Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM.
|
|
122
|
+
Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM that can read markdown.
|
|
104
123
|
|
|
105
124
|
---
|
|
106
125
|
|
|
107
|
-
##
|
|
126
|
+
## What to expect
|
|
127
|
+
|
|
128
|
+
AgentPack's strongest value is repeatable orientation: it gives the agent a compact first-pass map before tool calls begin.
|
|
129
|
+
|
|
130
|
+
Typical results on large repos:
|
|
131
|
+
|
|
132
|
+
| Signal | What good looks like |
|
|
133
|
+
|---|---|
|
|
134
|
+
| Token reduction | 90-99% smaller than raw repo text |
|
|
135
|
+
| Pack size | Usually 8k-25k tokens for a specific task |
|
|
136
|
+
| Pack time | Seconds on warm cache; first summarize pass is slower |
|
|
137
|
+
| Recall | Should be high for files you later edit; validate with `agentpack benchmark` |
|
|
138
|
+
| Precision | Often modest; summaries are cheap but can still add noise |
|
|
108
139
|
|
|
109
|
-
|
|
140
|
+
The compression number is easy to verify, but it is not the same as usefulness. The important question is: **did AgentPack include the files you actually needed?**
|
|
141
|
+
|
|
142
|
+
Use the built-in eval flow:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
agentpack benchmark --init
|
|
146
|
+
# add real historical tasks and files you actually changed
|
|
147
|
+
agentpack benchmark --compare --misses
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
For source checkouts, there is also a small smoke suite:
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
agentpack benchmark --sample-fixtures --misses
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
This runs FastAPI, Next.js, and mixed Python/TypeScript fixture tasks. It is a sanity check, not a substitute for real repo evals.
|
|
157
|
+
|
|
158
|
+
### Current quality bar
|
|
159
|
+
|
|
160
|
+
AgentPack is best described as a **map, not a compass**. It is already good at token reduction, changed-file inclusion, related tests, imports, configs, and common concepts like auth/cache/rate limiting. Recent ranking work also improves full-stack tasks by pulling service/controller/schema/handler files when UI routes or pages match the same domain.
|
|
161
|
+
|
|
162
|
+
Known weak spot: recall can still be low on unfamiliar product domains or cross-language flows. Use `benchmark --misses` and `agentpack explain` when an expected file is absent. Those commands show whether the miss was caused by ignore rules, low score, summary floor, budget cutoff, or missing task signal.
|
|
163
|
+
|
|
164
|
+
### Observed author-run numbers
|
|
165
|
+
|
|
166
|
+
These are local author-session numbers, included as anecdotal context rather than a benchmark claim.
|
|
167
|
+
|
|
168
|
+
#### Token Compression
|
|
110
169
|
|
|
111
170
|
| Metric | Value |
|
|
112
171
|
|--------|-------|
|
|
@@ -118,7 +177,7 @@ Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigr
|
|
|
118
177
|
|
|
119
178
|
Per session: ~4.1M raw repo → ~35K packed context.
|
|
120
179
|
|
|
121
|
-
|
|
180
|
+
#### Cost (Sonnet 4.6, input tokens only)
|
|
122
181
|
|
|
123
182
|
| Scenario | Cost |
|
|
124
183
|
|----------|------|
|
|
@@ -128,7 +187,7 @@ Per session: ~4.1M raw repo → ~35K packed context.
|
|
|
128
187
|
|
|
129
188
|
> Honest note: raw_tokens = full repo estimate. Real savings depend on how much context you'd pass manually. Compression ratio (99%+) is verifiable; dollar figure is scenario-dependent.
|
|
130
189
|
|
|
131
|
-
|
|
190
|
+
#### Quality Signal
|
|
132
191
|
|
|
133
192
|
- 42 commits in 7 days (~6/day) vs 4.9/day before
|
|
134
193
|
- Shift from single-file fixes → multi-system coordinated fixes
|
|
@@ -202,9 +261,10 @@ _*`--agent generic` outputs standard markdown. Claude adapter has richer instruc
|
|
|
202
261
|
|
|
203
262
|
### What agentpack does NOT do well
|
|
204
263
|
|
|
205
|
-
- **Interactive sessions on small repos**: if your whole repo is <20k tokens,
|
|
206
|
-
- **One-shot public repo questions**: gitingest's "replace hub with ingest" is faster for
|
|
207
|
-
- **
|
|
264
|
+
- **Interactive sessions on small repos**: if your whole repo is <20k tokens, a simple repo dump may be enough
|
|
265
|
+
- **One-shot public repo questions**: gitingest's trick of replacing "hub" with "ingest" in a GitHub URL is faster for quick read-only exploration
|
|
266
|
+
- **Guaranteed source-of-truth selection**: AgentPack ranks likely files; it can miss task-critical files. Use `agentpack benchmark --misses`, `agentpack explain`, and normal `rg`/agent file reads for correctness.
|
|
267
|
+
- **Deep semantic understanding**: keyword/concept scoring, imports, symbols, and path roles help, but they are not an LLM-level code understanding system
|
|
208
268
|
|
|
209
269
|
---
|
|
210
270
|
|
|
@@ -222,6 +282,14 @@ Requires Python 3.10+.
|
|
|
222
282
|
|
|
223
283
|
## Start Once, Then Work Normally
|
|
224
284
|
|
|
285
|
+
For a guided two-minute path in any repo:
|
|
286
|
+
|
|
287
|
+
```bash
|
|
288
|
+
agentpack quickstart --task "fix auth token expiry"
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
It shows the exact commands to initialize, set task text, generate a first pack, inspect stats, start watch mode, and scaffold a small benchmark file for your own tasks.
|
|
292
|
+
|
|
225
293
|
The full workflow:
|
|
226
294
|
|
|
227
295
|
```bash
|
|
@@ -642,6 +710,20 @@ Options:
|
|
|
642
710
|
|
|
643
711
|
---
|
|
644
712
|
|
|
713
|
+
### `agentpack quickstart`
|
|
714
|
+
|
|
715
|
+
Show the shortest useful path for the current repo.
|
|
716
|
+
|
|
717
|
+
```bash
|
|
718
|
+
agentpack quickstart
|
|
719
|
+
agentpack quickstart --task "fix auth token expiry"
|
|
720
|
+
agentpack quickstart --task "fix auth token expiry" --write
|
|
721
|
+
```
|
|
722
|
+
|
|
723
|
+
`quickstart` does no guessing and no magic. It checks whether `.agentpack/config.toml`, `.agentpack/task.md`, and context packs exist, then prints the next few commands. With `--write`, it writes the supplied task into `.agentpack/task.md`.
|
|
724
|
+
|
|
725
|
+
---
|
|
726
|
+
|
|
645
727
|
### `agentpack session` _(removed)_
|
|
646
728
|
|
|
647
729
|
Session management was removed in v0.1.12. `agentpack init` bootstraps the session automatically. Use `agentpack watch` to keep context current. To change the task, edit `.agentpack/task.md`.
|
|
@@ -762,6 +844,8 @@ agentpack benchmark --task "fix auth token expiry" # single task
|
|
|
762
844
|
agentpack benchmark --task "fix auth bug" --compare # compare minimal/balanced/deep
|
|
763
845
|
agentpack benchmark --init # scaffold .agentpack/benchmark.toml
|
|
764
846
|
agentpack benchmark # run all cases in benchmark.toml
|
|
847
|
+
agentpack benchmark --sample-fixtures # source checkout demo evals
|
|
848
|
+
agentpack benchmark --misses # explain expected-file misses
|
|
765
849
|
```
|
|
766
850
|
|
|
767
851
|
Output per case:
|
|
@@ -812,6 +896,8 @@ expected_files = [
|
|
|
812
896
|
hit: src/auth/session.py, src/auth/token.py
|
|
813
897
|
```
|
|
814
898
|
|
|
899
|
+
Use `--misses` when recall is low. It prints each expected file that was not selected, along with its status, rank, score, and scoring reasons, which helps separate ignored files, budget cuts, low scores, and missing dependency signals.
|
|
900
|
+
|
|
815
901
|
---
|
|
816
902
|
|
|
817
903
|
### `agentpack scan`
|
|
@@ -840,6 +926,20 @@ When a session is active, shows session panel (agent, mode, started, refresh cou
|
|
|
840
926
|
|
|
841
927
|
Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
|
|
842
928
|
|
|
929
|
+
To build a real usefulness signal for your repo:
|
|
930
|
+
|
|
931
|
+
```bash
|
|
932
|
+
agentpack benchmark --sample-fixtures
|
|
933
|
+
|
|
934
|
+
agentpack benchmark --init
|
|
935
|
+
# edit .agentpack/benchmark.toml with real tasks + files you actually changed
|
|
936
|
+
agentpack benchmark --compare --misses
|
|
937
|
+
```
|
|
938
|
+
|
|
939
|
+
`--sample-fixtures` runs bundled FastAPI, Next.js, and mixed Python/TypeScript fixture evals from an AgentPack source checkout. It is a smoke test, not a claim about your repo.
|
|
940
|
+
|
|
941
|
+
For a strong usefulness signal (8/10 or better), use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
|
|
942
|
+
|
|
843
943
|
---
|
|
844
944
|
|
|
845
945
|
### `agentpack status`
|
|
@@ -1189,6 +1289,7 @@ src/agentpack/
|
|
|
1189
1289
|
pack.py # agentpack pack → PackService.run()
|
|
1190
1290
|
install.py # agentpack install / global-install → installers/
|
|
1191
1291
|
init.py # agentpack init
|
|
1292
|
+
quickstart.py # agentpack quickstart — guided first-run commands
|
|
1192
1293
|
scan.py # agentpack scan
|
|
1193
1294
|
diff.py # agentpack diff
|
|
1194
1295
|
status.py # agentpack status
|
|
@@ -1197,10 +1298,11 @@ src/agentpack/
|
|
|
1197
1298
|
monitor.py # agentpack monitor
|
|
1198
1299
|
explain.py # agentpack explain
|
|
1199
1300
|
doctor.py # agentpack doctor
|
|
1200
|
-
|
|
1301
|
+
hook_cmd.py # agentpack hook — Claude prompt hook + stale detection
|
|
1302
|
+
mcp_cmd.py # agentpack mcp — MCP server entrypoint
|
|
1201
1303
|
watch.py # agentpack watch — file watcher with debounce
|
|
1202
1304
|
claude_cmd.py # agentpack claude — refresh + launch claude
|
|
1203
|
-
benchmark.py # agentpack benchmark — token efficiency
|
|
1305
|
+
benchmark.py # agentpack benchmark — token efficiency, recall, miss diagnostics
|
|
1204
1306
|
```
|
|
1205
1307
|
|
|
1206
1308
|
### Key architectural properties
|
|
@@ -1328,7 +1430,17 @@ The more descriptive your branch names (`feat/add-rate-limiting` beats `dev`) an
|
|
|
1328
1430
|
|
|
1329
1431
|
### Concept synonym expansion
|
|
1330
1432
|
|
|
1331
|
-
AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl
|
|
1433
|
+
AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl`; domain terms such as `kundali` expand toward astrology/chart/compatibility terms. Files that implement a concept but don't use its exact name can still rank.
|
|
1434
|
+
|
|
1435
|
+
### Full-stack role boosts
|
|
1436
|
+
|
|
1437
|
+
When a task points at a page, route, or API surface, AgentPack also gives a controlled boost to related implementation roles such as `service`, `controller`, `schema`, `handler`, `repository`, and `client`. This helps full-stack tasks pull backend implementation files instead of only frontend entrypoints.
|
|
1438
|
+
|
|
1439
|
+
This is still heuristic. If a service should have appeared and did not, add it as an `expected_files` entry in `benchmark.toml` and run:
|
|
1440
|
+
|
|
1441
|
+
```bash
|
|
1442
|
+
agentpack benchmark --compare --misses
|
|
1443
|
+
```
|
|
1332
1444
|
|
|
1333
1445
|
### Content-based keyword enrichment
|
|
1334
1446
|
|
|
@@ -1379,6 +1491,8 @@ agentpack explain --task "fix auth session bug"
|
|
|
1379
1491
|
|
|
1380
1492
|
Shows ranked scores and reasons before committing to a pack. Use when a file you expect isn't appearing.
|
|
1381
1493
|
|
|
1494
|
+
For repeatable evals, prefer `benchmark --misses` because it compares selected files against the files you actually changed for historical tasks.
|
|
1495
|
+
|
|
1382
1496
|
### Check what got included and why
|
|
1383
1497
|
|
|
1384
1498
|
Every pack includes a context receipt explaining each file's inclusion or exclusion:
|
|
@@ -1410,7 +1524,8 @@ config_file = 60 # was 25 — configs always matter here
|
|
|
1410
1524
|
- **Non-destructive**: never overwrites user files; config patching only touches agentpack-managed blocks
|
|
1411
1525
|
- **Agent-neutral**: architecture is generic; Claude Code is the primary target (deepest integration); Cursor, Windsurf, Codex, and Antigravity are supported but less battle-tested
|
|
1412
1526
|
- **No daemons**: file watching is opt-in via `agentpack watch`; git hooks run in the background and are opt-in via `install`
|
|
1413
|
-
- **
|
|
1527
|
+
- **Measurable**: `benchmark`, `stats`, receipts, and `--misses` are first-class because compression without recall is not enough
|
|
1528
|
+
- **Honest**: packed token count reflects real content, and raw-repo savings are presented separately from practical usefulness
|
|
1414
1529
|
|
|
1415
1530
|
---
|
|
1416
1531
|
|
|
@@ -1419,6 +1534,7 @@ config_file = 60 # was 25 — configs always matter here
|
|
|
1419
1534
|
- **Windows**: not supported. Git hooks use POSIX shell (`#!/bin/sh`, `>/dev/null 2>&1 &`). The Claude Code session hooks use `python3` and `rm -f`. Contributions welcome.
|
|
1420
1535
|
- **Monorepos**: single-root repos only. If you `agentpack pack` from a monorepo root, all packages are scanned together with no workspace awareness. Workaround: `cd packages/my-pkg && agentpack init && agentpack pack`.
|
|
1421
1536
|
- **Symbol extraction**: Python (AST, full) and JavaScript/TypeScript (regex, arrow functions + classes) are well-supported. Go, Rust, Java, Kotlin have import graph traversal but no symbol extraction — they fall back to file-level summaries.
|
|
1537
|
+
- **Selection recall**: ranking is heuristic. It can miss files when task language differs from code language, when repos have unusual architecture, or when important files are only connected at runtime.
|
|
1422
1538
|
- **Secret redaction**: covers AWS keys, GitHub tokens, OpenAI/Anthropic keys, JWTs, and private key blocks. Not a substitute for a dedicated secrets scanner on sensitive repos.
|
|
1423
1539
|
- **Token estimates**: uses tiktoken `cl100k_base` — approximate, not exact for Claude's billing.
|
|
1424
1540
|
- **Large repos (>5k files)**: global auto-bootstrap is skipped for repos over 5,000 files to avoid hangs. Run `agentpack init` explicitly in large codebases.
|
|
@@ -1435,6 +1551,37 @@ pip install "agentpack-cli[all]" # watch + mcp
|
|
|
1435
1551
|
|
|
1436
1552
|
---
|
|
1437
1553
|
|
|
1554
|
+
## Development
|
|
1555
|
+
|
|
1556
|
+
Clone and run locally:
|
|
1557
|
+
|
|
1558
|
+
```bash
|
|
1559
|
+
git clone https://github.com/vishal2612200/agentpack.git
|
|
1560
|
+
cd agentpack
|
|
1561
|
+
python -m pip install -e ".[dev,watch,mcp]" build
|
|
1562
|
+
pytest
|
|
1563
|
+
```
|
|
1564
|
+
|
|
1565
|
+
Useful checks before opening a PR:
|
|
1566
|
+
|
|
1567
|
+
```bash
|
|
1568
|
+
pytest
|
|
1569
|
+
python -m build
|
|
1570
|
+
agentpack benchmark --sample-fixtures --misses
|
|
1571
|
+
```
|
|
1572
|
+
|
|
1573
|
+
Good contribution areas:
|
|
1574
|
+
|
|
1575
|
+
- More real-world benchmark fixtures and public repo eval cases
|
|
1576
|
+
- Windows support for hooks and session integrations
|
|
1577
|
+
- Better symbol extraction for Go, Rust, Java, and Kotlin
|
|
1578
|
+
- More precise import/dependency resolution for framework-heavy repos
|
|
1579
|
+
- Ranking regressions with `expected_files` cases that reproduce misses
|
|
1580
|
+
|
|
1581
|
+
Please include tests for ranking changes. A good ranking PR usually adds one focused unit test and one scenario in `tests/test_ranking_evals.py`.
|
|
1582
|
+
|
|
1583
|
+
---
|
|
1584
|
+
|
|
1438
1585
|
## License
|
|
1439
1586
|
|
|
1440
1587
|
MIT
|
|
@@ -5,11 +5,27 @@
|
|
|
5
5
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
6
|
[CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
7
7
|
|
|
8
|
-
> **Status: alpha (v0.1.
|
|
8
|
+
> **Status: alpha (v0.1.22).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
|
|
9
9
|
>
|
|
10
10
|
> **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
|
|
11
11
|
|
|
12
|
-
**
|
|
12
|
+
**Task-aware context packing for AI coding agents.**
|
|
13
|
+
|
|
14
|
+
AgentPack scans a repository, ranks files for the task you are working on, and writes a compact markdown context pack for Claude Code, Cursor, Windsurf, Codex, Antigravity, CI jobs, or any LLM workflow.
|
|
15
|
+
|
|
16
|
+
It is useful when the repo is too large to paste, but you still want the agent to start with more than a blank slate.
|
|
17
|
+
|
|
18
|
+
**What it is**
|
|
19
|
+
- A local CLI for building task-focused context packs
|
|
20
|
+
- A summary cache, import graph, ranking engine, and token-budget selector
|
|
21
|
+
- Optional integrations for popular coding agents
|
|
22
|
+
- An eval harness for measuring whether selected files match files you actually changed
|
|
23
|
+
|
|
24
|
+
**What it is not**
|
|
25
|
+
- Not a coding agent
|
|
26
|
+
- Not a semantic code search engine
|
|
27
|
+
- Not a replacement for manual inspection on high-stakes changes
|
|
28
|
+
- Not yet proven across a large public benchmark suite
|
|
13
29
|
|
|
14
30
|
---
|
|
15
31
|
|
|
@@ -32,16 +48,19 @@ None of these scale. On a 200-file codebase, option 1 wastes 5–10 turns just o
|
|
|
32
48
|
AgentPack solves this with a one-time offline analysis pass:
|
|
33
49
|
|
|
34
50
|
1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
|
|
35
|
-
2. **On each task** — uses git diff
|
|
36
|
-
3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies get summaries, everything else gets dropped.
|
|
37
|
-
4. **Explains pack quality** — noisy-pack diagnostics, score receipts,
|
|
51
|
+
2. **On each task** — uses git diff, import graph traversal, keyword/concept expansion, implementation-role boosts, and cross-layer relatedness to rank every file.
|
|
52
|
+
3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies and likely implementation files get summaries, everything else gets dropped.
|
|
53
|
+
4. **Explains pack quality** — noisy-pack diagnostics, score receipts, token-precision metrics, and benchmark miss reports show when the pack is broad or missing expected files.
|
|
38
54
|
5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
|
|
39
55
|
|
|
40
|
-
The result: your agent starts
|
|
56
|
+
The result: your agent starts with a focused map of the relevant code. It should reduce blind exploration, not replace the agent's own file reads or your judgment.
|
|
41
57
|
|
|
42
58
|
```bash
|
|
43
59
|
pip install agentpack-cli
|
|
44
60
|
|
|
61
|
+
# Show the fastest path for your repo
|
|
62
|
+
agentpack quickstart --task "fix auth token expiry"
|
|
63
|
+
|
|
45
64
|
# One-time setup per project
|
|
46
65
|
cd your-project
|
|
47
66
|
agentpack init # creates config, session, task.md — nothing else needed
|
|
@@ -61,13 +80,53 @@ agentpack global-install --dry-run # preview first
|
|
|
61
80
|
agentpack global-install
|
|
62
81
|
```
|
|
63
82
|
|
|
64
|
-
Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM.
|
|
83
|
+
Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM that can read markdown.
|
|
65
84
|
|
|
66
85
|
---
|
|
67
86
|
|
|
68
|
-
##
|
|
87
|
+
## What to expect
|
|
88
|
+
|
|
89
|
+
AgentPack's strongest value is repeatable orientation: it gives the agent a compact first-pass map before tool calls begin.
|
|
90
|
+
|
|
91
|
+
Typical results on large repos:
|
|
92
|
+
|
|
93
|
+
| Signal | What good looks like |
|
|
94
|
+
|---|---|
|
|
95
|
+
| Token reduction | 90-99% smaller than raw repo text |
|
|
96
|
+
| Pack size | Usually 8k-25k tokens for a specific task |
|
|
97
|
+
| Pack time | Seconds on warm cache; first summarize pass is slower |
|
|
98
|
+
| Recall | Should be high for files you later edit; validate with `agentpack benchmark` |
|
|
99
|
+
| Precision | Often modest; summaries are cheap but can still add noise |
|
|
69
100
|
|
|
70
|
-
|
|
101
|
+
The compression number is easy to verify, but it is not the same as usefulness. The important question is: **did AgentPack include the files you actually needed?**
|
|
102
|
+
|
|
103
|
+
Use the built-in eval flow:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
agentpack benchmark --init
|
|
107
|
+
# add real historical tasks and files you actually changed
|
|
108
|
+
agentpack benchmark --compare --misses
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
For source checkouts, there is also a small smoke suite:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
agentpack benchmark --sample-fixtures --misses
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
This runs FastAPI, Next.js, and mixed Python/TypeScript fixture tasks. It is a sanity check, not a substitute for real repo evals.
|
|
118
|
+
|
|
119
|
+
### Current quality bar
|
|
120
|
+
|
|
121
|
+
AgentPack is best described as a **map, not a compass**. It is already good at token reduction, changed-file inclusion, related tests, imports, configs, and common concepts like auth/cache/rate limiting. Recent ranking work also improves full-stack tasks by pulling service/controller/schema/handler files when UI routes or pages match the same domain.
|
|
122
|
+
|
|
123
|
+
Known weak spot: recall can still be low on unfamiliar product domains or cross-language flows. Use `benchmark --misses` and `agentpack explain` when an expected file is absent. Those commands show whether the miss was caused by ignore rules, low score, summary floor, budget cutoff, or missing task signal.
|
|
124
|
+
|
|
125
|
+
### Observed author-run numbers
|
|
126
|
+
|
|
127
|
+
These are local author-session numbers, included as anecdotal context rather than a benchmark claim.
|
|
128
|
+
|
|
129
|
+
#### Token Compression
|
|
71
130
|
|
|
72
131
|
| Metric | Value |
|
|
73
132
|
|--------|-------|
|
|
@@ -79,7 +138,7 @@ Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigr
|
|
|
79
138
|
|
|
80
139
|
Per session: ~4.1M raw repo → ~35K packed context.
|
|
81
140
|
|
|
82
|
-
|
|
141
|
+
#### Cost (Sonnet 4.6, input tokens only)
|
|
83
142
|
|
|
84
143
|
| Scenario | Cost |
|
|
85
144
|
|----------|------|
|
|
@@ -89,7 +148,7 @@ Per session: ~4.1M raw repo → ~35K packed context.
|
|
|
89
148
|
|
|
90
149
|
> Honest note: raw_tokens = full repo estimate. Real savings depend on how much context you'd pass manually. Compression ratio (99%+) is verifiable; dollar figure is scenario-dependent.
|
|
91
150
|
|
|
92
|
-
|
|
151
|
+
#### Quality Signal
|
|
93
152
|
|
|
94
153
|
- 42 commits in 7 days (~6/day) vs 4.9/day before
|
|
95
154
|
- Shift from single-file fixes → multi-system coordinated fixes
|
|
@@ -163,9 +222,10 @@ _*`--agent generic` outputs standard markdown. Claude adapter has richer instruc
|
|
|
163
222
|
|
|
164
223
|
### What agentpack does NOT do well
|
|
165
224
|
|
|
166
|
-
- **Interactive sessions on small repos**: if your whole repo is <20k tokens,
|
|
167
|
-
- **One-shot public repo questions**: gitingest's "replace hub with ingest" is faster for
|
|
168
|
-
- **
|
|
225
|
+
- **Interactive sessions on small repos**: if your whole repo is <20k tokens, a simple repo dump may be enough
|
|
226
|
+
- **One-shot public repo questions**: gitingest's "replace hub with ingest" URL trick (swap `hub` for `ingest` in a GitHub URL) is faster for quick read-only exploration
|
|
227
|
+
- **Guaranteed source-of-truth selection**: AgentPack ranks likely files; it can miss task-critical files. Use `agentpack benchmark --misses`, `agentpack explain`, and normal `rg`/agent file reads for correctness.
|
|
228
|
+
- **Deep semantic understanding**: keyword/concept scoring, imports, symbols, and path roles help, but they are not an LLM-level code understanding system
|
|
169
229
|
|
|
170
230
|
---
|
|
171
231
|
|
|
@@ -183,6 +243,14 @@ Requires Python 3.10+.
|
|
|
183
243
|
|
|
184
244
|
## Start Once, Then Work Normally
|
|
185
245
|
|
|
246
|
+
For a guided two-minute path in any repo:
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
agentpack quickstart --task "fix auth token expiry"
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
It shows the exact commands to initialize, set task text, generate a first pack, inspect stats, start watch mode, and scaffold a small benchmark file for your own tasks.
|
|
253
|
+
|
|
186
254
|
The full workflow:
|
|
187
255
|
|
|
188
256
|
```bash
|
|
@@ -603,6 +671,20 @@ Options:
|
|
|
603
671
|
|
|
604
672
|
---
|
|
605
673
|
|
|
674
|
+
### `agentpack quickstart`
|
|
675
|
+
|
|
676
|
+
Show the shortest useful path for the current repo.
|
|
677
|
+
|
|
678
|
+
```bash
|
|
679
|
+
agentpack quickstart
|
|
680
|
+
agentpack quickstart --task "fix auth token expiry"
|
|
681
|
+
agentpack quickstart --task "fix auth token expiry" --write
|
|
682
|
+
```
|
|
683
|
+
|
|
684
|
+
`quickstart` does nothing magic. It checks whether `.agentpack/config.toml`, `.agentpack/task.md`, and context packs exist, then prints the next few commands to run. With `--write`, it also writes the supplied task into `.agentpack/task.md`.
|
|
685
|
+
|
|
686
|
+
---
|
|
687
|
+
|
|
606
688
|
### `agentpack session` _(removed)_
|
|
607
689
|
|
|
608
690
|
Session management was removed in v0.1.12. `agentpack init` bootstraps the session automatically. Use `agentpack watch` to keep context current. To change the task, edit `.agentpack/task.md`.
|
|
@@ -723,6 +805,8 @@ agentpack benchmark --task "fix auth token expiry" # single task
|
|
|
723
805
|
agentpack benchmark --task "fix auth bug" --compare # compare minimal/balanced/deep
|
|
724
806
|
agentpack benchmark --init # scaffold .agentpack/benchmark.toml
|
|
725
807
|
agentpack benchmark # run all cases in benchmark.toml
|
|
808
|
+
agentpack benchmark --sample-fixtures # source checkout demo evals
|
|
809
|
+
agentpack benchmark --misses # explain expected-file misses
|
|
726
810
|
```
|
|
727
811
|
|
|
728
812
|
Output per case:
|
|
@@ -773,6 +857,8 @@ expected_files = [
|
|
|
773
857
|
hit: src/auth/session.py, src/auth/token.py
|
|
774
858
|
```
|
|
775
859
|
|
|
860
|
+
Use `--misses` when recall is low. For each expected file that was not selected, it prints the file's status, rank, score, and scoring reasons, which helps separate ignored files, budget cuts, low scores, and missing dependency signals.
|
|
861
|
+
|
|
776
862
|
---
|
|
777
863
|
|
|
778
864
|
### `agentpack scan`
|
|
@@ -801,6 +887,20 @@ When a session is active, shows session panel (agent, mode, started, refresh cou
|
|
|
801
887
|
|
|
802
888
|
Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
|
|
803
889
|
|
|
890
|
+
To build a real usefulness signal for your repo:
|
|
891
|
+
|
|
892
|
+
```bash
|
|
893
|
+
agentpack benchmark --sample-fixtures
|
|
894
|
+
|
|
895
|
+
agentpack benchmark --init
|
|
896
|
+
# edit .agentpack/benchmark.toml with real tasks + files you actually changed
|
|
897
|
+
agentpack benchmark --compare --misses
|
|
898
|
+
```
|
|
899
|
+
|
|
900
|
+
`--sample-fixtures` runs bundled FastAPI, Next.js, and mixed Python/TypeScript fixture evals from an AgentPack source checkout. It is a smoke test, not a claim about your repo.
|
|
901
|
+
|
|
902
|
+
For a strong usefulness signal (8+ out of 10), use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
|
|
903
|
+
|
|
804
904
|
---
|
|
805
905
|
|
|
806
906
|
### `agentpack status`
|
|
@@ -1150,6 +1250,7 @@ src/agentpack/
|
|
|
1150
1250
|
pack.py # agentpack pack → PackService.run()
|
|
1151
1251
|
install.py # agentpack install / global-install → installers/
|
|
1152
1252
|
init.py # agentpack init
|
|
1253
|
+
quickstart.py # agentpack quickstart — guided first-run commands
|
|
1153
1254
|
scan.py # agentpack scan
|
|
1154
1255
|
diff.py # agentpack diff
|
|
1155
1256
|
status.py # agentpack status
|
|
@@ -1158,10 +1259,11 @@ src/agentpack/
|
|
|
1158
1259
|
monitor.py # agentpack monitor
|
|
1159
1260
|
explain.py # agentpack explain
|
|
1160
1261
|
doctor.py # agentpack doctor
|
|
1161
|
-
|
|
1262
|
+
hook_cmd.py # agentpack hook — Claude prompt hook + stale detection
|
|
1263
|
+
mcp_cmd.py # agentpack mcp — MCP server entrypoint
|
|
1162
1264
|
watch.py # agentpack watch — file watcher with debounce
|
|
1163
1265
|
claude_cmd.py # agentpack claude — refresh + launch claude
|
|
1164
|
-
benchmark.py # agentpack benchmark — token efficiency
|
|
1266
|
+
benchmark.py # agentpack benchmark — token efficiency, recall, miss diagnostics
|
|
1165
1267
|
```
|
|
1166
1268
|
|
|
1167
1269
|
### Key architectural properties
|
|
@@ -1289,7 +1391,17 @@ The more descriptive your branch names (`feat/add-rate-limiting` beats `dev`) an
|
|
|
1289
1391
|
|
|
1290
1392
|
### Concept synonym expansion
|
|
1291
1393
|
|
|
1292
|
-
AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl
|
|
1394
|
+
AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl`; domain terms such as `kundali` expand toward astrology/chart/compatibility terms. Files that implement a concept but don't use its exact name can still rank.
|
|
1395
|
+
|
|
1396
|
+
### Full-stack role boosts
|
|
1397
|
+
|
|
1398
|
+
When a task points at a page, route, or API surface, AgentPack also gives a controlled boost to related implementation roles such as `service`, `controller`, `schema`, `handler`, `repository`, and `client`. This helps full-stack tasks pull backend implementation files instead of only frontend entrypoints.
|
|
1399
|
+
|
|
1400
|
+
This is still heuristic. If a service should have appeared and did not, add it as an `expected_files` entry in `benchmark.toml` and run:
|
|
1401
|
+
|
|
1402
|
+
```bash
|
|
1403
|
+
agentpack benchmark --compare --misses
|
|
1404
|
+
```
|
|
1293
1405
|
|
|
1294
1406
|
### Content-based keyword enrichment
|
|
1295
1407
|
|
|
@@ -1340,6 +1452,8 @@ agentpack explain --task "fix auth session bug"
|
|
|
1340
1452
|
|
|
1341
1453
|
Shows ranked scores and reasons before committing to a pack. Use it when a file you expect isn't appearing.
|
|
1342
1454
|
|
|
1455
|
+
For repeatable evals, prefer `benchmark --misses` because it compares selected files against the files you actually changed for historical tasks.
|
|
1456
|
+
|
|
1343
1457
|
### Check what got included and why
|
|
1344
1458
|
|
|
1345
1459
|
Every pack includes a context receipt explaining each file's inclusion or exclusion:
|
|
@@ -1371,7 +1485,8 @@ config_file = 60 # was 25 — configs always matter here
|
|
|
1371
1485
|
- **Non-destructive**: never overwrites user files; config patching only touches agentpack-managed blocks
|
|
1372
1486
|
- **Agent-neutral**: architecture is generic; Claude Code is the primary target (deepest integration); Cursor, Windsurf, Codex, and Antigravity are supported but less battle-tested
|
|
1373
1487
|
- **No daemons**: file watching is opt-in via `agentpack watch`; git hooks run in the background and are opt-in via `install`
|
|
1374
|
-
- **
|
|
1488
|
+
- **Measurable**: `benchmark`, `stats`, receipts, and `--misses` are first-class because compression without recall is not enough
|
|
1489
|
+
- **Honest**: packed token count reflects real content, and raw-repo savings are presented separately from practical usefulness
|
|
1375
1490
|
|
|
1376
1491
|
---
|
|
1377
1492
|
|
|
@@ -1380,6 +1495,7 @@ config_file = 60 # was 25 — configs always matter here
|
|
|
1380
1495
|
- **Windows**: not supported. Git hooks use POSIX shell (`#!/bin/sh`, `>/dev/null 2>&1 &`). The Claude Code session hooks use `python3` and `rm -f`. Contributions welcome.
|
|
1381
1496
|
- **Monorepos**: single-root repos only. If you `agentpack pack` from a monorepo root, all packages are scanned together with no workspace awareness. Workaround: `cd packages/my-pkg && agentpack init && agentpack pack`.
|
|
1382
1497
|
- **Symbol extraction**: Python (AST, full) and JavaScript/TypeScript (regex, arrow functions + classes) are well-supported. Go, Rust, Java, and Kotlin have import graph traversal but no symbol extraction — they fall back to file-level summaries.
|
|
1498
|
+
- **Selection recall**: ranking is heuristic. It can miss files when task language differs from code language, when repos have unusual architecture, or when important files are only connected at runtime.
|
|
1383
1499
|
- **Secret redaction**: covers AWS keys, GitHub tokens, OpenAI/Anthropic keys, JWTs, and private key blocks. Not a substitute for a dedicated secrets scanner on sensitive repos.
|
|
1384
1500
|
- **Token estimates**: uses tiktoken `cl100k_base` — approximate, not exact for Claude's billing.
|
|
1385
1501
|
- **Large repos (>5k files)**: global auto-bootstrap is skipped for repos over 5,000 files to avoid hangs. Run `agentpack init` explicitly in large codebases.
|
|
@@ -1396,6 +1512,37 @@ pip install "agentpack-cli[all]" # watch + mcp
|
|
|
1396
1512
|
|
|
1397
1513
|
---
|
|
1398
1514
|
|
|
1515
|
+
## Development
|
|
1516
|
+
|
|
1517
|
+
Clone and run locally:
|
|
1518
|
+
|
|
1519
|
+
```bash
|
|
1520
|
+
git clone https://github.com/vishal2612200/agentpack.git
|
|
1521
|
+
cd agentpack
|
|
1522
|
+
python -m pip install -e ".[dev,watch,mcp]" build
|
|
1523
|
+
pytest
|
|
1524
|
+
```
|
|
1525
|
+
|
|
1526
|
+
Useful checks before opening a PR:
|
|
1527
|
+
|
|
1528
|
+
```bash
|
|
1529
|
+
pytest
|
|
1530
|
+
python -m build
|
|
1531
|
+
agentpack benchmark --sample-fixtures --misses
|
|
1532
|
+
```
|
|
1533
|
+
|
|
1534
|
+
Good contribution areas:
|
|
1535
|
+
|
|
1536
|
+
- More real-world benchmark fixtures and public repo eval cases
|
|
1537
|
+
- Windows support for hooks and session integrations
|
|
1538
|
+
- Better symbol extraction for Go, Rust, Java, and Kotlin
|
|
1539
|
+
- More precise import/dependency resolution for framework-heavy repos
|
|
1540
|
+
- Ranking regressions with `expected_files` cases that reproduce misses
|
|
1541
|
+
|
|
1542
|
+
Please include tests for ranking changes. A good ranking PR usually adds one focused unit test and one scenario in `tests/test_ranking_evals.py`.
|
|
1543
|
+
|
|
1544
|
+
---
|
|
1545
|
+
|
|
1399
1546
|
## License
|
|
1400
1547
|
|
|
1401
1548
|
MIT
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "agentpack-cli"
|
|
3
|
-
version = "0.1.
|
|
4
|
-
description = "
|
|
3
|
+
version = "0.1.22"
|
|
4
|
+
description = "Task-aware context packing for AI coding agents — Claude, Cursor, Windsurf, Codex, and Antigravity"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
7
7
|
license = {text = "MIT"}
|