agentpack-cli 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/PKG-INFO +72 -15
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/README.md +71 -14
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/pyproject.toml +1 -1
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/__init__.py +1 -1
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/dependency_graph.py +27 -15
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/ranking.py +200 -6
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/repo_map.py +20 -1
- agentpack_cli-0.3.0/src/agentpack/analysis/role_inference.py +553 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/symbols.py +1 -1
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/application/pack_service.py +24 -2
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/benchmark.py +342 -4
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/cache.py +6 -2
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/config.py +5 -1
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/context_pack.py +23 -1
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/models.py +20 -7
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/mcp_server.py +71 -16
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/summaries/offline.py +191 -51
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/.gitignore +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/LICENSE +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/antigravity.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/base.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/claude.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/codex.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/cursor.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/detect.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/generic.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/adapters/windsurf.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/go_imports.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/java_imports.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/js_ts_imports.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/monorepo.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/python_imports.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/rust_imports.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/task_classifier.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/analysis/tests.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/application/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/cli.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/_shared.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/claude_cmd.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/diff.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/doctor.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/explain.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/hook_cmd.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/init.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/install.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/mcp_cmd.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/monitor.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/pack.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/quickstart.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/repair.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/scan.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/stats.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/status.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/summarize.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/tune.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/commands/watch.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/bootstrap.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/diff.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/git.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/git_hooks.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/global_install.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/ignore.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/merkle.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/redactor.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/scanner.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/snapshot.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/token_estimator.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/core/vscode_tasks.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/data/agentpack.md +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/installers/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/installers/antigravity.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/installers/claude.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/installers/codex.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/installers/cursor.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/installers/windsurf.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/integrations/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/integrations/agents.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/integrations/git_hooks.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/integrations/global_install.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/integrations/vscode_tasks.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/renderers/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/renderers/compact.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/renderers/markdown.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/renderers/receipts.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/session/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/session/state.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/summaries/__init__.py +0 -0
- {agentpack_cli-0.2.1 → agentpack_cli-0.3.0}/src/agentpack/summaries/base.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentpack-cli
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Task-aware context packing for AI coding agents — Claude, Cursor, Windsurf, Codex, and Antigravity
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
|
|
|
44
44
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
45
45
|
[CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
46
46
|
|
|
47
|
-
> **Status: alpha (v0.
|
|
47
|
+
> **Status: alpha (v0.3.0).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
|
|
48
48
|
>
|
|
49
49
|
> **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
|
|
50
50
|
|
|
@@ -93,7 +93,7 @@ npm install -g @vishal2612200/agentpack
|
|
|
93
93
|
agentpack --version
|
|
94
94
|
```
|
|
95
95
|
|
|
96
|
-
The npm package is a Node launcher around the Python implementation. It installs the matching `agentpack-cli` package into a per-version virtual environment on first run.
|
|
96
|
+
The npm package is a Node launcher around the Python implementation. It requires Node.js 18+ and Python 3.10+, then installs the matching core `agentpack-cli` package into a per-version virtual environment on first run. The Python package remains the source of truth; npm is the convenience install path for JavaScript-heavy teams. Use the PyPI extras below when you need optional `watch` or `mcp` dependencies.
|
|
97
97
|
|
|
98
98
|
## Quickstart
|
|
99
99
|
|
|
@@ -151,10 +151,19 @@ Use real repo evals instead of trusting compression numbers:
|
|
|
151
151
|
```bash
|
|
152
152
|
agentpack benchmark --init
|
|
153
153
|
# add historical tasks and files actually changed
|
|
154
|
-
agentpack benchmark --compare --misses
|
|
154
|
+
agentpack benchmark --compare --misses --public-table
|
|
155
|
+
agentpack benchmark --public-repos --prove-targets --misses --public-table
|
|
155
156
|
agentpack benchmark --results-template
|
|
156
157
|
```
|
|
157
158
|
|
|
159
|
+
For public proof, use several real repositories or anonymized historical task
|
|
160
|
+
sets and publish the generated table from `benchmarks/results/*-public.md`.
|
|
161
|
+
This repo includes a curated public smoke suite in
|
|
162
|
+
`benchmarks/public-repos.toml`; it evaluates real commits from Pallets Click,
|
|
163
|
+
ItsDangerous, and MarkupSafe by checking out each commit's parent and scoring
|
|
164
|
+
against files actually changed by the commit. Synthetic fixtures are useful
|
|
165
|
+
regression tests, but should not be presented as market proof.
|
|
166
|
+
|
|
158
167
|
## Debugging Selection
|
|
159
168
|
|
|
160
169
|
When AgentPack misses a file, the next command should explain the miss:
|
|
@@ -170,6 +179,41 @@ agentpack explain --task "fix billing webhook" --budget-plan
|
|
|
170
179
|
|
|
171
180
|
This is the core reliability loop: pack, measure recall, inspect misses, then tune task wording, `.agentignore`, or scoring weights.
|
|
172
181
|
|
|
182
|
+
## MCP-First Workflow
|
|
183
|
+
|
|
184
|
+
For MCP-capable agents, the preferred workflow is pull-based:
|
|
185
|
+
|
|
186
|
+
1. Call `start_task(task)` when a new task begins. AgentPack writes `.agentpack/task.md`, packs context, and returns ranked markdown.
|
|
187
|
+
2. Call `get_context()` when you need the latest cached pack; it tells you if the pack is stale.
|
|
188
|
+
3. Call `get_delta_context()` after edits or hook hints to see what changed without loading the full pack.
|
|
189
|
+
4. Call `explain_file(path)` or `get_related_files(path)` when a file looks relevant or suspicious.
|
|
190
|
+
|
|
191
|
+
The CLI remains the setup/debug/release path. MCP is the best interactive path because the agent can ask for only the context it needs instead of relying on one static startup blob.
|
|
192
|
+
|
|
193
|
+
## Before / After Agent Behavior
|
|
194
|
+
|
|
195
|
+
Without AgentPack:
|
|
196
|
+
|
|
197
|
+
```text
|
|
198
|
+
User: fix auth token expiry
|
|
199
|
+
Agent: rg "auth"; opens router; opens middleware; opens tests; opens config;
|
|
200
|
+
asks for more files; eventually finds token/session code.
|
|
201
|
+
Cost: repeated repo exploration and many unrelated file reads.
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
With AgentPack:
|
|
205
|
+
|
|
206
|
+
```text
|
|
207
|
+
User: fix auth token expiry
|
|
208
|
+
Agent: calls start_task("fix auth token expiry")
|
|
209
|
+
AgentPack: returns ranked files with reasons:
|
|
210
|
+
1. src/auth/token.py — filename/content match, changed dependency
|
|
211
|
+
2. src/auth/session.py — related implementation
|
|
212
|
+
3. tests/test_auth.py — paired test
|
|
213
|
+
Agent: verifies those files, edits, runs tests, checks misses if needed.
|
|
214
|
+
Cost: starts from a measured map, then still verifies source normally.
|
|
215
|
+
```
|
|
216
|
+
|
|
173
217
|
## When it helps
|
|
174
218
|
|
|
175
219
|
| Workflow | Value |
|
|
@@ -766,7 +810,8 @@ Register in Claude Code settings (`~/.claude/settings.json`):
|
|
|
766
810
|
|
|
767
811
|
| Tool | Description |
|
|
768
812
|
|---|---|
|
|
769
|
-
| `
|
|
813
|
+
| `start_task(task, mode, budget, max_tokens)` | Recommended MCP-first entry point. Writes `.agentpack/task.md`, generates a ranked pack, and returns packed markdown. |
|
|
814
|
+
| `pack_context(task, mode, budget, max_tokens)` | Generate a ranked context pack. If `task` is provided, writes it to `.agentpack/task.md`; if omitted, reads `task.md` or infers from git. |
|
|
770
815
|
| `get_context()` | Return the latest pre-built pack instantly (no repack). Prepends a freshness/staleness header so you know if it's stale. |
|
|
771
816
|
| `refresh()` | Refresh using the current `task.md` or git-inferred task. |
|
|
772
817
|
| `explain_file(path, task)` | Show score, inclusion mode, reasons, symbols, imports, and importers for one file. |
|
|
@@ -779,7 +824,7 @@ Register in Claude Code settings (`~/.claude/settings.json`):
|
|
|
779
824
|
> **Stale context** — repo changed since last pack (generated: ...). Run pack_context() to refresh.
|
|
780
825
|
```
|
|
781
826
|
|
|
782
|
-
**Smart truncation:** `pack_context()`
|
|
827
|
+
**Smart truncation:** `start_task()` and `pack_context()` keep headers intact and trim file content blocks to fit the token budget, appending a note about how many files were omitted.
|
|
783
828
|
|
|
784
829
|
Zero API calls — all analysis is offline. Summary cache keyed by file hash: cold run parallelises AST parsing across CPU cores; warm cache hits are instant.
|
|
785
830
|
|
|
@@ -831,8 +876,10 @@ agentpack benchmark --init # scaffold .agentpack
|
|
|
831
876
|
agentpack benchmark --results-template # scaffold publishable results note
|
|
832
877
|
agentpack benchmark # run all cases in benchmark.toml
|
|
833
878
|
agentpack benchmark --sample-fixtures # source checkout demo evals
|
|
879
|
+
agentpack benchmark --public-repos # real public commit evals
|
|
834
880
|
agentpack benchmark --misses # explain expected-file misses
|
|
835
881
|
agentpack benchmark --prove-targets # fail if recall/token precision targets are missed
|
|
882
|
+
agentpack benchmark --public-table # write benchmarks/results/*-public.md
|
|
836
883
|
```
|
|
837
884
|
|
|
838
885
|
Output per case:
|
|
@@ -889,6 +936,15 @@ Use `--misses` when recall is low. It prints each expected file that was not sel
|
|
|
889
936
|
|
|
890
937
|
Use `--prove-targets` in CI or release prep when benchmark cases have `expected_files`. By default it requires average recall >=60% and token precision >=50%; tune with `--min-recall` and `--min-token-precision`.
|
|
891
938
|
|
|
939
|
+
Use `--public-repos` from an AgentPack source checkout to run the committed
|
|
940
|
+
real-repo smoke suite:
|
|
941
|
+
|
|
942
|
+
```bash
|
|
943
|
+
agentpack benchmark --public-repos --prove-targets --misses --public-table
|
|
944
|
+
```
|
|
945
|
+
|
|
946
|
+
After adding real historical tasks, use `--public-table` to write a publishable Markdown table with per-repo/task recall, token precision, rank@K, pack size, and miss count. This table is the recommended artifact for README claims, release notes, and external benchmarks.
|
|
947
|
+
|
|
892
948
|
Add `task_type` to group results by workflow area. Benchmark summaries report average precision, recall, F1, and token noise by type, so a repo can show "backend-api is good, frontend-web is noisy" instead of hiding that under one aggregate.
|
|
893
949
|
|
|
894
950
|
---
|
|
@@ -1318,7 +1374,7 @@ src/agentpack/
|
|
|
1318
1374
|
compact.py # compact protocol format for session context files
|
|
1319
1375
|
receipts.py # context receipt formatter
|
|
1320
1376
|
|
|
1321
|
-
mcp_server.py # MCP tools: pack_context, get_context, explain, related, stats, delta
|
|
1377
|
+
mcp_server.py # MCP tools: start_task, pack_context, get_context, explain, related, stats, delta
|
|
1322
1378
|
|
|
1323
1379
|
session/
|
|
1324
1380
|
state.py # SessionState dataclass + load/save/create/stop helpers
|
|
@@ -1356,6 +1412,7 @@ src/agentpack/
|
|
|
1356
1412
|
- **Repo maps are first-class context**: `analysis/repo_map.py` builds a compact semantic map before file context, and its token cost is reserved before file selection.
|
|
1357
1413
|
- **Metrics feed history learning**: selection accuracy records hit/noise paths, token precision, mode counts, and mode tokens. Later packs gently penalize repeated noisy paths unless they are currently changed.
|
|
1358
1414
|
- **Git history feeds recall**: files that historically changed in the same commits as live changed files receive a small boost, helping related tests, schemas, services, and configs surface without forcing full-content inclusion.
|
|
1415
|
+
- **Second-pass expansion is guarded**: after first scoring, strong seeds can lift two-hop import, reverse-import, config, and related-test neighbors only when they share task or domain signal.
|
|
1359
1416
|
- **Co-change is guarded by precision history**: one-off co-change neighbors are ignored, and paths repeatedly measured as noise do not get revived by history boosts.
|
|
1360
1417
|
- **Precision guardrails adapt to bad history**: when summary token precision stays near zero, later packs raise the summary score floor, cap summaries more aggressively, and suppress summaries entirely for no-live-change packs. Weak filename-only matches are also damped unless other signals confirm them.
|
|
1361
1418
|
- **`AdapterRegistry` maps agent → adapter**: adding a new agent output format requires one entry in `AdapterRegistry.get()`, not changes to `PackService`.
|
|
@@ -1364,7 +1421,7 @@ src/agentpack/
|
|
|
1364
1421
|
- **`integrations/` vs `core/`**: git hooks, shell rc patching, and VS Code tasks are infrastructure concerns — they live in `integrations/`, not `core/`. `core/` is pure domain logic.
|
|
1365
1422
|
- **Adapters render; installers configure**: `adapters/` knows how to write a context file for an agent. `installers/` knows how to configure the agent's tool (CLAUDE.md, .cursorrules, settings.json). They are separate concerns and separate classes.
|
|
1366
1423
|
- **Agent integration contract is shared**: `integrations/agents.py` defines install, audit, and repair behavior for Claude, Cursor, Windsurf, Codex, Antigravity, and Generic. `install`, `repair`, `doctor --agent all`, and release verification use the same contract.
|
|
1367
|
-
- **MCP
|
|
1424
|
+
- **MCP is the interactive path**: `start_task()` writes task state and returns a fresh pack, while `get_context()`, `get_delta_context()`, `explain_file()`, and `get_related_files()` let agents pull follow-up context on demand.
|
|
1368
1425
|
|
|
1369
1426
|
---
|
|
1370
1427
|
|
|
@@ -1383,7 +1440,7 @@ src/agentpack/
|
|
|
1383
1440
|
|
|
1384
1441
|
- **Windows**: not supported. Git hooks use POSIX shell (`#!/bin/sh`, `>/dev/null 2>&1 &`). The Claude Code session hooks use `python3` and `rm -f`. Contributions welcome.
|
|
1385
1442
|
- **Monorepos**: workspace-aware ranking supports npm/pnpm, Cargo, and `go.work` layouts. `--workspace` creates filtered per-workspace outputs. Package dependency hints currently come from npm/pnpm `package.json`; Cargo/Go workspace membership is detected, but package-manager dependency edges for Cargo/Go are not yet modeled.
|
|
1386
|
-
- **Public benchmark proof**:
|
|
1443
|
+
- **Public benchmark proof**: `benchmarks/public-repos.toml` is a curated smoke suite over real public commits, and `benchmarks/results/2026-05-15-public.md` records the current proof run. Treat it as a floor, not a leaderboard; expand cases before broad external claims.
|
|
1387
1444
|
- **Symbol extraction**: Python (AST, full) and JavaScript/TypeScript (regex, arrow functions + classes) are well-supported. Go, Rust, Java, Kotlin have import graph traversal but no symbol extraction — they fall back to file-level summaries.
|
|
1388
1445
|
- **Selection recall**: ranking is heuristic. It can miss files when task language differs from code language, when repos have unusual architecture, or when important files are only connected at runtime.
|
|
1389
1446
|
- **Secret redaction**: covers AWS keys, GitHub tokens, OpenAI/Anthropic keys, JWTs, and private key blocks. Not a substitute for a dedicated secrets scanner on sensitive repos.
|
|
@@ -1394,12 +1451,12 @@ src/agentpack/
|
|
|
1394
1451
|
|
|
1395
1452
|
## Roadmap
|
|
1396
1453
|
|
|
1397
|
-
|
|
1454
|
+
Post-0.3 release focus: broader real-repo proof, npm publish reliability, and continued ranking precision.
|
|
1398
1455
|
|
|
1399
|
-
- Expand public
|
|
1400
|
-
-
|
|
1401
|
-
-
|
|
1402
|
-
- Make
|
|
1456
|
+
- Expand the public real-repo suite beyond the current curated Pallets smoke set.
|
|
1457
|
+
- Keep recall gains measured with `--prove-targets`; target 60%+ recall, 50%+ token precision, and task packs under 25k tokens.
|
|
1458
|
+
- Extend second-pass expansion with framework route/service/schema pairs once benchmark misses prove the pattern.
|
|
1459
|
+
- Make npm publishing reliable by adding `NPM_TOKEN` and rerunning the npm release workflow.
|
|
1403
1460
|
- Keep integration contracts stable across Claude, Cursor, Windsurf, Codex, Antigravity, and Generic before any 1.0 work.
|
|
1404
1461
|
|
|
1405
1462
|
---
|
|
@@ -1438,7 +1495,7 @@ agentpack benchmark --sample-fixtures --misses
|
|
|
1438
1495
|
agentpack doctor
|
|
1439
1496
|
```
|
|
1440
1497
|
|
|
1441
|
-
For npm publish, configure GitHub secret `NPM_TOKEN`. `agentpack doctor` warns locally when neither `NPM_TOKEN` nor `NODE_AUTH_TOKEN` is present, and the npm publish workflow fails early
|
|
1498
|
+
For npm publish, configure GitHub secret `NPM_TOKEN`. The token must have publish access to the npm scope in `npm/package.json` (`@vishal2612200` today): use a token from that npm user, or create an npm org with that scope and grant the token owner publish access. If `npm publish` reaches the registry and then fails with `E404 Not Found - PUT ... @scope/package`, the token is authenticated but does not own or have write access to that scope. `agentpack doctor` warns locally when neither `NPM_TOKEN` nor `NODE_AUTH_TOKEN` is present, and the npm publish workflow fails early when the secret or scope access is wrong.
|
|
1442
1499
|
|
|
1443
1500
|
Good contribution areas:
|
|
1444
1501
|
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
6
|
[CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
7
7
|
|
|
8
|
-
> **Status: alpha (v0.
|
|
8
|
+
> **Status: alpha (v0.3.0).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
|
|
9
9
|
>
|
|
10
10
|
> **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
|
|
11
11
|
|
|
@@ -54,7 +54,7 @@ npm install -g @vishal2612200/agentpack
|
|
|
54
54
|
agentpack --version
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
The npm package is a Node launcher around the Python implementation. It installs the matching `agentpack-cli` package into a per-version virtual environment on first run.
|
|
57
|
+
The npm package is a Node launcher around the Python implementation. It requires Node.js 18+ and Python 3.10+, then installs the matching core `agentpack-cli` package into a per-version virtual environment on first run. The Python package remains the source of truth; npm is the convenience install path for JavaScript-heavy teams. Use the PyPI extras below when you need optional `watch` or `mcp` dependencies.
|
|
58
58
|
|
|
59
59
|
## Quickstart
|
|
60
60
|
|
|
@@ -112,10 +112,19 @@ Use real repo evals instead of trusting compression numbers:
|
|
|
112
112
|
```bash
|
|
113
113
|
agentpack benchmark --init
|
|
114
114
|
# add historical tasks and files actually changed
|
|
115
|
-
agentpack benchmark --compare --misses
|
|
115
|
+
agentpack benchmark --compare --misses --public-table
|
|
116
|
+
agentpack benchmark --public-repos --prove-targets --misses --public-table
|
|
116
117
|
agentpack benchmark --results-template
|
|
117
118
|
```
|
|
118
119
|
|
|
120
|
+
For public proof, use several real repositories or anonymized historical task
|
|
121
|
+
sets and publish the generated table from `benchmarks/results/*-public.md`.
|
|
122
|
+
This repo includes a curated public smoke suite in
|
|
123
|
+
`benchmarks/public-repos.toml`; it evaluates real commits from Pallets Click,
|
|
124
|
+
ItsDangerous, and MarkupSafe by checking out each commit's parent and scoring
|
|
125
|
+
against files actually changed by the commit. Synthetic fixtures are useful
|
|
126
|
+
regression tests, but should not be presented as market proof.
|
|
127
|
+
|
|
119
128
|
## Debugging Selection
|
|
120
129
|
|
|
121
130
|
When AgentPack misses a file, the next command should explain the miss:
|
|
@@ -131,6 +140,41 @@ agentpack explain --task "fix billing webhook" --budget-plan
|
|
|
131
140
|
|
|
132
141
|
This is the core reliability loop: pack, measure recall, inspect misses, then tune task wording, `.agentignore`, or scoring weights.
|
|
133
142
|
|
|
143
|
+
## MCP-First Workflow
|
|
144
|
+
|
|
145
|
+
For MCP-capable agents, the preferred workflow is pull-based:
|
|
146
|
+
|
|
147
|
+
1. Call `start_task(task)` when a new task begins. AgentPack writes `.agentpack/task.md`, packs context, and returns ranked markdown.
|
|
148
|
+
2. Call `get_context()` when you need the latest cached pack; it tells you if the pack is stale.
|
|
149
|
+
3. Call `get_delta_context()` after edits or hook hints to see what changed without loading the full pack.
|
|
150
|
+
4. Call `explain_file(path)` or `get_related_files(path)` when a file looks relevant or suspicious.
|
|
151
|
+
|
|
152
|
+
The CLI remains the setup/debug/release path. MCP is the best interactive path because the agent can ask for only the context it needs instead of relying on one static startup blob.
|
|
153
|
+
|
|
154
|
+
## Before / After Agent Behavior
|
|
155
|
+
|
|
156
|
+
Without AgentPack:
|
|
157
|
+
|
|
158
|
+
```text
|
|
159
|
+
User: fix auth token expiry
|
|
160
|
+
Agent: rg "auth"; opens router; opens middleware; opens tests; opens config;
|
|
161
|
+
asks for more files; eventually finds token/session code.
|
|
162
|
+
Cost: repeated repo exploration and many unrelated file reads.
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
With AgentPack:
|
|
166
|
+
|
|
167
|
+
```text
|
|
168
|
+
User: fix auth token expiry
|
|
169
|
+
Agent: calls start_task("fix auth token expiry")
|
|
170
|
+
AgentPack: returns ranked files with reasons:
|
|
171
|
+
1. src/auth/token.py — filename/content match, changed dependency
|
|
172
|
+
2. src/auth/session.py — related implementation
|
|
173
|
+
3. tests/test_auth.py — paired test
|
|
174
|
+
Agent: verifies those files, edits, runs tests, checks misses if needed.
|
|
175
|
+
Cost: starts from a measured map, then still verifies source normally.
|
|
176
|
+
```
|
|
177
|
+
|
|
134
178
|
## When it helps
|
|
135
179
|
|
|
136
180
|
| Workflow | Value |
|
|
@@ -727,7 +771,8 @@ Register in Claude Code settings (`~/.claude/settings.json`):
|
|
|
727
771
|
|
|
728
772
|
| Tool | Description |
|
|
729
773
|
|---|---|
|
|
730
|
-
| `
|
|
774
|
+
| `start_task(task, mode, budget, max_tokens)` | Recommended MCP-first entry point. Writes `.agentpack/task.md`, generates a ranked pack, and returns packed markdown. |
|
|
775
|
+
| `pack_context(task, mode, budget, max_tokens)` | Generate a ranked context pack. If `task` is provided, writes it to `.agentpack/task.md`; if omitted, reads `task.md` or infers from git. |
|
|
731
776
|
| `get_context()` | Return the latest pre-built pack instantly (no repack). Prepends a freshness/staleness header so you know if it's stale. |
|
|
732
777
|
| `refresh()` | Refresh using the current `task.md` or git-inferred task. |
|
|
733
778
|
| `explain_file(path, task)` | Show score, inclusion mode, reasons, symbols, imports, and importers for one file. |
|
|
@@ -740,7 +785,7 @@ Register in Claude Code settings (`~/.claude/settings.json`):
|
|
|
740
785
|
> **Stale context** — repo changed since last pack (generated: ...). Run pack_context() to refresh.
|
|
741
786
|
```
|
|
742
787
|
|
|
743
|
-
**Smart truncation:** `pack_context()`
|
|
788
|
+
**Smart truncation:** `start_task()` and `pack_context()` keep headers intact and trim file content blocks to fit the token budget, appending a note about how many files were omitted.
|
|
744
789
|
|
|
745
790
|
Zero API calls — all analysis is offline. Summary cache keyed by file hash: cold run parallelises AST parsing across CPU cores; warm cache hits are instant.
|
|
746
791
|
|
|
@@ -792,8 +837,10 @@ agentpack benchmark --init # scaffold .agentpack
|
|
|
792
837
|
agentpack benchmark --results-template # scaffold publishable results note
|
|
793
838
|
agentpack benchmark # run all cases in benchmark.toml
|
|
794
839
|
agentpack benchmark --sample-fixtures # source checkout demo evals
|
|
840
|
+
agentpack benchmark --public-repos # real public commit evals
|
|
795
841
|
agentpack benchmark --misses # explain expected-file misses
|
|
796
842
|
agentpack benchmark --prove-targets # fail if recall/token precision targets are missed
|
|
843
|
+
agentpack benchmark --public-table # write benchmarks/results/*-public.md
|
|
797
844
|
```
|
|
798
845
|
|
|
799
846
|
Output per case:
|
|
@@ -850,6 +897,15 @@ Use `--misses` when recall is low. It prints each expected file that was not sel
|
|
|
850
897
|
|
|
851
898
|
Use `--prove-targets` in CI or release prep when benchmark cases have `expected_files`. By default it requires average recall >=60% and token precision >=50%; tune with `--min-recall` and `--min-token-precision`.
|
|
852
899
|
|
|
900
|
+
Use `--public-repos` from an AgentPack source checkout to run the committed
|
|
901
|
+
real-repo smoke suite:
|
|
902
|
+
|
|
903
|
+
```bash
|
|
904
|
+
agentpack benchmark --public-repos --prove-targets --misses --public-table
|
|
905
|
+
```
|
|
906
|
+
|
|
907
|
+
After adding real historical tasks, use `--public-table` to write a publishable Markdown table with per-repo/task recall, token precision, rank@K, pack size, and miss count. This table is the recommended artifact for README claims, release notes, and external benchmarks.
|
|
908
|
+
|
|
853
909
|
Add `task_type` to group results by workflow area. Benchmark summaries report average precision, recall, F1, and token noise by type, so a repo can show "backend-api is good, frontend-web is noisy" instead of hiding that under one aggregate.
|
|
854
910
|
|
|
855
911
|
---
|
|
@@ -1279,7 +1335,7 @@ src/agentpack/
|
|
|
1279
1335
|
compact.py # compact protocol format for session context files
|
|
1280
1336
|
receipts.py # context receipt formatter
|
|
1281
1337
|
|
|
1282
|
-
mcp_server.py # MCP tools: pack_context, get_context, explain, related, stats, delta
|
|
1338
|
+
mcp_server.py # MCP tools: start_task, pack_context, get_context, explain, related, stats, delta
|
|
1283
1339
|
|
|
1284
1340
|
session/
|
|
1285
1341
|
state.py # SessionState dataclass + load/save/create/stop helpers
|
|
@@ -1317,6 +1373,7 @@ src/agentpack/
|
|
|
1317
1373
|
- **Repo maps are first-class context**: `analysis/repo_map.py` builds a compact semantic map before file context, and its token cost is reserved before file selection.
|
|
1318
1374
|
- **Metrics feed history learning**: selection accuracy records hit/noise paths, token precision, mode counts, and mode tokens. Later packs gently penalize repeated noisy paths unless they are currently changed.
|
|
1319
1375
|
- **Git history feeds recall**: files that historically changed in the same commits as live changed files receive a small boost, helping related tests, schemas, services, and configs surface without forcing full-content inclusion.
|
|
1376
|
+
- **Second-pass expansion is guarded**: after first scoring, strong seeds can lift two-hop import, reverse-import, config, and related-test neighbours only when they share task or domain signal.
|
|
1320
1377
|
- **Co-change is guarded by precision history**: one-off co-change neighbors are ignored, and paths repeatedly measured as noise do not get revived by history boosts.
|
|
1321
1378
|
- **Precision guardrails adapt to bad history**: when summary token precision stays near zero, later packs raise the summary score floor, cap summaries more aggressively, and suppress summaries entirely for no-live-change packs. Weak filename-only matches are also damped unless other signals confirm them.
|
|
1322
1379
|
- **`AdapterRegistry` maps agent → adapter**: adding a new agent output format requires one entry in `AdapterRegistry.get()`, not changes to `PackService`.
|
|
@@ -1325,7 +1382,7 @@ src/agentpack/
|
|
|
1325
1382
|
- **`integrations/` vs `core/`**: git hooks, shell rc patching, and VS Code tasks are infrastructure concerns — they live in `integrations/`, not `core/`. `core/` is pure domain logic.
|
|
1326
1383
|
- **Adapters render; installers configure**: `adapters/` knows how to write a context file for an agent. `installers/` knows how to configure the agent's tool (CLAUDE.md, .cursorrules, settings.json). They are separate concerns and separate classes.
|
|
1327
1384
|
- **Agent integration contract is shared**: `integrations/agents.py` defines install, audit, and repair behavior for Claude, Cursor, Windsurf, Codex, Antigravity, and Generic. `install`, `repair`, `doctor --agent all`, and release verification use the same contract.
|
|
1328
|
-
- **MCP
|
|
1385
|
+
- **MCP is the interactive path**: `start_task()` writes task state and returns a fresh pack, while `get_context()`, `get_delta_context()`, `explain_file()`, and `get_related_files()` let agents pull follow-up context on demand.
|
|
1329
1386
|
|
|
1330
1387
|
---
|
|
1331
1388
|
|
|
@@ -1344,7 +1401,7 @@ src/agentpack/
|
|
|
1344
1401
|
|
|
1345
1402
|
- **Windows**: not supported. Git hooks use POSIX shell (`#!/bin/sh`, `>/dev/null 2>&1 &`). The Claude Code session hooks use `python3` and `rm -f`. Contributions welcome.
|
|
1346
1403
|
- **Monorepos**: workspace-aware ranking supports npm/pnpm, Cargo, and `go.work` layouts. `--workspace` creates filtered per-workspace outputs. Package dependency hints currently come from npm/pnpm `package.json`; Cargo/Go workspace membership is detected, but package-manager dependency edges for Cargo/Go are not yet modeled.
|
|
1347
|
-
- **Public benchmark proof**:
|
|
1404
|
+
- **Public benchmark proof**: `benchmarks/public-repos.toml` is a curated smoke suite over real public commits, and `benchmarks/results/2026-05-15-public.md` records the current proof run. Treat it as a floor, not a leaderboard; expand cases before broad external claims.
|
|
1348
1405
|
- **Symbol extraction**: Python (AST, full) and JavaScript/TypeScript (regex, arrow functions + classes) are well-supported. Go, Rust, Java, Kotlin have import graph traversal but no symbol extraction — they fall back to file-level summaries.
|
|
1349
1406
|
- **Selection recall**: ranking is heuristic. It can miss files when task language differs from code language, when repos have unusual architecture, or when important files are only connected at runtime.
|
|
1350
1407
|
- **Secret redaction**: covers AWS keys, GitHub tokens, OpenAI/Anthropic keys, JWTs, and private key blocks. Not a substitute for a dedicated secrets scanner on sensitive repos.
|
|
@@ -1355,12 +1412,12 @@ src/agentpack/
|
|
|
1355
1412
|
|
|
1356
1413
|
## Roadmap
|
|
1357
1414
|
|
|
1358
|
-
|
|
1415
|
+
Post-0.3 release focus: broader real-repo proof, npm publish reliability, and continued ranking precision.
|
|
1359
1416
|
|
|
1360
|
-
- Expand public
|
|
1361
|
-
-
|
|
1362
|
-
-
|
|
1363
|
-
- Make
|
|
1417
|
+
- Expand the public real-repo suite beyond the current curated Pallets smoke set.
|
|
1418
|
+
- Keep recall gains measured with `--prove-targets`; target 60%+ recall, 50%+ token precision, and task packs under 25k tokens.
|
|
1419
|
+
- Extend second-pass expansion with framework route/service/schema pairs once benchmark misses prove the pattern.
|
|
1420
|
+
- Make npm publishing reliable by adding `NPM_TOKEN` and rerunning the npm release workflow.
|
|
1364
1421
|
- Keep integration contracts stable across Claude, Cursor, Windsurf, Codex, Antigravity, and Generic before any 1.0 work.
|
|
1365
1422
|
|
|
1366
1423
|
---
|
|
@@ -1399,7 +1456,7 @@ agentpack benchmark --sample-fixtures --misses
|
|
|
1399
1456
|
agentpack doctor
|
|
1400
1457
|
```
|
|
1401
1458
|
|
|
1402
|
-
For npm publish, configure GitHub secret `NPM_TOKEN`. `agentpack doctor` warns locally when neither `NPM_TOKEN` nor `NODE_AUTH_TOKEN` is present, and the npm publish workflow fails early
|
|
1459
|
+
For npm publish, configure GitHub secret `NPM_TOKEN`. The token must have publish access to the npm scope in `npm/package.json` (`@vishal2612200` today): use a token from that npm user, or create an npm org with that scope and grant the token owner publish access. If `npm publish` reaches the registry and then fails with `E404 Not Found - PUT ... @scope/package`, the token is authenticated but does not own or have write access to that scope. `agentpack doctor` warns locally when neither `NPM_TOKEN` nor `NODE_AUTH_TOKEN` is present, and the npm publish workflow fails early when the secret or scope access is wrong.
|
|
1403
1460
|
|
|
1404
1461
|
Good contribution areas:
|
|
1405
1462
|
|
|
@@ -37,8 +37,9 @@ def build(
|
|
|
37
37
|
if summaries and fi.path in summaries:
|
|
38
38
|
cached_imports = summaries[fi.path].get("imports", [])
|
|
39
39
|
if cached_imports:
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
resolved_cached = _resolve_imports(fi.path, fi.language, cached_imports, root, path_set)
|
|
41
|
+
graph.nodes[fi.path].imports = resolved_cached
|
|
42
|
+
for dep in resolved_cached:
|
|
42
43
|
if dep in graph:
|
|
43
44
|
graph.nodes[dep].imported_by.append(fi.path)
|
|
44
45
|
continue
|
|
@@ -58,19 +59,7 @@ def build(
|
|
|
58
59
|
elif lang in ("java", "kotlin"):
|
|
59
60
|
raw_imports = java_imports(fi.abs_path, cached)
|
|
60
61
|
|
|
61
|
-
resolved
|
|
62
|
-
for imp in raw_imports:
|
|
63
|
-
if imp.startswith("."):
|
|
64
|
-
if lang == "python":
|
|
65
|
-
r = py_resolve(fi.path, imp, root)
|
|
66
|
-
elif lang in ("javascript", "typescript"):
|
|
67
|
-
r = js_resolve(fi.path, imp, root)
|
|
68
|
-
else:
|
|
69
|
-
r = None
|
|
70
|
-
if r and r in path_set:
|
|
71
|
-
resolved.append(r)
|
|
72
|
-
else:
|
|
73
|
-
resolved.append(imp)
|
|
62
|
+
resolved = _resolve_imports(fi.path, lang, raw_imports, root, path_set)
|
|
74
63
|
|
|
75
64
|
graph.nodes[fi.path].imports = resolved
|
|
76
65
|
for dep in resolved:
|
|
@@ -78,3 +67,26 @@ def build(
|
|
|
78
67
|
graph.nodes[dep].imported_by.append(fi.path)
|
|
79
68
|
|
|
80
69
|
return graph
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _resolve_imports(
|
|
73
|
+
importer: str,
|
|
74
|
+
language: str | None,
|
|
75
|
+
imports: list[str],
|
|
76
|
+
root: Path,
|
|
77
|
+
path_set: set[str],
|
|
78
|
+
) -> list[str]:
|
|
79
|
+
resolved: list[str] = []
|
|
80
|
+
for imp in imports:
|
|
81
|
+
if imp.startswith("."):
|
|
82
|
+
if language == "python":
|
|
83
|
+
r = py_resolve(importer, imp, root)
|
|
84
|
+
elif language in ("javascript", "typescript"):
|
|
85
|
+
r = js_resolve(importer, imp, root)
|
|
86
|
+
else:
|
|
87
|
+
r = None
|
|
88
|
+
if r and r in path_set:
|
|
89
|
+
resolved.append(r)
|
|
90
|
+
else:
|
|
91
|
+
resolved.append(imp)
|
|
92
|
+
return resolved
|