agentpack-cli 0.1.21__tar.gz → 0.1.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/PKG-INFO +167 -20
  2. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/README.md +165 -18
  3. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/pyproject.toml +2 -2
  4. agentpack_cli-0.1.22/src/agentpack/__init__.py +3 -0
  5. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/ranking.py +89 -0
  6. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/application/pack_service.py +2 -0
  7. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/cli.py +2 -2
  8. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/benchmark.py +231 -3
  9. agentpack_cli-0.1.22/src/agentpack/commands/quickstart.py +112 -0
  10. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/config.py +4 -0
  11. agentpack_cli-0.1.21/src/agentpack/__init__.py +0 -3
  12. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/.gitignore +0 -0
  13. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/LICENSE +0 -0
  14. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/__init__.py +0 -0
  15. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/antigravity.py +0 -0
  16. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/base.py +0 -0
  17. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/claude.py +0 -0
  18. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/codex.py +0 -0
  19. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/cursor.py +0 -0
  20. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/detect.py +0 -0
  21. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/generic.py +0 -0
  22. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/adapters/windsurf.py +0 -0
  23. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/__init__.py +0 -0
  24. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/dependency_graph.py +0 -0
  25. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/go_imports.py +0 -0
  26. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/java_imports.py +0 -0
  27. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/js_ts_imports.py +0 -0
  28. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/python_imports.py +0 -0
  29. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/rust_imports.py +0 -0
  30. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/symbols.py +0 -0
  31. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/analysis/tests.py +0 -0
  32. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/application/__init__.py +0 -0
  33. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/__init__.py +0 -0
  34. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/_shared.py +0 -0
  35. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/claude_cmd.py +0 -0
  36. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/diff.py +0 -0
  37. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/doctor.py +0 -0
  38. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/explain.py +0 -0
  39. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/hook_cmd.py +0 -0
  40. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/init.py +0 -0
  41. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/install.py +0 -0
  42. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/mcp_cmd.py +0 -0
  43. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/monitor.py +0 -0
  44. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/pack.py +0 -0
  45. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/scan.py +0 -0
  46. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/stats.py +0 -0
  47. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/status.py +0 -0
  48. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/summarize.py +0 -0
  49. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/commands/watch.py +0 -0
  50. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/__init__.py +0 -0
  51. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/bootstrap.py +0 -0
  52. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/cache.py +0 -0
  53. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/context_pack.py +0 -0
  54. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/diff.py +0 -0
  55. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/git.py +0 -0
  56. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/git_hooks.py +0 -0
  57. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/global_install.py +0 -0
  58. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/ignore.py +0 -0
  59. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/merkle.py +0 -0
  60. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/models.py +0 -0
  61. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/redactor.py +0 -0
  62. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/scanner.py +0 -0
  63. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/snapshot.py +0 -0
  64. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/token_estimator.py +0 -0
  65. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/core/vscode_tasks.py +0 -0
  66. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/data/agentpack.md +0 -0
  67. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/__init__.py +0 -0
  68. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/antigravity.py +0 -0
  69. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/claude.py +0 -0
  70. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/codex.py +0 -0
  71. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/cursor.py +0 -0
  72. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/installers/windsurf.py +0 -0
  73. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/integrations/__init__.py +0 -0
  74. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/integrations/git_hooks.py +0 -0
  75. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/integrations/global_install.py +0 -0
  76. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/integrations/vscode_tasks.py +0 -0
  77. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/mcp_server.py +0 -0
  78. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/renderers/__init__.py +0 -0
  79. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/renderers/compact.py +0 -0
  80. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/renderers/markdown.py +0 -0
  81. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/renderers/receipts.py +0 -0
  82. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/session/__init__.py +0 -0
  83. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/session/state.py +0 -0
  84. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/summaries/__init__.py +0 -0
  85. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/summaries/base.py +0 -0
  86. {agentpack_cli-0.1.21 → agentpack_cli-0.1.22}/src/agentpack/summaries/offline.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentpack-cli
3
- Version: 0.1.21
4
- Summary: Token-aware context packing for AI coding agents — Claude, Cursor, Windsurf, and Codex
3
+ Version: 0.1.22
4
+ Summary: Task-aware context packing for AI coding agents — Claude, Cursor, Windsurf, Codex, and Antigravity
5
5
  License: MIT
6
6
  License-File: LICENSE
7
7
  Keywords: ai,claude,codex,coding-agent,context,cursor,llm,packing,windsurf
@@ -44,11 +44,27 @@ Description-Content-Type: text/markdown
44
44
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
45
45
  [![CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml/badge.svg)](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
46
46
 
47
- > **Status: alpha (v0.1.21).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
47
+ > **Status: alpha (v0.1.22).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
48
48
  >
49
49
  > **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
50
50
 
51
- **Token-aware context packing for AI coding agents.**
51
+ **Task-aware context packing for AI coding agents.**
52
+
53
+ AgentPack scans a repository, ranks files for the task you are working on, and writes a compact markdown context pack for Claude Code, Cursor, Windsurf, Codex, Antigravity, CI jobs, or any LLM workflow.
54
+
55
+ It is useful when the repo is too large to paste, but you still want the agent to start with more than a blank slate.
56
+
57
+ **What it is**
58
+ - A local CLI for building task-focused context packs
59
+ - A summary cache, import graph, ranking engine, and token-budget selector
60
+ - Optional integrations for popular coding agents
61
+ - An eval harness for measuring whether selected files match files you actually changed
62
+
63
+ **What it is not**
64
+ - Not a coding agent
65
+ - Not a semantic code search engine
66
+ - Not a replacement for manual inspection on high-stakes changes
67
+ - Not yet proven across a large public benchmark suite
52
68
 
53
69
  ---
54
70
 
@@ -71,16 +87,19 @@ None of these scale. On a 200-file codebase, option 1 wastes 5–10 turns just o
71
87
  AgentPack solves this with a one-time offline analysis pass:
72
88
 
73
89
  1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
74
- 2. **On each task** — uses git diff + import graph traversal + keyword scoring to rank every file by relevance to what you're working on.
75
- 3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
76
- 4. **Explains pack quality** — noisy-pack diagnostics, score receipts, and token-precision metrics show when the pack is broad and where token noise lives.
90
+ 2. **On each task** — uses git diff, import graph traversal, keyword/concept expansion, implementation-role boosts, and cross-layer relatedness to rank every file.
91
+ 3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies and likely implementation files get summaries, everything else gets dropped.
92
+ 4. **Explains pack quality** — noisy-pack diagnostics, score receipts, token-precision metrics, and benchmark miss reports show when the pack is broad or missing expected files.
77
93
  5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
78
94
 
79
- The result: your agent starts every session with a focused, accurate picture of the relevant code without you doing anything after opt-in.
95
+ The result: your agent starts with a focused map of the relevant code. It should reduce blind exploration, not replace the agent's own file reads or your judgment.
80
96
 
81
97
  ```bash
82
98
  pip install agentpack-cli
83
99
 
100
+ # Show the fastest path for your repo
101
+ agentpack quickstart --task "fix auth token expiry"
102
+
84
103
  # One-time setup per project
85
104
  cd your-project
86
105
  agentpack init # creates config, session, task.md — nothing else needed
@@ -100,13 +119,53 @@ agentpack global-install --dry-run # preview first
100
119
  agentpack global-install
101
120
  ```
102
121
 
103
- Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM.
122
+ Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM that can read markdown.
104
123
 
105
124
  ---
106
125
 
107
- ## AgentPack Results (7 days, 21 sessions)
126
+ ## What to expect
127
+
128
+ AgentPack's strongest value is repeatable orientation: it gives the agent a compact first-pass map before tool calls begin.
129
+
130
+ Typical results on large repos:
131
+
132
+ | Signal | What good looks like |
133
+ |---|---|
134
+ | Token reduction | 90-99% smaller than raw repo text |
135
+ | Pack size | Usually 8k-25k tokens for a specific task |
136
+ | Pack time | Seconds on warm cache; first summarize pass is slower |
137
+ | Recall | Should be high for files you later edit; validate with `agentpack benchmark` |
138
+ | Precision | Often modest; summaries are cheap but can still add noise |
108
139
 
109
- ### Token Compression
140
+ The compression number is easy to verify, but it is not the same as usefulness. The important question is: **did AgentPack include the files you actually needed?**
141
+
142
+ Use the built-in eval flow:
143
+
144
+ ```bash
145
+ agentpack benchmark --init
146
+ # add real historical tasks and files you actually changed
147
+ agentpack benchmark --compare --misses
148
+ ```
149
+
150
+ For source checkouts, there is also a small smoke suite:
151
+
152
+ ```bash
153
+ agentpack benchmark --sample-fixtures --misses
154
+ ```
155
+
156
+ This runs FastAPI, Next.js, and mixed Python/TypeScript fixture tasks. It is a sanity check, not a substitute for real repo evals.
157
+
158
+ ### Current quality bar
159
+
160
+ AgentPack is best described as a **map, not a compass**. It is already good at token reduction, changed-file inclusion, related tests, imports, configs, and common concepts like auth/cache/rate limiting. Recent ranking work also improves full-stack tasks by pulling service/controller/schema/handler files when UI routes or pages match the same domain.
161
+
162
+ Known weak spot: recall can still be low on unfamiliar product domains or cross-language flows. Use `benchmark --misses` and `agentpack explain` when an expected file is absent. Those commands show whether the miss was caused by ignore rules, low score, summary floor, budget cutoff, or missing task signal.
163
+
164
+ ### Observed author-run numbers
165
+
166
+ These are local author-session numbers, included as anecdotal context rather than a benchmark claim.
167
+
168
+ #### Token Compression
110
169
 
111
170
  | Metric | Value |
112
171
  |--------|-------|
@@ -118,7 +177,7 @@ Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigr
118
177
 
119
178
  Per session: ~4.1M raw repo tokens → ~35K packed context tokens.
120
179
 
121
- ### Cost (Sonnet 4.6, input tokens only)
180
+ #### Cost (Sonnet 4.6, input tokens only)
122
181
 
123
182
  | Scenario | Cost |
124
183
  |----------|------|
@@ -128,7 +187,7 @@ Per session: ~4.1M raw repo → ~35K packed context.
128
187
 
129
188
  > Honest note: raw_tokens = full repo estimate. Real savings depend on how much context you'd pass manually. Compression ratio (99%+) is verifiable; dollar figure is scenario-dependent.
130
189
 
131
- ### Quality Signal
190
+ #### Quality Signal
132
191
 
133
192
  - 42 commits in 7 days (~6/day) vs 4.9/day before
134
193
  - Shift from single-file fixes → multi-system coordinated fixes
@@ -202,9 +261,10 @@ _*`--agent generic` outputs standard markdown. Claude adapter has richer instruc
202
261
 
203
262
  ### What agentpack does NOT do well
204
263
 
205
- - **Interactive sessions on small repos**: if your whole repo is <20k tokens, just use repomix
206
- - **One-shot public repo questions**: gitingest's "replace hub with ingest" is faster for that
207
- - **Semantic understanding**: keyword scoring + AST is not a language model precise technical terms in your task description work better than vague ones
264
+ - **Interactive sessions on small repos**: if your whole repo is <20k tokens, a simple repo dump may be enough
265
+ - **One-shot public repo questions**: gitingest's "replace hub with ingest" is faster for quick read-only exploration
266
+ - **Guaranteed source-of-truth selection**: AgentPack ranks likely files; it can miss task-critical files. Use `agentpack benchmark --misses`, `agentpack explain`, and normal `rg`/agent file reads for correctness.
267
+ - **Deep semantic understanding**: keyword/concept scoring, imports, symbols, and path roles help, but they are not an LLM-level code understanding system
208
268
 
209
269
  ---
210
270
 
@@ -222,6 +282,14 @@ Requires Python 3.10+.
222
282
 
223
283
  ## Start Once, Then Work Normally
224
284
 
285
+ For a guided two-minute path in any repo:
286
+
287
+ ```bash
288
+ agentpack quickstart --task "fix auth token expiry"
289
+ ```
290
+
291
+ It shows the exact commands to initialize, set task text, generate a first pack, inspect stats, start watch mode, and scaffold a small benchmark file for your own tasks.
292
+
225
293
  The full workflow:
226
294
 
227
295
  ```bash
@@ -642,6 +710,20 @@ Options:
642
710
 
643
711
  ---
644
712
 
713
+ ### `agentpack quickstart`
714
+
715
+ Show the shortest useful path for the current repo.
716
+
717
+ ```bash
718
+ agentpack quickstart
719
+ agentpack quickstart --task "fix auth token expiry"
720
+ agentpack quickstart --task "fix auth token expiry" --write
721
+ ```
722
+
723
+ `quickstart` does nothing magic: it checks whether `.agentpack/config.toml`, `.agentpack/task.md`, and context packs exist, then prints the next few commands. With `--write`, it writes the supplied task into `.agentpack/task.md`.
724
+
725
+ ---
726
+
645
727
  ### `agentpack session` _(removed)_
646
728
 
647
729
  Session management was removed in v0.1.12. `agentpack init` bootstraps the session automatically. Use `agentpack watch` to keep context current. To change the task, edit `.agentpack/task.md`.
@@ -762,6 +844,8 @@ agentpack benchmark --task "fix auth token expiry" # single task
762
844
  agentpack benchmark --task "fix auth bug" --compare # compare minimal/balanced/deep
763
845
  agentpack benchmark --init # scaffold .agentpack/benchmark.toml
764
846
  agentpack benchmark # run all cases in benchmark.toml
847
+ agentpack benchmark --sample-fixtures # source checkout demo evals
848
+ agentpack benchmark --misses # explain expected-file misses
765
849
  ```
766
850
 
767
851
  Output per case:
@@ -812,6 +896,8 @@ expected_files = [
812
896
  hit: src/auth/session.py, src/auth/token.py
813
897
  ```
814
898
 
899
+ Use `--misses` when recall is low. It prints each expected file that was not selected, along with its status, rank, score, and scoring reasons, which helps distinguish ignored files, budget cuts, low scores, and missing dependency signals.
900
+
815
901
  ---
816
902
 
817
903
  ### `agentpack scan`
@@ -840,6 +926,20 @@ When a session is active, shows session panel (agent, mode, started, refresh cou
840
926
 
841
927
  Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
842
928
 
929
+ To build a real usefulness signal for your repo:
930
+
931
+ ```bash
932
+ agentpack benchmark --sample-fixtures
933
+
934
+ agentpack benchmark --init
935
+ # edit .agentpack/benchmark.toml with real tasks + files you actually changed
936
+ agentpack benchmark --compare --misses
937
+ ```
938
+
939
+ `--sample-fixtures` runs bundled FastAPI, Next.js, and mixed Python/TypeScript fixture evals from an AgentPack source checkout. It is a smoke test, not a claim about your repo.
940
+
941
+ For a high-confidence usefulness signal, use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
942
+
843
943
  ---
844
944
 
845
945
  ### `agentpack status`
@@ -1189,6 +1289,7 @@ src/agentpack/
1189
1289
  pack.py # agentpack pack → PackService.run()
1190
1290
  install.py # agentpack install / global-install → installers/
1191
1291
  init.py # agentpack init
1292
+ quickstart.py # agentpack quickstart — guided first-run commands
1192
1293
  scan.py # agentpack scan
1193
1294
  diff.py # agentpack diff
1194
1295
  status.py # agentpack status
@@ -1197,10 +1298,11 @@ src/agentpack/
1197
1298
  monitor.py # agentpack monitor
1198
1299
  explain.py # agentpack explain
1199
1300
  doctor.py # agentpack doctor
1200
- session.py # agentpack session start/stop/status/refresh
1301
+ hook_cmd.py # agentpack hook — Claude prompt hook + stale detection
1302
+ mcp_cmd.py # agentpack mcp — MCP server entrypoint
1201
1303
  watch.py # agentpack watch — file watcher with debounce
1202
1304
  claude_cmd.py # agentpack claude — refresh + launch claude
1203
- benchmark.py # agentpack benchmark — token efficiency + selection quality
1305
+ benchmark.py # agentpack benchmark — token efficiency, recall, miss diagnostics
1204
1306
  ```
1205
1307
 
1206
1308
  ### Key architectural properties
@@ -1328,7 +1430,17 @@ The more descriptive your branch names (`feat/add-rate-limiting` beats `dev`) an
1328
1430
 
1329
1431
  ### Concept synonym expansion
1330
1432
 
1331
- AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl`. Files that implement a concept but don't use its exact name still rank correctly.
1433
+ AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl`; domain terms such as `kundali` expand toward astrology/chart/compatibility terms. Files that implement a concept but don't use its exact name can still rank.
1434
+
1435
+ ### Full-stack role boosts
1436
+
1437
+ When a task points at a page, route, or API surface, AgentPack also gives a controlled boost to related implementation roles such as `service`, `controller`, `schema`, `handler`, `repository`, and `client`. This helps full-stack tasks pull backend implementation files instead of only frontend entrypoints.
1438
+
1439
+ This is still heuristic. If a service should have appeared and did not, add it as an `expected_files` entry in `benchmark.toml` and run:
1440
+
1441
+ ```bash
1442
+ agentpack benchmark --compare --misses
1443
+ ```
1332
1444
 
1333
1445
  ### Content-based keyword enrichment
1334
1446
 
@@ -1379,6 +1491,8 @@ agentpack explain --task "fix auth session bug"
1379
1491
 
1380
1492
  Shows ranked scores and reasons before committing to a pack. Use when a file you expect isn't appearing.
1381
1493
 
1494
+ For repeatable evals, prefer `benchmark --misses` because it compares selected files against the files you actually changed for historical tasks.
1495
+
1382
1496
  ### Check what got included and why
1383
1497
 
1384
1498
  Every pack includes a context receipt explaining each file's inclusion or exclusion:
@@ -1410,7 +1524,8 @@ config_file = 60 # was 25 — configs always matter here
1410
1524
  - **Non-destructive**: never overwrites user files; config patching only touches agentpack-managed blocks
1411
1525
  - **Agent-neutral**: architecture is generic; Claude Code is the primary target (deepest integration); Cursor, Windsurf, Codex, and Antigravity are supported but less battle-tested
1412
1526
  - **No daemons**: file watching is opt-in via `agentpack watch`; git hooks run in the background and are opt-in via `install`
1413
- - **Honest**: packed token count reflects real content, not raw repo size
1527
+ - **Measurable**: `benchmark`, `stats`, receipts, and `--misses` are first-class because compression without recall is not enough
1528
+ - **Honest**: packed token count reflects real content, and raw-repo savings are presented separately from practical usefulness
1414
1529
 
1415
1530
  ---
1416
1531
 
@@ -1419,6 +1534,7 @@ config_file = 60 # was 25 — configs always matter here
1419
1534
  - **Windows**: not supported. Git hooks use POSIX shell (`#!/bin/sh`, `>/dev/null 2>&1 &`). The Claude Code session hooks use `python3` and `rm -f`. Contributions welcome.
1420
1535
  - **Monorepos**: single-root repos only. If you run `agentpack pack` from a monorepo root, all packages are scanned together with no workspace awareness. Workaround: `cd packages/my-pkg && agentpack init && agentpack pack`.
1421
1536
  - **Symbol extraction**: Python (AST, full) and JavaScript/TypeScript (regex, arrow functions + classes) are well-supported. Go, Rust, Java, Kotlin have import graph traversal but no symbol extraction — they fall back to file-level summaries.
1537
+ - **Selection recall**: ranking is heuristic. It can miss files when task language differs from code language, when repos have unusual architecture, or when important files are only connected at runtime.
1422
1538
  - **Secret redaction**: covers AWS keys, GitHub tokens, OpenAI/Anthropic keys, JWTs, and private key blocks. Not a substitute for a dedicated secrets scanner on sensitive repos.
1423
1539
  - **Token estimates**: uses tiktoken `cl100k_base` — approximate, not exact for Claude's billing.
1424
1540
  - **Large repos (>5k files)**: global auto-bootstrap is skipped for repos over 5,000 files to avoid hangs. Run `agentpack init` explicitly in large codebases.
@@ -1435,6 +1551,37 @@ pip install "agentpack-cli[all]" # watch + mcp
1435
1551
 
1436
1552
  ---
1437
1553
 
1554
+ ## Development
1555
+
1556
+ Clone and run locally:
1557
+
1558
+ ```bash
1559
+ git clone https://github.com/vishal2612200/agentpack.git
1560
+ cd agentpack
1561
+ python -m pip install -e ".[dev,watch,mcp]" build
1562
+ pytest
1563
+ ```
1564
+
1565
+ Useful checks before opening a PR:
1566
+
1567
+ ```bash
1568
+ pytest
1569
+ python -m build
1570
+ agentpack benchmark --sample-fixtures --misses
1571
+ ```
1572
+
1573
+ Good contribution areas:
1574
+
1575
+ - More real-world benchmark fixtures and public repo eval cases
1576
+ - Windows support for hooks and session integrations
1577
+ - Better symbol extraction for Go, Rust, Java, and Kotlin
1578
+ - More precise import/dependency resolution for framework-heavy repos
1579
+ - Ranking regressions with `expected_files` cases that reproduce misses
1580
+
1581
+ Please include tests for ranking changes. A good ranking PR usually adds one focused unit test and one scenario in `tests/test_ranking_evals.py`.
1582
+
1583
+ ---
1584
+
1438
1585
  ## License
1439
1586
 
1440
1587
  MIT
@@ -5,11 +5,27 @@
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
6
  [![CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml/badge.svg)](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
7
7
 
8
- > **Status: alpha (v0.1.21).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
8
+ > **Status: alpha (v0.1.22).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
9
9
  >
10
10
  > **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
11
11
 
12
- **Token-aware context packing for AI coding agents.**
12
+ **Task-aware context packing for AI coding agents.**
13
+
14
+ AgentPack scans a repository, ranks files for the task you are working on, and writes a compact markdown context pack for Claude Code, Cursor, Windsurf, Codex, Antigravity, CI jobs, or any LLM workflow.
15
+
16
+ It is useful when the repo is too large to paste, but you still want the agent to start with more than a blank slate.
17
+
18
+ **What it is**
19
+ - A local CLI for building task-focused context packs
20
+ - A summary cache, import graph, ranking engine, and token-budget selector
21
+ - Optional integrations for popular coding agents
22
+ - An eval harness for measuring whether selected files match files you actually changed
23
+
24
+ **What it is not**
25
+ - Not a coding agent
26
+ - Not a semantic code search engine
27
+ - Not a replacement for manual inspection on high-stakes changes
28
+ - Not yet proven across a large public benchmark suite
13
29
 
14
30
  ---
15
31
 
@@ -32,16 +48,19 @@ None of these scale. On a 200-file codebase, option 1 wastes 5–10 turns just o
32
48
  AgentPack solves this with a one-time offline analysis pass:
33
49
 
34
50
  1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
35
- 2. **On each task** — uses git diff + import graph traversal + keyword scoring to rank every file by relevance to what you're working on.
36
- 3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
37
- 4. **Explains pack quality** — noisy-pack diagnostics, score receipts, and token-precision metrics show when the pack is broad and where token noise lives.
51
+ 2. **On each task** — uses git diff, import graph traversal, keyword/concept expansion, implementation-role boosts, and cross-layer relatedness to rank every file.
52
+ 3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies and likely implementation files get summaries, everything else gets dropped.
53
+ 4. **Explains pack quality** — noisy-pack diagnostics, score receipts, token-precision metrics, and benchmark miss reports show when the pack is broad or missing expected files.
38
54
  5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
39
55
 
40
- The result: your agent starts every session with a focused, accurate picture of the relevant code without you doing anything after opt-in.
56
+ The result: your agent starts with a focused map of the relevant code. It should reduce blind exploration, not replace the agent's own file reads or your judgment.
41
57
 
42
58
  ```bash
43
59
  pip install agentpack-cli
44
60
 
61
+ # Show the fastest path for your repo
62
+ agentpack quickstart --task "fix auth token expiry"
63
+
45
64
  # One-time setup per project
46
65
  cd your-project
47
66
  agentpack init # creates config, session, task.md — nothing else needed
@@ -61,13 +80,53 @@ agentpack global-install --dry-run # preview first
61
80
  agentpack global-install
62
81
  ```
63
82
 
64
- Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM.
83
+ Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigravity** (Google), or any LLM that can read markdown.
65
84
 
66
85
  ---
67
86
 
68
- ## AgentPack Results (7 days, 21 sessions)
87
+ ## What to expect
88
+
89
+ AgentPack's strongest value is repeatable orientation: it gives the agent a compact first-pass map before tool calls begin.
90
+
91
+ Typical results on large repos:
92
+
93
+ | Signal | What good looks like |
94
+ |---|---|
95
+ | Token reduction | 90-99% smaller than raw repo text |
96
+ | Pack size | Usually 8k-25k tokens for a specific task |
97
+ | Pack time | Seconds on warm cache; first summarize pass is slower |
98
+ | Recall | Should be high for files you later edit; validate with `agentpack benchmark` |
99
+ | Precision | Often modest; summaries are cheap but can still add noise |
69
100
 
70
- ### Token Compression
101
+ The compression number is easy to verify, but it is not the same as usefulness. The important question is: **did AgentPack include the files you actually needed?**
102
+
103
+ Use the built-in eval flow:
104
+
105
+ ```bash
106
+ agentpack benchmark --init
107
+ # add real historical tasks and files you actually changed
108
+ agentpack benchmark --compare --misses
109
+ ```
110
+
111
+ For source checkouts, there is also a small smoke suite:
112
+
113
+ ```bash
114
+ agentpack benchmark --sample-fixtures --misses
115
+ ```
116
+
117
+ This runs FastAPI, Next.js, and mixed Python/TypeScript fixture tasks. It is a sanity check, not a substitute for real repo evals.
118
+
119
+ ### Current quality bar
120
+
121
+ AgentPack is best described as a **map, not a compass**. It is already good at token reduction, changed-file inclusion, related tests, imports, configs, and common concepts like auth/cache/rate limiting. Recent ranking work also improves full-stack tasks by pulling service/controller/schema/handler files when UI routes or pages match the same domain.
122
+
123
+ Known weak spot: recall can still be low on unfamiliar product domains or cross-language flows. Use `benchmark --misses` and `agentpack explain` when an expected file is absent. Those commands show whether the miss was caused by ignore rules, low score, summary floor, budget cutoff, or missing task signal.
124
+
125
+ ### Observed author-run numbers
126
+
127
+ These are local author-session numbers, included as anecdotal context rather than a benchmark claim.
128
+
129
+ #### Token Compression
71
130
 
72
131
  | Metric | Value |
73
132
  |--------|-------|
@@ -79,7 +138,7 @@ Supported agents: **Claude Code**, **Cursor**, **Windsurf**, **Codex**, **Antigr
79
138
 
80
139
  Per session: ~4.1M raw repo tokens → ~35K packed context tokens.
81
140
 
82
- ### Cost (Sonnet 4.6, input tokens only)
141
+ #### Cost (Sonnet 4.6, input tokens only)
83
142
 
84
143
  | Scenario | Cost |
85
144
  |----------|------|
@@ -89,7 +148,7 @@ Per session: ~4.1M raw repo → ~35K packed context.
89
148
 
90
149
  > Honest note: raw_tokens = full repo estimate. Real savings depend on how much context you'd pass manually. Compression ratio (99%+) is verifiable; dollar figure is scenario-dependent.
91
150
 
92
- ### Quality Signal
151
+ #### Quality Signal
93
152
 
94
153
  - 42 commits in 7 days (~6/day) vs 4.9/day before
95
154
  - Shift from single-file fixes → multi-system coordinated fixes
@@ -163,9 +222,10 @@ _*`--agent generic` outputs standard markdown. Claude adapter has richer instruc
163
222
 
164
223
  ### What agentpack does NOT do well
165
224
 
166
- - **Interactive sessions on small repos**: if your whole repo is <20k tokens, just use repomix
167
- - **One-shot public repo questions**: gitingest's "replace hub with ingest" is faster for that
168
- - **Semantic understanding**: keyword scoring + AST is not a language model — precise technical terms in your task description work better than vague ones
225
+ - **Interactive sessions on small repos**: if your whole repo is <20k tokens, a simple repo dump may be enough
226
+ - **One-shot public repo questions**: gitingest's "replace `hub` with `ingest` in the GitHub URL" shortcut is faster for quick read-only exploration
227
+ - **Guaranteed source-of-truth selection**: AgentPack ranks likely files; it can miss task-critical files. Use `agentpack benchmark --misses`, `agentpack explain`, and normal `rg`/agent file reads for correctness.
228
+ - **Deep semantic understanding**: keyword/concept scoring, imports, symbols, and path roles help, but they are not an LLM-level code understanding system
169
229
 
170
230
  ---
171
231
 
@@ -183,6 +243,14 @@ Requires Python 3.10+.
183
243
 
184
244
  ## Start Once, Then Work Normally
185
245
 
246
+ For a guided two-minute path in any repo:
247
+
248
+ ```bash
249
+ agentpack quickstart --task "fix auth token expiry"
250
+ ```
251
+
252
+ It shows the exact commands to initialize, set task text, generate a first pack, inspect stats, start watch mode, and scaffold a small benchmark file for your own tasks.
253
+
186
254
  The full workflow:
187
255
 
188
256
  ```bash
@@ -603,6 +671,20 @@ Options:
603
671
 
604
672
  ---
605
673
 
674
+ ### `agentpack quickstart`
675
+
676
+ Show the shortest useful path for the current repo.
677
+
678
+ ```bash
679
+ agentpack quickstart
680
+ agentpack quickstart --task "fix auth token expiry"
681
+ agentpack quickstart --task "fix auth token expiry" --write
682
+ ```
683
+
684
+ `quickstart` does no guessing and no magic. It checks whether `.agentpack/config.toml`, `.agentpack/task.md`, and context packs exist, then prints the next few commands. With `--write`, it writes the supplied task into `.agentpack/task.md`.
685
+
686
+ ---
687
+
606
688
  ### `agentpack session` _(removed)_
607
689
 
608
690
  Session management was removed in v0.1.12. `agentpack init` bootstraps the session automatically. Use `agentpack watch` to keep context current. To change the task, edit `.agentpack/task.md`.
@@ -723,6 +805,8 @@ agentpack benchmark --task "fix auth token expiry" # single task
723
805
  agentpack benchmark --task "fix auth bug" --compare # compare minimal/balanced/deep
724
806
  agentpack benchmark --init # scaffold .agentpack/benchmark.toml
725
807
  agentpack benchmark # run all cases in benchmark.toml
808
+ agentpack benchmark --sample-fixtures # source checkout demo evals
809
+ agentpack benchmark --misses # explain expected-file misses
726
810
  ```
727
811
 
728
812
  Output per case:
@@ -773,6 +857,8 @@ expected_files = [
773
857
  hit: src/auth/session.py, src/auth/token.py
774
858
  ```
775
859
 
860
+ Use `--misses` when recall is low. For each expected file that was not selected, it prints the file's status, rank, score, and scoring reasons, which helps separate ignored files, budget cuts, low scores, and missing dependency signals.
861
+
776
862
  ---
777
863
 
778
864
  ### `agentpack scan`
@@ -801,6 +887,20 @@ When a session is active, shows session panel (agent, mode, started, refresh cou
801
887
 
802
888
  Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
803
889
 
890
+ To build a real usefulness signal for your repo:
891
+
892
+ ```bash
893
+ agentpack benchmark --sample-fixtures
894
+
895
+ agentpack benchmark --init
896
+ # edit .agentpack/benchmark.toml with real tasks + files you actually changed
897
+ agentpack benchmark --compare --misses
898
+ ```
899
+
900
+ `--sample-fixtures` runs bundled FastAPI, Next.js, and mixed Python/TypeScript fixture evals from an AgentPack source checkout. It is a smoke test, not a claim about your repo.
901
+
902
+ For an 8+ usefulness signal, use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
903
+
804
904
  ---
805
905
 
806
906
  ### `agentpack status`
@@ -1150,6 +1250,7 @@ src/agentpack/
1150
1250
  pack.py # agentpack pack → PackService.run()
1151
1251
  install.py # agentpack install / global-install → installers/
1152
1252
  init.py # agentpack init
1253
+ quickstart.py # agentpack quickstart — guided first-run commands
1153
1254
  scan.py # agentpack scan
1154
1255
  diff.py # agentpack diff
1155
1256
  status.py # agentpack status
@@ -1158,10 +1259,11 @@ src/agentpack/
1158
1259
  monitor.py # agentpack monitor
1159
1260
  explain.py # agentpack explain
1160
1261
  doctor.py # agentpack doctor
1161
- session.py # agentpack session start/stop/status/refresh
1262
+ hook_cmd.py # agentpack hook — Claude prompt hook + stale detection
1263
+ mcp_cmd.py # agentpack mcp — MCP server entrypoint
1162
1264
  watch.py # agentpack watch — file watcher with debounce
1163
1265
  claude_cmd.py # agentpack claude — refresh + launch claude
1164
- benchmark.py # agentpack benchmark — token efficiency + selection quality
1266
+ benchmark.py # agentpack benchmark — token efficiency, recall, miss diagnostics
1165
1267
  ```
1166
1268
 
1167
1269
  ### Key architectural properties
@@ -1289,7 +1391,17 @@ The more descriptive your branch names (`feat/add-rate-limiting` beats `dev`) an
1289
1391
 
1290
1392
  ### Concept synonym expansion
1291
1393
 
1292
- AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl`. Files that implement a concept but don't use its exact name still rank correctly.
1394
+ AgentPack expands task keywords automatically — "rate limiting" expands to `throttle`, `leaky`, `bucket`, `quota`, `debounce`; "auth" expands to `jwt`, `bearer`, `token`, `oauth`; "cache" expands to `lru`, `memoize`, `redis`, `ttl`; domain terms such as `kundali` expand toward astrology/chart/compatibility terms. Files that implement a concept but don't use its exact name can still rank highly.
1395
+
1396
+ ### Full-stack role boosts
1397
+
1398
+ When a task points at a page, route, or API surface, AgentPack also gives a controlled boost to related implementation roles such as `service`, `controller`, `schema`, `handler`, `repository`, and `client`. This helps full-stack tasks pull backend implementation files instead of only frontend entrypoints.
1399
+
1400
+ This is still heuristic. If a service should have appeared and did not, add it as an `expected_files` entry in `benchmark.toml` and run:
1401
+
1402
+ ```bash
1403
+ agentpack benchmark --compare --misses
1404
+ ```
1293
1405
 
1294
1406
  ### Content-based keyword enrichment
1295
1407
 
@@ -1340,6 +1452,8 @@ agentpack explain --task "fix auth session bug"
1340
1452
 
1341
1453
  Shows ranked scores and reasons before committing to a pack. Use when a file you expect isn't appearing.
1342
1454
 
1455
+ For repeatable evals, prefer `benchmark --misses` because it compares selected files against the files you actually changed for historical tasks.
1456
+
1343
1457
  ### Check what got included and why
1344
1458
 
1345
1459
  Every pack includes a context receipt explaining each file's inclusion or exclusion:
@@ -1371,7 +1485,8 @@ config_file = 60 # was 25 — configs always matter here
1371
1485
  - **Non-destructive**: never overwrites user files; config patching only touches agentpack-managed blocks
1372
1486
  - **Agent-neutral**: architecture is generic; Claude Code is the primary target (deepest integration); Cursor, Windsurf, Codex, and Antigravity are supported but less battle-tested
1373
1487
  - **No daemons**: file watching is opt-in via `agentpack watch`; git hooks run in the background and are opt-in via `install`
1374
- - **Honest**: packed token count reflects real content, not raw repo size
1488
+ - **Measurable**: `benchmark`, `stats`, receipts, and `--misses` are first-class because compression without recall is not enough
1489
+ - **Honest**: packed token count reflects real content, and raw-repo savings are presented separately from practical usefulness
1375
1490
 
1376
1491
  ---
1377
1492
 
@@ -1380,6 +1495,7 @@ config_file = 60 # was 25 — configs always matter here
1380
1495
  - **Windows**: not supported. Git hooks use POSIX shell (`#!/bin/sh`, `>/dev/null 2>&1 &`). The Claude Code session hooks use `python3` and `rm -f`. Contributions welcome.
1381
1496
  - **Monorepos**: single-root repos only. If you `agentpack pack` from a monorepo root, all packages are scanned together with no workspace awareness. Workaround: `cd packages/my-pkg && agentpack init && agentpack pack`.
1382
1497
  - **Symbol extraction**: Python (AST, full) and JavaScript/TypeScript (regex, arrow functions + classes) are well-supported. Go, Rust, Java, Kotlin have import graph traversal but no symbol extraction — they fall back to file-level summaries.
1498
+ - **Selection recall**: ranking is heuristic. It can miss files when task language differs from code language, when repos have unusual architecture, or when important files are only connected at runtime.
1383
1499
  - **Secret redaction**: covers AWS keys, GitHub tokens, OpenAI/Anthropic keys, JWTs, and private key blocks. Not a substitute for a dedicated secrets scanner on sensitive repos.
1384
1500
  - **Token estimates**: uses tiktoken `cl100k_base` — approximate, not exact for Claude's billing.
1385
1501
  - **Large repos (>5k files)**: global auto-bootstrap is skipped for repos over 5,000 files to avoid hangs. Run `agentpack init` explicitly in large codebases.
@@ -1396,6 +1512,37 @@ pip install "agentpack-cli[all]" # watch + mcp
1396
1512
 
1397
1513
  ---
1398
1514
 
1515
+ ## Development
1516
+
1517
+ Clone and run locally:
1518
+
1519
+ ```bash
1520
+ git clone https://github.com/vishal2612200/agentpack.git
1521
+ cd agentpack
1522
+ python -m pip install -e ".[dev,watch,mcp]" build
1523
+ pytest
1524
+ ```
1525
+
1526
+ Useful checks before opening a PR:
1527
+
1528
+ ```bash
1529
+ pytest
1530
+ python -m build
1531
+ agentpack benchmark --sample-fixtures --misses
1532
+ ```
1533
+
1534
+ Good contribution areas:
1535
+
1536
+ - More real-world benchmark fixtures and public repo eval cases
1537
+ - Windows support for hooks and session integrations
1538
+ - Better symbol extraction for Go, Rust, Java, and Kotlin
1539
+ - More precise import/dependency resolution for framework-heavy repos
1540
+ - Ranking regression reports that include `expected_files` cases reproducing the miss
1541
+
1542
+ Please include tests for ranking changes. A good ranking PR usually adds one focused unit test and one scenario in `tests/test_ranking_evals.py`.
1543
+
1544
+ ---
1545
+
1399
1546
  ## License
1400
1547
 
1401
1548
  MIT
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "agentpack-cli"
3
- version = "0.1.21"
4
- description = "Token-aware context packing for AI coding agents — Claude, Cursor, Windsurf, and Codex"
3
+ version = "0.1.22"
4
+ description = "Task-aware context packing for AI coding agents — Claude, Cursor, Windsurf, Codex, and Antigravity"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
7
7
  license = {text = "MIT"}