agentpack-cli 0.3.8__tar.gz → 0.3.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/PKG-INFO +103 -3
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/README.md +102 -2
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/pyproject.toml +1 -1
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/__init__.py +1 -1
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/ranking.py +559 -32
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/application/pack_service.py +215 -14
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/cli.py +2 -0
- agentpack_cli-0.3.10/src/agentpack/commands/eval_cmd.py +264 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/explain.py +106 -4
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/install.py +42 -4
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/pack.py +5 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/stats.py +6 -2
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/tune.py +24 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/context_pack.py +21 -0
- agentpack_cli-0.3.10/src/agentpack/core/evals.py +939 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/integrations/global_install.py +61 -34
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/.gitignore +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/LICENSE +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/antigravity.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/base.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/claude.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/codex.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/cursor.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/detect.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/generic.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/adapters/windsurf.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/dependency_graph.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/go_imports.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/java_imports.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/js_ts_imports.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/monorepo.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/naming_signals.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/python_imports.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/repo_map.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/role_inference.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/rust_imports.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/symbols.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/task_classifier.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/analysis/tests.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/application/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/_shared.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/benchmark.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/claude_cmd.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/diff.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/doctor.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/guard.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/hook_cmd.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/ignore_cmd.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/init.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/mcp_cmd.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/migrate.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/monitor.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/quickstart.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/repair.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/scan.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/status.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/summarize.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/commands/watch.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/bootstrap.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/cache.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/config.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/diff.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/git.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/git_hooks.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/global_install.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/ignore.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/merkle.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/models.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/redactor.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/scanner.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/snapshot.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/task_freshness.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/token_estimator.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/core/vscode_tasks.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/data/agentpack.md +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/installers/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/installers/antigravity.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/installers/claude.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/installers/codex.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/installers/cursor.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/installers/windsurf.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/integrations/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/integrations/agents.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/integrations/git_hooks.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/integrations/platform.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/integrations/vscode_tasks.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/mcp_server.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/renderers/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/renderers/compact.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/renderers/markdown.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/renderers/receipts.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/session/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/session/state.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/summaries/__init__.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/summaries/base.py +0 -0
- {agentpack_cli-0.3.8 → agentpack_cli-0.3.10}/src/agentpack/summaries/offline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentpack-cli
|
|
3
|
-
Version: 0.3.8
|
|
3
|
+
Version: 0.3.10
|
|
4
4
|
Summary: Local context engine for AI coding agents that ranks relevant files and builds task-focused context packs.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -40,13 +40,14 @@ Description-Content-Type: text/markdown
|
|
|
40
40
|
# AgentPack
|
|
41
41
|
|
|
42
42
|
[](https://pypi.org/project/agentpack-cli/)
|
|
43
|
+
[](https://pepy.tech/projects/agentpack-cli)
|
|
43
44
|
[](https://www.npmjs.com/package/@vishal2612200/agentpack)
|
|
44
45
|
[](https://www.npmjs.com/package/@vishal2612200/agentpack)
|
|
45
46
|
[](https://pypi.org/project/agentpack-cli/)
|
|
46
47
|
[](https://opensource.org/licenses/MIT)
|
|
47
48
|
[](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
48
49
|
|
|
49
|
-
> **Status: alpha (v0.3.8).** […]
|
|
50
|
+
> **Status: alpha (v0.3.10).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
|
|
50
51
|
>
|
|
51
52
|
> **Platform note:** macOS, Linux, and Windows are supported. Windows support targets PowerShell plus Git for Windows. `cmd.exe` and bare Git setups are not a supported path yet.
|
|
52
53
|
|
|
@@ -601,12 +602,14 @@ Command map:
|
|
|
601
602
|
| `agentpack doctor` | Audit hooks, agent files, CLI path, and repo health |
|
|
602
603
|
| `agentpack explain` | Understand why a file was selected or omitted |
|
|
603
604
|
| `agentpack benchmark` | Measure recall, precision, and misses against real tasks |
|
|
605
|
+
| `agentpack eval` | Run deterministic failure evals with tests, diff limits, and taxonomy labels |
|
|
604
606
|
| `agentpack tune` | Suggest fixes from recent pack metrics and benchmark misses |
|
|
605
607
|
| `agentpack status` | Inspect current pack freshness and metadata |
|
|
606
608
|
| `agentpack diff` | Show what changed between context snapshots |
|
|
607
609
|
| `agentpack monitor` | Review recent pack runs and quality signals |
|
|
608
610
|
| `agentpack scan` | Inspect packable, ignored, binary, and largest files |
|
|
609
611
|
| `agentpack global-install` | Install opt-in global hooks for initialized repos |
|
|
612
|
+
| `agentpack global-repair-hooks` | Repair stale global template hooks and current repo git hooks |
|
|
610
613
|
|
|
611
614
|
### `agentpack global-install`
|
|
612
615
|
|
|
@@ -622,7 +625,7 @@ agentpack global-install --agent antigravity # Antigravity
|
|
|
622
625
|
```
|
|
623
626
|
|
|
624
627
|
What it does:
|
|
625
|
-
- **Git template hooks** (`~/.git-templates/hooks/`) — git copies these into every repo on `git init` / `git clone`. On `post-commit`, `post-merge`, `post-checkout
|
|
628
|
+
- **Git template hooks** (`~/.git-templates/hooks/`) — git copies these into every repo on `git init` / `git clone`. On `post-commit`, `post-merge`, `post-checkout` they call AgentPack's cross-platform `GitAutoRepack` hook runner and always exit cleanly. Repacking still happens only in opted-in repos; fresh clones without `.agentpack/config.toml` remain a safe no-op.
|
|
626
629
|
- **Shell cd hook** (`~/.zshrc`, `~/.bashrc`, or the PowerShell profile on Windows) — on `cd` or prompt refresh, repacks if stale **only in opted-in repos**. Never touches repos without `.agentpack/config.toml`. Never auto-inits.
|
|
627
630
|
- **Agent config** — same agent-specific files that `agentpack init --agent <x>` or `agentpack install --agent <x>` writes for the current project.
|
|
628
631
|
|
|
@@ -644,6 +647,27 @@ Preview before committing:
|
|
|
644
647
|
agentpack global-install --dry-run
|
|
645
648
|
```
|
|
646
649
|
|
|
650
|
+
If you installed an older AgentPack build and want to refresh copied git hooks after an upgrade, run:
|
|
651
|
+
|
|
652
|
+
```bash
|
|
653
|
+
agentpack global-repair-hooks
|
|
654
|
+
```
|
|
655
|
+
|
|
656
|
+
That repairs `~/.git-templates/hooks/`, reasserts `git config --global init.templateDir`, and updates the current repo's `.git/hooks/` to the safe `GitAutoRepack` path.
|
|
657
|
+
|
|
658
|
+
### `agentpack global-repair-hooks`
|
|
659
|
+
|
|
660
|
+
Refresh AgentPack's global git template hooks and the current repo's local git hooks after an upgrade.
|
|
661
|
+
|
|
662
|
+
```bash
|
|
663
|
+
agentpack global-repair-hooks
|
|
664
|
+
```
|
|
665
|
+
|
|
666
|
+
Use this when:
|
|
667
|
+
- old template hooks were copied before the `GitAutoRepack` runner existed
|
|
668
|
+
- a stale hook script still shells out directly instead of calling `agentpack hook`
|
|
669
|
+
- you want new clones and the current repo to pick up the latest non-destructive hook behavior immediately
|
|
670
|
+
|
|
647
671
|
---
|
|
648
672
|
|
|
649
673
|
### `agentpack global-uninstall`
|
|
@@ -1189,6 +1213,82 @@ This command does not pretend a pack is correct. It gives the next thing to insp
|
|
|
1189
1213
|
|
|
1190
1214
|
---
|
|
1191
1215
|
|
|
1216
|
+
### `agentpack eval`
|
|
1217
|
+
|
|
1218
|
+
Run deterministic failure evals. AgentPack does not run the coding agent and
|
|
1219
|
+
does not use an LLM judge; it verifies the current or replayed worktree with
|
|
1220
|
+
commands and diff policies.
|
|
1221
|
+
|
|
1222
|
+
```bash
|
|
1223
|
+
agentpack eval --init
|
|
1224
|
+
# edit .agentpack/evals.toml with real failures and checks
|
|
1225
|
+
agentpack eval
|
|
1226
|
+
agentpack eval --case auth-timeout --prove-targets
|
|
1227
|
+
agentpack eval --capture auth-timeout --failure-class context --check "pytest tests/test_auth.py -q"
|
|
1228
|
+
agentpack eval --watch --until-pass
|
|
1229
|
+
agentpack eval --replay --prove-targets
|
|
1230
|
+
agentpack eval --variant baseline
|
|
1231
|
+
agentpack eval --variant agentpack
|
|
1232
|
+
agentpack eval --compare-variants baseline:agentpack
|
|
1233
|
+
agentpack eval --ci-template
|
|
1234
|
+
agentpack eval --report
|
|
1235
|
+
```
|
|
1236
|
+
|
|
1237
|
+
Example case:
|
|
1238
|
+
|
|
1239
|
+
```toml
|
|
1240
|
+
[[cases]]
|
|
1241
|
+
id = "auth-timeout"
|
|
1242
|
+
task = "fix auth token timeout"
|
|
1243
|
+
failure_class = "context"
|
|
1244
|
+
failure_source = "agent_failed"
|
|
1245
|
+
base_ref = "HEAD"
|
|
1246
|
+
patch_file = ".agentpack/evals/auth-timeout.patch"
|
|
1247
|
+
required_changed_files = ["src/auth/token.py"]
|
|
1248
|
+
forbidden_changed_files = ["src/db/**"]
|
|
1249
|
+
max_changed_files = 5
|
|
1250
|
+
max_changed_lines = 250
|
|
1251
|
+
agent = "codex"
|
|
1252
|
+
context_file = ".agentpack/context.md"
|
|
1253
|
+
context_hash = "..."
|
|
1254
|
+
selected_files = ["src/auth/token.py", "tests/test_auth.py"]
|
|
1255
|
+
|
|
1256
|
+
[[cases.checks]]
|
|
1257
|
+
name = "tests"
|
|
1258
|
+
command = "pytest tests/test_auth.py -q"
|
|
1259
|
+
timeout_s = 120
|
|
1260
|
+
retries = 1 # optional, marks pass-after-fail checks as flaky
|
|
1261
|
+
```
|
|
1262
|
+
|
|
1263
|
+
Use `eval` after an agent run: capture the real failure, add deterministic
|
|
1264
|
+
checks such as tests, typecheck, lint, schema validation, API contract tests,
|
|
1265
|
+
diff size, forbidden files, or golden outputs, then rerun until the harness
|
|
1266
|
+
passes. The model can propose; the harness must verify.
|
|
1267
|
+
|
|
1268
|
+
For hands-free local iteration, keep `agentpack eval --watch --until-pass`
|
|
1269
|
+
running in a terminal while the agent or developer edits. It reruns when the
|
|
1270
|
+
case file, patch artifacts, golden files, or git diff content changes and stops
|
|
1271
|
+
when all deterministic checks pass. `--capture` stores the current patch under
|
|
1272
|
+
`.agentpack/evals/<case-id>.patch` plus context metadata; `--replay` checks out
|
|
1273
|
+
`base_ref` into an isolated git worktree, applies that patch, and runs the same
|
|
1274
|
+
deterministic checks there. To measure AgentPack's contribution, run the same
|
|
1275
|
+
case with `--variant baseline` and then with `--variant agentpack`;
|
|
1276
|
+
`--compare-variants baseline:agentpack` reports which cases improved, regressed,
|
|
1277
|
+
stayed unchanged, or still need both sides. Use `--ci-template` to scaffold a
|
|
1278
|
+
GitHub Actions workflow for `benchmarks/evals.toml`.
|
|
1279
|
+
|
|
1280
|
+
Eval files are executable trust boundaries: commands in `checks.command` run
|
|
1281
|
+
locally and in CI. Review eval TOML from contributors with the same care as
|
|
1282
|
+
shell scripts or workflow files.
|
|
1283
|
+
|
|
1284
|
+
Captured patch artifacts are secret-scanned with the same local redactor used
|
|
1285
|
+
for context packs before they are written. If a patch line contains a real
|
|
1286
|
+
secret, the artifact stores `[REDACTED:<type>]` and the case records
|
|
1287
|
+
`patch_redaction_warnings`. Secret-bearing patches may replay with redacted
|
|
1288
|
+
values; replace secrets with safe fixture values when exact replay matters.
|
|
1289
|
+
|
|
1290
|
+
---
|
|
1291
|
+
|
|
1192
1292
|
### `agentpack status`
|
|
1193
1293
|
|
|
1194
1294
|
Check whether the context pack is stale.
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
# AgentPack
|
|
2
2
|
|
|
3
3
|
[](https://pypi.org/project/agentpack-cli/)
|
|
4
|
+
[](https://pepy.tech/projects/agentpack-cli)
|
|
4
5
|
[](https://www.npmjs.com/package/@vishal2612200/agentpack)
|
|
5
6
|
[](https://www.npmjs.com/package/@vishal2612200/agentpack)
|
|
6
7
|
[](https://pypi.org/project/agentpack-cli/)
|
|
7
8
|
[](https://opensource.org/licenses/MIT)
|
|
8
9
|
[](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
9
10
|
|
|
10
|
-
> **Status: alpha (v0.3.8).** […]
|
|
11
|
+
> **Status: alpha (v0.3.10).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
|
|
11
12
|
>
|
|
12
13
|
> **Platform note:** macOS, Linux, and Windows are supported. Windows support targets PowerShell plus Git for Windows. `cmd.exe` and bare Git setups are not a supported path yet.
|
|
13
14
|
|
|
@@ -562,12 +563,14 @@ Command map:
|
|
|
562
563
|
| `agentpack doctor` | Audit hooks, agent files, CLI path, and repo health |
|
|
563
564
|
| `agentpack explain` | Understand why a file was selected or omitted |
|
|
564
565
|
| `agentpack benchmark` | Measure recall, precision, and misses against real tasks |
|
|
566
|
+
| `agentpack eval` | Run deterministic failure evals with tests, diff limits, and taxonomy labels |
|
|
565
567
|
| `agentpack tune` | Suggest fixes from recent pack metrics and benchmark misses |
|
|
566
568
|
| `agentpack status` | Inspect current pack freshness and metadata |
|
|
567
569
|
| `agentpack diff` | Show what changed between context snapshots |
|
|
568
570
|
| `agentpack monitor` | Review recent pack runs and quality signals |
|
|
569
571
|
| `agentpack scan` | Inspect packable, ignored, binary, and largest files |
|
|
570
572
|
| `agentpack global-install` | Install opt-in global hooks for initialized repos |
|
|
573
|
+
| `agentpack global-repair-hooks` | Repair stale global template hooks and current repo git hooks |
|
|
571
574
|
|
|
572
575
|
### `agentpack global-install`
|
|
573
576
|
|
|
@@ -583,7 +586,7 @@ agentpack global-install --agent antigravity # Antigravity
|
|
|
583
586
|
```
|
|
584
587
|
|
|
585
588
|
What it does:
|
|
586
|
-
- **Git template hooks** (`~/.git-templates/hooks/`) — git copies these into every repo on `git init` / `git clone`. On `post-commit`, `post-merge`, `post-checkout
|
|
589
|
+
- **Git template hooks** (`~/.git-templates/hooks/`) — git copies these into every repo on `git init` / `git clone`. On `post-commit`, `post-merge`, `post-checkout` they call AgentPack's cross-platform `GitAutoRepack` hook runner and always exit cleanly. Repacking still happens only in opted-in repos; fresh clones without `.agentpack/config.toml` remain a safe no-op.
|
|
587
590
|
- **Shell cd hook** (`~/.zshrc`, `~/.bashrc`, or the PowerShell profile on Windows) — on `cd` or prompt refresh, repacks if stale **only in opted-in repos**. Never touches repos without `.agentpack/config.toml`. Never auto-inits.
|
|
588
591
|
- **Agent config** — same agent-specific files that `agentpack init --agent <x>` or `agentpack install --agent <x>` writes for the current project.
|
|
589
592
|
|
|
@@ -605,6 +608,27 @@ Preview before committing:
|
|
|
605
608
|
agentpack global-install --dry-run
|
|
606
609
|
```
|
|
607
610
|
|
|
611
|
+
If you installed an older AgentPack build and want to refresh copied git hooks after an upgrade, run:
|
|
612
|
+
|
|
613
|
+
```bash
|
|
614
|
+
agentpack global-repair-hooks
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
That repairs `~/.git-templates/hooks/`, reasserts `git config --global init.templateDir`, and updates the current repo's `.git/hooks/` to the safe `GitAutoRepack` path.
|
|
618
|
+
|
|
619
|
+
### `agentpack global-repair-hooks`
|
|
620
|
+
|
|
621
|
+
Refresh AgentPack's global git template hooks and the current repo's local git hooks after an upgrade.
|
|
622
|
+
|
|
623
|
+
```bash
|
|
624
|
+
agentpack global-repair-hooks
|
|
625
|
+
```
|
|
626
|
+
|
|
627
|
+
Use this when:
|
|
628
|
+
- old template hooks were copied before the `GitAutoRepack` runner existed
|
|
629
|
+
- a stale hook script still shells out directly instead of calling `agentpack hook`
|
|
630
|
+
- you want new clones and the current repo to pick up the latest non-destructive hook behavior immediately
|
|
631
|
+
|
|
608
632
|
---
|
|
609
633
|
|
|
610
634
|
### `agentpack global-uninstall`
|
|
@@ -1150,6 +1174,82 @@ This command does not pretend a pack is correct. It gives the next thing to insp
|
|
|
1150
1174
|
|
|
1151
1175
|
---
|
|
1152
1176
|
|
|
1177
|
+
### `agentpack eval`
|
|
1178
|
+
|
|
1179
|
+
Run deterministic failure evals. AgentPack does not run the coding agent and
|
|
1180
|
+
does not use an LLM judge; it verifies the current or replayed worktree with
|
|
1181
|
+
commands and diff policies.
|
|
1182
|
+
|
|
1183
|
+
```bash
|
|
1184
|
+
agentpack eval --init
|
|
1185
|
+
# edit .agentpack/evals.toml with real failures and checks
|
|
1186
|
+
agentpack eval
|
|
1187
|
+
agentpack eval --case auth-timeout --prove-targets
|
|
1188
|
+
agentpack eval --capture auth-timeout --failure-class context --check "pytest tests/test_auth.py -q"
|
|
1189
|
+
agentpack eval --watch --until-pass
|
|
1190
|
+
agentpack eval --replay --prove-targets
|
|
1191
|
+
agentpack eval --variant baseline
|
|
1192
|
+
agentpack eval --variant agentpack
|
|
1193
|
+
agentpack eval --compare-variants baseline:agentpack
|
|
1194
|
+
agentpack eval --ci-template
|
|
1195
|
+
agentpack eval --report
|
|
1196
|
+
```
|
|
1197
|
+
|
|
1198
|
+
Example case:
|
|
1199
|
+
|
|
1200
|
+
```toml
|
|
1201
|
+
[[cases]]
|
|
1202
|
+
id = "auth-timeout"
|
|
1203
|
+
task = "fix auth token timeout"
|
|
1204
|
+
failure_class = "context"
|
|
1205
|
+
failure_source = "agent_failed"
|
|
1206
|
+
base_ref = "HEAD"
|
|
1207
|
+
patch_file = ".agentpack/evals/auth-timeout.patch"
|
|
1208
|
+
required_changed_files = ["src/auth/token.py"]
|
|
1209
|
+
forbidden_changed_files = ["src/db/**"]
|
|
1210
|
+
max_changed_files = 5
|
|
1211
|
+
max_changed_lines = 250
|
|
1212
|
+
agent = "codex"
|
|
1213
|
+
context_file = ".agentpack/context.md"
|
|
1214
|
+
context_hash = "..."
|
|
1215
|
+
selected_files = ["src/auth/token.py", "tests/test_auth.py"]
|
|
1216
|
+
|
|
1217
|
+
[[cases.checks]]
|
|
1218
|
+
name = "tests"
|
|
1219
|
+
command = "pytest tests/test_auth.py -q"
|
|
1220
|
+
timeout_s = 120
|
|
1221
|
+
retries = 1 # optional, marks pass-after-fail checks as flaky
|
|
1222
|
+
```
|
|
1223
|
+
|
|
1224
|
+
Use `eval` after an agent run: capture the real failure, add deterministic
|
|
1225
|
+
checks such as tests, typecheck, lint, schema validation, API contract tests,
|
|
1226
|
+
diff size, forbidden files, or golden outputs, then rerun until the harness
|
|
1227
|
+
passes. The model can propose; the harness must verify.
|
|
1228
|
+
|
|
1229
|
+
For hands-free local iteration, keep `agentpack eval --watch --until-pass`
|
|
1230
|
+
running in a terminal while the agent or developer edits. It reruns when the
|
|
1231
|
+
case file, patch artifacts, golden files, or git diff content changes and stops
|
|
1232
|
+
when all deterministic checks pass. `--capture` stores the current patch under
|
|
1233
|
+
`.agentpack/evals/<case-id>.patch` plus context metadata; `--replay` checks out
|
|
1234
|
+
`base_ref` into an isolated git worktree, applies that patch, and runs the same
|
|
1235
|
+
deterministic checks there. To measure AgentPack's contribution, run the same
|
|
1236
|
+
case with `--variant baseline` and then with `--variant agentpack`;
|
|
1237
|
+
`--compare-variants baseline:agentpack` reports which cases improved, regressed,
|
|
1238
|
+
stayed unchanged, or still need both sides. Use `--ci-template` to scaffold a
|
|
1239
|
+
GitHub Actions workflow for `benchmarks/evals.toml`.
|
|
1240
|
+
|
|
1241
|
+
Eval files are executable trust boundaries: commands in `checks.command` run
|
|
1242
|
+
locally and in CI. Review eval TOML from contributors with the same care as
|
|
1243
|
+
shell scripts or workflow files.
|
|
1244
|
+
|
|
1245
|
+
Captured patch artifacts are secret-scanned with the same local redactor used
|
|
1246
|
+
for context packs before they are written. If a patch line contains a real
|
|
1247
|
+
secret, the artifact stores `[REDACTED:<type>]` and the case records
|
|
1248
|
+
`patch_redaction_warnings`. Secret-bearing patches may replay with redacted
|
|
1249
|
+
values; replace secrets with safe fixture values when exact replay matters.
|
|
1250
|
+
|
|
1251
|
+
---
|
|
1252
|
+
|
|
1153
1253
|
### `agentpack status`
|
|
1154
1254
|
|
|
1155
1255
|
Check whether the context pack is stale.
|