agentpack-cli 0.3.9__tar.gz → 0.3.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/PKG-INFO +148 -2
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/README.md +147 -1
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/pyproject.toml +1 -1
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/__init__.py +1 -1
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/cli.py +6 -0
- agentpack_cli-0.3.11/src/agentpack/commands/eval_cmd.py +264 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/mcp_cmd.py +1 -1
- agentpack_cli-0.3.11/src/agentpack/commands/route.py +29 -0
- agentpack_cli-0.3.11/src/agentpack/commands/skills.py +46 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/tune.py +24 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/config.py +20 -0
- agentpack_cli-0.3.11/src/agentpack/core/evals.py +939 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/antigravity.py +4 -3
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/claude.py +3 -2
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/codex.py +4 -3
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/cursor.py +8 -6
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/mcp_server.py +46 -0
- agentpack_cli-0.3.11/src/agentpack/router/__init__.py +23 -0
- agentpack_cli-0.3.11/src/agentpack/router/discovery.py +106 -0
- agentpack_cli-0.3.11/src/agentpack/router/models.py +67 -0
- agentpack_cli-0.3.11/src/agentpack/router/parser.py +240 -0
- agentpack_cli-0.3.11/src/agentpack/router/prompt_builder.py +87 -0
- agentpack_cli-0.3.11/src/agentpack/router/scoring.py +110 -0
- agentpack_cli-0.3.11/src/agentpack/router/service.py +156 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/.gitignore +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/LICENSE +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/antigravity.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/base.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/claude.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/codex.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/cursor.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/detect.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/generic.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/windsurf.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/dependency_graph.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/go_imports.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/java_imports.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/js_ts_imports.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/monorepo.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/naming_signals.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/python_imports.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/ranking.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/repo_map.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/role_inference.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/rust_imports.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/symbols.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/task_classifier.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/tests.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/application/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/application/pack_service.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/_shared.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/benchmark.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/claude_cmd.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/diff.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/doctor.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/explain.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/guard.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/hook_cmd.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/ignore_cmd.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/init.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/install.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/migrate.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/monitor.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/pack.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/quickstart.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/repair.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/scan.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/stats.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/status.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/summarize.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/watch.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/bootstrap.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/cache.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/context_pack.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/diff.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/git.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/git_hooks.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/global_install.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/ignore.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/merkle.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/models.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/redactor.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/scanner.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/snapshot.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/task_freshness.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/token_estimator.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/vscode_tasks.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/data/agentpack.md +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/windsurf.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/agents.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/git_hooks.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/global_install.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/platform.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/vscode_tasks.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/renderers/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/renderers/compact.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/renderers/markdown.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/renderers/receipts.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/session/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/session/state.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/summaries/__init__.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/summaries/base.py +0 -0
- {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/summaries/offline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentpack-cli
|
|
3
|
-
Version: 0.3.9
|
|
3
|
+
Version: 0.3.11
|
|
4
4
|
Summary: Local context engine for AI coding agents that ranks relevant files and builds task-focused context packs.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -40,13 +40,14 @@ Description-Content-Type: text/markdown
|
|
|
40
40
|
# AgentPack
|
|
41
41
|
|
|
42
42
|
[](https://pypi.org/project/agentpack-cli/)
|
|
43
|
+
[](https://pepy.tech/projects/agentpack-cli)
|
|
43
44
|
[](https://www.npmjs.com/package/@vishal2612200/agentpack)
|
|
44
45
|
[](https://www.npmjs.com/package/@vishal2612200/agentpack)
|
|
45
46
|
[](https://pypi.org/project/agentpack-cli/)
|
|
46
47
|
[](https://opensource.org/licenses/MIT)
|
|
47
48
|
[](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
48
49
|
|
|
49
|
-
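> **Status: alpha (v0.3.9).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.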
|
|
50
|
+
> **Status: alpha (v0.3.11).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
|
|
50
51
|
>
|
|
51
52
|
> **Platform note:** macOS, Linux, and Windows are supported. Windows support targets PowerShell plus Git for Windows. `cmd.exe` and bare Git setups are not a supported path yet.
|
|
52
53
|
|
|
@@ -64,6 +65,7 @@ Use AgentPack when a repo is too large to paste and you want faster, more consis
|
|
|
64
65
|
- [Quality Bar](#quality-bar)
|
|
65
66
|
- [Download Stats](#download-stats)
|
|
66
67
|
- [Debugging Selection](#debugging-selection)
|
|
68
|
+
- [Task Router](#task-router)
|
|
67
69
|
- [Supported Integrations](#supported-integrations)
|
|
68
70
|
- [Commands](#commands)
|
|
69
71
|
- [Architecture](#architecture)
|
|
@@ -78,6 +80,7 @@ Use AgentPack when a repo is too large to paste and you want faster, more consis
|
|
|
78
80
|
- **Local code intelligence**: extracts roles, domains, entrypoints, definitions, dependencies, env reads, side effects, and external systems using static analysis.
|
|
79
81
|
- **Semantic repo map**: adds a compact module-level map before file context so agents orient faster.
|
|
80
82
|
- **Freshness and deltas**: records task source, git state, snapshot hashes, selected-file deltas, stale-context warnings, MCP auto-refresh signals, and a machine-readable `agentpack:freshness` block in markdown fallback artifacts.
|
|
83
|
+
- **Task router**: MCP and CLI surfaces route a task to relevant files, scoped rules, installed skills, suggested commands, and safety warnings without executing skills automatically.
|
|
81
84
|
- **Agent integrations**: installs Claude Code, Cursor, Windsurf, Codex, Antigravity, VS Code tasks, git hooks, and MCP configuration.
|
|
82
85
|
- **Local and measurable**: no API calls for scan, summarize, rank, pack, stats, or benchmark; quality is measured with expected-file evals.
|
|
83
86
|
|
|
@@ -291,6 +294,40 @@ agentpack guard --agent auto --repair-stale --refresh-context
|
|
|
291
294
|
|
|
292
295
|
`guard` checks pack freshness, task freshness, repo snapshot freshness, and installed agent rules/hooks. With `--repair-stale --refresh-context`, it repairs stale AgentPack rule files and refreshes missing or stale context before returning success. `agentpack pack` also self-heals stale AgentPack rule blocks for the active agent, so older installs that still run `pack` get upgraded opportunistically.
|
|
293
296
|
|
|
297
|
+
## Task Router
|
|
298
|
+
|
|
299
|
+
AgentPack Router is the MCP-first path for agents that need a task map before loading full context. It returns:
|
|
300
|
+
|
|
301
|
+
- files to read first
|
|
302
|
+
- repo and tool rules to apply
|
|
303
|
+
- installed skills to consider
|
|
304
|
+
- commands to consider, never execute automatically
|
|
305
|
+
- safety warnings for external side-effect skills
|
|
306
|
+
- an agent-ready prompt block
|
|
307
|
+
|
|
308
|
+
Use MCP when available:
|
|
309
|
+
|
|
310
|
+
```text
|
|
311
|
+
route_task("fix flaky payment webhook test")
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Use CLI for inspection or scripting:
|
|
315
|
+
|
|
316
|
+
```bash
|
|
317
|
+
agentpack skills scan
|
|
318
|
+
agentpack skills index
|
|
319
|
+
agentpack route --task "fix flaky payment webhook test"
|
|
320
|
+
agentpack route --task "fix flaky payment webhook test" --format json
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
Router reads skills and rules from `.claude/skills/`, `~/.claude/skills/`, `~/.codex/skills/`, `~/.agents/skills/`, `.agentpack/skills/`, `.cursor/rules/`, `AGENTS.md`, `CLAUDE.md`, and `GEMINI.md`. Rules are mandatory scoped instructions; skills are optional recommendations. The local `.agentpack/skills_index.json` stores metadata only and omits raw skill/rule bodies.
|
|
324
|
+
|
|
325
|
+
Safety defaults:
|
|
326
|
+
|
|
327
|
+
- skills are recommended, not executed
|
|
328
|
+
- suggested commands are returned as strings with reasons
|
|
329
|
+
- external side-effect skills, such as deploy or cloud mutation checklists, are warned and not selected unless explicitly allowed in config
|
|
330
|
+
|
|
294
331
|
## Before / After Agent Behavior
|
|
295
332
|
|
|
296
333
|
Without AgentPack:
|
|
@@ -597,10 +634,14 @@ Command map:
|
|
|
597
634
|
| `agentpack install` | Refresh or add an agent integration without changing project state |
|
|
598
635
|
| `agentpack repair` | Restore missing or drifted integration files |
|
|
599
636
|
| `agentpack pack` | Generate a ranked context pack for one task |
|
|
637
|
+
| `agentpack route` | Route a task to files, rules, skills, commands, and safety warnings |
|
|
638
|
+
| `agentpack skills scan` | Print discovered local/global skills and rules |
|
|
639
|
+
| `agentpack skills index` | Write `.agentpack/skills_index.json` metadata for faster routing |
|
|
600
640
|
| `agentpack watch` | Keep the context pack fresh while you work |
|
|
601
641
|
| `agentpack doctor` | Audit hooks, agent files, CLI path, and repo health |
|
|
602
642
|
| `agentpack explain` | Understand why a file was selected or omitted |
|
|
603
643
|
| `agentpack benchmark` | Measure recall, precision, and misses against real tasks |
|
|
644
|
+
| `agentpack eval` | Run deterministic failure evals with tests, diff limits, and taxonomy labels |
|
|
604
645
|
| `agentpack tune` | Suggest fixes from recent pack metrics and benchmark misses |
|
|
605
646
|
| `agentpack status` | Inspect current pack freshness and metadata |
|
|
606
647
|
| `agentpack diff` | Show what changed between context snapshots |
|
|
@@ -893,6 +934,32 @@ This keeps unrelated dirty files from consuming the whole context budget while p
|
|
|
893
934
|
|
|
894
935
|
---
|
|
895
936
|
|
|
937
|
+
### `agentpack route`
|
|
938
|
+
|
|
939
|
+
Route a task without writing context files. This is the CLI debug/admin surface for the same router used by MCP `route_task`.
|
|
940
|
+
|
|
941
|
+
```bash
|
|
942
|
+
agentpack route --task "fix flaky payment webhook test"
|
|
943
|
+
agentpack route --task "fix flaky payment webhook test" --format json
|
|
944
|
+
```
|
|
945
|
+
|
|
946
|
+
Output includes relevant files, applied rules, recommended skills, suggested commands, safety warnings, and an agent prompt. It uses the existing AgentPack file ranker in memory and does not write `.agentpack/context.md`.
|
|
947
|
+
|
|
948
|
+
---
|
|
949
|
+
|
|
950
|
+
### `agentpack skills`
|
|
951
|
+
|
|
952
|
+
Inspect or index installed skills and rule files.
|
|
953
|
+
|
|
954
|
+
```bash
|
|
955
|
+
agentpack skills scan
|
|
956
|
+
agentpack skills index
|
|
957
|
+
```
|
|
958
|
+
|
|
959
|
+
`scan` prints discovered artifacts. `index` writes `.agentpack/skills_index.json` with metadata only; raw skill and rule bodies are omitted from the index.
|
|
960
|
+
|
|
961
|
+
---
|
|
962
|
+
|
|
896
963
|
### `agentpack quickstart`
|
|
897
964
|
|
|
898
965
|
Show the shortest useful path for the current repo.
|
|
@@ -984,6 +1051,9 @@ Register in Claude Code settings (`~/.claude/settings.json`):
|
|
|
984
1051
|
|
|
985
1052
|
| Tool | Description |
|
|
986
1053
|
|---|---|
|
|
1054
|
+
| `route_task(task)` | Read-only task router. Returns relevant files, applied rules, recommended skills, suggested commands, safety warnings, and an agent prompt as JSON. |
|
|
1055
|
+
| `get_skills()` | Return discovered skill/rule inventory as JSON. |
|
|
1056
|
+
| `explain_route(task)` | Return route JSON with positive skill score reasons for debugging router choices. |
|
|
987
1057
|
| `start_task(task, mode, budget, max_tokens)` | Recommended MCP-first entry point. Writes `.agentpack/task.md`, generates a ranked pack, and returns packed markdown. |
|
|
988
1058
|
| `pack_context(task, mode, budget, max_tokens)` | Generate a ranked context pack. If `task` is provided, writes it to `.agentpack/task.md`; if omitted, reads `task.md` or infers from git. |
|
|
989
1059
|
| `get_context()` | Return the latest pack. If `.agentpack/task.md` or the repo snapshot differs from the packed metadata, it auto-refreshes before returning; otherwise it prepends a freshness header. |
|
|
@@ -1211,6 +1281,82 @@ This command does not pretend a pack is correct. It gives the next thing to insp
|
|
|
1211
1281
|
|
|
1212
1282
|
---
|
|
1213
1283
|
|
|
1284
|
+
### `agentpack eval`
|
|
1285
|
+
|
|
1286
|
+
Run deterministic failure evals. AgentPack does not run the coding agent and
|
|
1287
|
+
does not use an LLM judge; it verifies the current or replayed worktree with
|
|
1288
|
+
commands and diff policies.
|
|
1289
|
+
|
|
1290
|
+
```bash
|
|
1291
|
+
agentpack eval --init
|
|
1292
|
+
# edit .agentpack/evals.toml with real failures and checks
|
|
1293
|
+
agentpack eval
|
|
1294
|
+
agentpack eval --case auth-timeout --prove-targets
|
|
1295
|
+
agentpack eval --capture auth-timeout --failure-class context --check "pytest tests/test_auth.py -q"
|
|
1296
|
+
agentpack eval --watch --until-pass
|
|
1297
|
+
agentpack eval --replay --prove-targets
|
|
1298
|
+
agentpack eval --variant baseline
|
|
1299
|
+
agentpack eval --variant agentpack
|
|
1300
|
+
agentpack eval --compare-variants baseline:agentpack
|
|
1301
|
+
agentpack eval --ci-template
|
|
1302
|
+
agentpack eval --report
|
|
1303
|
+
```
|
|
1304
|
+
|
|
1305
|
+
Example case:
|
|
1306
|
+
|
|
1307
|
+
```toml
|
|
1308
|
+
[[cases]]
|
|
1309
|
+
id = "auth-timeout"
|
|
1310
|
+
task = "fix auth token timeout"
|
|
1311
|
+
failure_class = "context"
|
|
1312
|
+
failure_source = "agent_failed"
|
|
1313
|
+
base_ref = "HEAD"
|
|
1314
|
+
patch_file = ".agentpack/evals/auth-timeout.patch"
|
|
1315
|
+
required_changed_files = ["src/auth/token.py"]
|
|
1316
|
+
forbidden_changed_files = ["src/db/**"]
|
|
1317
|
+
max_changed_files = 5
|
|
1318
|
+
max_changed_lines = 250
|
|
1319
|
+
agent = "codex"
|
|
1320
|
+
context_file = ".agentpack/context.md"
|
|
1321
|
+
context_hash = "..."
|
|
1322
|
+
selected_files = ["src/auth/token.py", "tests/test_auth.py"]
|
|
1323
|
+
|
|
1324
|
+
[[cases.checks]]
|
|
1325
|
+
name = "tests"
|
|
1326
|
+
command = "pytest tests/test_auth.py -q"
|
|
1327
|
+
timeout_s = 120
|
|
1328
|
+
retries = 1 # optional, marks pass-after-fail checks as flaky
|
|
1329
|
+
```
|
|
1330
|
+
|
|
1331
|
+
Use `eval` after an agent run: capture the real failure, add deterministic
|
|
1332
|
+
checks such as tests, typecheck, lint, schema validation, API contract tests,
|
|
1333
|
+
diff size, forbidden files, or golden outputs, then rerun until the harness
|
|
1334
|
+
passes. The model can propose; the harness must verify.
|
|
1335
|
+
|
|
1336
|
+
For hands-free local iteration, keep `agentpack eval --watch --until-pass`
|
|
1337
|
+
running in a terminal while the agent or developer edits. It reruns when the
|
|
1338
|
+
case file, patch artifacts, golden files, or git diff content changes and stops
|
|
1339
|
+
when all deterministic checks pass. `--capture` stores the current patch under
|
|
1340
|
+
`.agentpack/evals/<case-id>.patch` plus context metadata; `--replay` checks out
|
|
1341
|
+
`base_ref` into an isolated git worktree, applies that patch, and runs the same
|
|
1342
|
+
deterministic checks there. To measure AgentPack's contribution, run the same
|
|
1343
|
+
case with `--variant baseline` and then with `--variant agentpack`;
|
|
1344
|
+
`--compare-variants baseline:agentpack` reports which cases improved, regressed,
|
|
1345
|
+
stayed unchanged, or still need both sides. Use `--ci-template` to scaffold a
|
|
1346
|
+
GitHub Actions workflow for `benchmarks/evals.toml`.
|
|
1347
|
+
|
|
1348
|
+
Eval files are executable trust boundaries: commands in `checks.command` run
|
|
1349
|
+
locally and in CI. Review eval TOML from contributors with the same care as
|
|
1350
|
+
shell scripts or workflow files.
|
|
1351
|
+
|
|
1352
|
+
Captured patch artifacts are secret-scanned with the same local redactor used
|
|
1353
|
+
for context packs before they are written. If a patch line contains a real
|
|
1354
|
+
secret, the artifact stores `[REDACTED:<type>]` and the case records
|
|
1355
|
+
`patch_redaction_warnings`. Secret-bearing patches may replay with redacted
|
|
1356
|
+
values; replace secrets with safe fixture values when exact replay matters.
|
|
1357
|
+
|
|
1358
|
+
---
|
|
1359
|
+
|
|
1214
1360
|
### `agentpack status`
|
|
1215
1361
|
|
|
1216
1362
|
Check whether the context pack is stale.
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
# AgentPack
|
|
2
2
|
|
|
3
3
|
[](https://pypi.org/project/agentpack-cli/)
|
|
4
|
+
[](https://pepy.tech/projects/agentpack-cli)
|
|
4
5
|
[](https://www.npmjs.com/package/@vishal2612200/agentpack)
|
|
5
6
|
[](https://www.npmjs.com/package/@vishal2612200/agentpack)
|
|
6
7
|
[](https://pypi.org/project/agentpack-cli/)
|
|
7
8
|
[](https://opensource.org/licenses/MIT)
|
|
8
9
|
[](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
9
10
|
|
|
10
|
-
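> **Status: alpha (v0.3.9).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.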
|
|
11
|
+
> **Status: alpha (v0.3.11).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
|
|
11
12
|
>
|
|
12
13
|
> **Platform note:** macOS, Linux, and Windows are supported. Windows support targets PowerShell plus Git for Windows. `cmd.exe` and bare Git setups are not a supported path yet.
|
|
13
14
|
|
|
@@ -25,6 +26,7 @@ Use AgentPack when a repo is too large to paste and you want faster, more consis
|
|
|
25
26
|
- [Quality Bar](#quality-bar)
|
|
26
27
|
- [Download Stats](#download-stats)
|
|
27
28
|
- [Debugging Selection](#debugging-selection)
|
|
29
|
+
- [Task Router](#task-router)
|
|
28
30
|
- [Supported Integrations](#supported-integrations)
|
|
29
31
|
- [Commands](#commands)
|
|
30
32
|
- [Architecture](#architecture)
|
|
@@ -39,6 +41,7 @@ Use AgentPack when a repo is too large to paste and you want faster, more consis
|
|
|
39
41
|
- **Local code intelligence**: extracts roles, domains, entrypoints, definitions, dependencies, env reads, side effects, and external systems using static analysis.
|
|
40
42
|
- **Semantic repo map**: adds a compact module-level map before file context so agents orient faster.
|
|
41
43
|
- **Freshness and deltas**: records task source, git state, snapshot hashes, selected-file deltas, stale-context warnings, MCP auto-refresh signals, and a machine-readable `agentpack:freshness` block in markdown fallback artifacts.
|
|
44
|
+
- **Task router**: MCP and CLI surfaces route a task to relevant files, scoped rules, installed skills, suggested commands, and safety warnings without executing skills automatically.
|
|
42
45
|
- **Agent integrations**: installs Claude Code, Cursor, Windsurf, Codex, Antigravity, VS Code tasks, git hooks, and MCP configuration.
|
|
43
46
|
- **Local and measurable**: no API calls for scan, summarize, rank, pack, stats, or benchmark; quality is measured with expected-file evals.
|
|
44
47
|
|
|
@@ -252,6 +255,40 @@ agentpack guard --agent auto --repair-stale --refresh-context
|
|
|
252
255
|
|
|
253
256
|
`guard` checks pack freshness, task freshness, repo snapshot freshness, and installed agent rules/hooks. With `--repair-stale --refresh-context`, it repairs stale AgentPack rule files and refreshes missing or stale context before returning success. `agentpack pack` also self-heals stale AgentPack rule blocks for the active agent, so older installs that still run `pack` get upgraded opportunistically.
|
|
254
257
|
|
|
258
|
+
## Task Router
|
|
259
|
+
|
|
260
|
+
AgentPack Router is the MCP-first path for agents that need a task map before loading full context. It returns:
|
|
261
|
+
|
|
262
|
+
- files to read first
|
|
263
|
+
- repo and tool rules to apply
|
|
264
|
+
- installed skills to consider
|
|
265
|
+
- commands to consider, never execute automatically
|
|
266
|
+
- safety warnings for external side-effect skills
|
|
267
|
+
- an agent-ready prompt block
|
|
268
|
+
|
|
269
|
+
Use MCP when available:
|
|
270
|
+
|
|
271
|
+
```text
|
|
272
|
+
route_task("fix flaky payment webhook test")
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
Use CLI for inspection or scripting:
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
agentpack skills scan
|
|
279
|
+
agentpack skills index
|
|
280
|
+
agentpack route --task "fix flaky payment webhook test"
|
|
281
|
+
agentpack route --task "fix flaky payment webhook test" --format json
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
Router reads skills and rules from `.claude/skills/`, `~/.claude/skills/`, `~/.codex/skills/`, `~/.agents/skills/`, `.agentpack/skills/`, `.cursor/rules/`, `AGENTS.md`, `CLAUDE.md`, and `GEMINI.md`. Rules are mandatory scoped instructions; skills are optional recommendations. The local `.agentpack/skills_index.json` stores metadata only and omits raw skill/rule bodies.
|
|
285
|
+
|
|
286
|
+
Safety defaults:
|
|
287
|
+
|
|
288
|
+
- skills are recommended, not executed
|
|
289
|
+
- suggested commands are returned as strings with reasons
|
|
290
|
+
- external side-effect skills, such as deploy or cloud mutation checklists, are warned and not selected unless explicitly allowed in config
|
|
291
|
+
|
|
255
292
|
## Before / After Agent Behavior
|
|
256
293
|
|
|
257
294
|
Without AgentPack:
|
|
@@ -558,10 +595,14 @@ Command map:
|
|
|
558
595
|
| `agentpack install` | Refresh or add an agent integration without changing project state |
|
|
559
596
|
| `agentpack repair` | Restore missing or drifted integration files |
|
|
560
597
|
| `agentpack pack` | Generate a ranked context pack for one task |
|
|
598
|
+
| `agentpack route` | Route a task to files, rules, skills, commands, and safety warnings |
|
|
599
|
+
| `agentpack skills scan` | Print discovered local/global skills and rules |
|
|
600
|
+
| `agentpack skills index` | Write `.agentpack/skills_index.json` metadata for faster routing |
|
|
561
601
|
| `agentpack watch` | Keep the context pack fresh while you work |
|
|
562
602
|
| `agentpack doctor` | Audit hooks, agent files, CLI path, and repo health |
|
|
563
603
|
| `agentpack explain` | Understand why a file was selected or omitted |
|
|
564
604
|
| `agentpack benchmark` | Measure recall, precision, and misses against real tasks |
|
|
605
|
+
| `agentpack eval` | Run deterministic failure evals with tests, diff limits, and taxonomy labels |
|
|
565
606
|
| `agentpack tune` | Suggest fixes from recent pack metrics and benchmark misses |
|
|
566
607
|
| `agentpack status` | Inspect current pack freshness and metadata |
|
|
567
608
|
| `agentpack diff` | Show what changed between context snapshots |
|
|
@@ -854,6 +895,32 @@ This keeps unrelated dirty files from consuming the whole context budget while p
|
|
|
854
895
|
|
|
855
896
|
---
|
|
856
897
|
|
|
898
|
+
### `agentpack route`
|
|
899
|
+
|
|
900
|
+
Route a task without writing context files. This is the CLI debug/admin surface for the same router used by MCP `route_task`.
|
|
901
|
+
|
|
902
|
+
```bash
|
|
903
|
+
agentpack route --task "fix flaky payment webhook test"
|
|
904
|
+
agentpack route --task "fix flaky payment webhook test" --format json
|
|
905
|
+
```
|
|
906
|
+
|
|
907
|
+
Output includes relevant files, applied rules, recommended skills, suggested commands, safety warnings, and an agent prompt. It uses the existing AgentPack file ranker in memory and does not write `.agentpack/context.md`.
|
|
908
|
+
|
|
909
|
+
---
|
|
910
|
+
|
|
911
|
+
### `agentpack skills`
|
|
912
|
+
|
|
913
|
+
Inspect or index installed skills and rule files.
|
|
914
|
+
|
|
915
|
+
```bash
|
|
916
|
+
agentpack skills scan
|
|
917
|
+
agentpack skills index
|
|
918
|
+
```
|
|
919
|
+
|
|
920
|
+
`scan` prints discovered artifacts. `index` writes `.agentpack/skills_index.json` with metadata only; raw skill and rule bodies are omitted from the index.
|
|
921
|
+
|
|
922
|
+
---
|
|
923
|
+
|
|
857
924
|
### `agentpack quickstart`
|
|
858
925
|
|
|
859
926
|
Show the shortest useful path for the current repo.
|
|
@@ -945,6 +1012,9 @@ Register in Claude Code settings (`~/.claude/settings.json`):
|
|
|
945
1012
|
|
|
946
1013
|
| Tool | Description |
|
|
947
1014
|
|---|---|
|
|
1015
|
+
| `route_task(task)` | Read-only task router. Returns relevant files, applied rules, recommended skills, suggested commands, safety warnings, and an agent prompt as JSON. |
|
|
1016
|
+
| `get_skills()` | Return discovered skill/rule inventory as JSON. |
|
|
1017
|
+
| `explain_route(task)` | Return route JSON with positive skill score reasons for debugging router choices. |
|
|
948
1018
|
| `start_task(task, mode, budget, max_tokens)` | Recommended MCP-first entry point. Writes `.agentpack/task.md`, generates a ranked pack, and returns packed markdown. |
|
|
949
1019
|
| `pack_context(task, mode, budget, max_tokens)` | Generate a ranked context pack. If `task` is provided, writes it to `.agentpack/task.md`; if omitted, reads `task.md` or infers from git. |
|
|
950
1020
|
| `get_context()` | Return the latest pack. If `.agentpack/task.md` or the repo snapshot differs from the packed metadata, it auto-refreshes before returning; otherwise it prepends a freshness header. |
|
|
@@ -1172,6 +1242,82 @@ This command does not pretend a pack is correct. It gives the next thing to insp
|
|
|
1172
1242
|
|
|
1173
1243
|
---
|
|
1174
1244
|
|
|
1245
|
+
### `agentpack eval`
|
|
1246
|
+
|
|
1247
|
+
Run deterministic failure evals. AgentPack does not run the coding agent and
|
|
1248
|
+
does not use an LLM judge; it verifies the current or replayed worktree with
|
|
1249
|
+
commands and diff policies.
|
|
1250
|
+
|
|
1251
|
+
```bash
|
|
1252
|
+
agentpack eval --init
|
|
1253
|
+
# edit .agentpack/evals.toml with real failures and checks
|
|
1254
|
+
agentpack eval
|
|
1255
|
+
agentpack eval --case auth-timeout --prove-targets
|
|
1256
|
+
agentpack eval --capture auth-timeout --failure-class context --check "pytest tests/test_auth.py -q"
|
|
1257
|
+
agentpack eval --watch --until-pass
|
|
1258
|
+
agentpack eval --replay --prove-targets
|
|
1259
|
+
agentpack eval --variant baseline
|
|
1260
|
+
agentpack eval --variant agentpack
|
|
1261
|
+
agentpack eval --compare-variants baseline:agentpack
|
|
1262
|
+
agentpack eval --ci-template
|
|
1263
|
+
agentpack eval --report
|
|
1264
|
+
```
|
|
1265
|
+
|
|
1266
|
+
Example case:
|
|
1267
|
+
|
|
1268
|
+
```toml
|
|
1269
|
+
[[cases]]
|
|
1270
|
+
id = "auth-timeout"
|
|
1271
|
+
task = "fix auth token timeout"
|
|
1272
|
+
failure_class = "context"
|
|
1273
|
+
failure_source = "agent_failed"
|
|
1274
|
+
base_ref = "HEAD"
|
|
1275
|
+
patch_file = ".agentpack/evals/auth-timeout.patch"
|
|
1276
|
+
required_changed_files = ["src/auth/token.py"]
|
|
1277
|
+
forbidden_changed_files = ["src/db/**"]
|
|
1278
|
+
max_changed_files = 5
|
|
1279
|
+
max_changed_lines = 250
|
|
1280
|
+
agent = "codex"
|
|
1281
|
+
context_file = ".agentpack/context.md"
|
|
1282
|
+
context_hash = "..."
|
|
1283
|
+
selected_files = ["src/auth/token.py", "tests/test_auth.py"]
|
|
1284
|
+
|
|
1285
|
+
[[cases.checks]]
|
|
1286
|
+
name = "tests"
|
|
1287
|
+
command = "pytest tests/test_auth.py -q"
|
|
1288
|
+
timeout_s = 120
|
|
1289
|
+
retries = 1 # optional, marks pass-after-fail checks as flaky
|
|
1290
|
+
```
|
|
1291
|
+
|
|
1292
|
+
Use `eval` after an agent run: capture the real failure, add deterministic
|
|
1293
|
+
checks such as tests, typecheck, lint, schema validation, API contract tests,
|
|
1294
|
+
diff size, forbidden files, or golden outputs, then rerun until the harness
|
|
1295
|
+
passes. The model can propose; the harness must verify.
|
|
1296
|
+
|
|
1297
|
+
For hands-free local iteration, keep `agentpack eval --watch --until-pass`
|
|
1298
|
+
running in a terminal while the agent or developer edits. It reruns when the
|
|
1299
|
+
case file, patch artifacts, golden files, or git diff content changes and stops
|
|
1300
|
+
when all deterministic checks pass. `--capture` stores the current patch under
|
|
1301
|
+
`.agentpack/evals/<case-id>.patch` plus context metadata; `--replay` checks out
|
|
1302
|
+
`base_ref` into an isolated git worktree, applies that patch, and runs the same
|
|
1303
|
+
deterministic checks there. To measure AgentPack's contribution, run the same
|
|
1304
|
+
case with `--variant baseline` and then with `--variant agentpack`;
|
|
1305
|
+
`--compare-variants baseline:agentpack` reports which cases improved, regressed,
|
|
1306
|
+
stayed unchanged, or still need results from both variants. Use `--ci-template` to scaffold a
|
|
1307
|
+
GitHub Actions workflow for `benchmarks/evals.toml`.
|
|
1308
|
+
|
|
1309
|
+
Eval files are executable trust boundaries: commands in `checks.command` run
|
|
1310
|
+
locally and in CI. Review eval TOML from contributors with the same care as
|
|
1311
|
+
shell scripts or workflow files.
|
|
1312
|
+
|
|
1313
|
+
Captured patch artifacts are secret-scanned with the same local redactor used
|
|
1314
|
+
for context packs before they are written. If a patch line contains a real
|
|
1315
|
+
secret, the artifact stores `[REDACTED:<type>]` and the case records
|
|
1316
|
+
`patch_redaction_warnings`. Secret-bearing patches may replay with redacted
|
|
1317
|
+
values; replace secrets with safe fixture values when exact replay matters.
|
|
1318
|
+
|
|
1319
|
+
---
|
|
1320
|
+
|
|
1175
1321
|
### `agentpack status`
|
|
1176
1322
|
|
|
1177
1323
|
Check whether the context pack is stale.
|
|
@@ -6,6 +6,7 @@ from agentpack.commands import (
|
|
|
6
6
|
claude_cmd,
|
|
7
7
|
diff,
|
|
8
8
|
doctor,
|
|
9
|
+
eval_cmd,
|
|
9
10
|
explain,
|
|
10
11
|
guard,
|
|
11
12
|
hook_cmd,
|
|
@@ -18,7 +19,9 @@ from agentpack.commands import (
|
|
|
18
19
|
pack,
|
|
19
20
|
quickstart,
|
|
20
21
|
repair,
|
|
22
|
+
route,
|
|
21
23
|
scan,
|
|
24
|
+
skills,
|
|
22
25
|
stats,
|
|
23
26
|
status,
|
|
24
27
|
summarize,
|
|
@@ -55,11 +58,13 @@ for mod in [
|
|
|
55
58
|
pack,
|
|
56
59
|
install,
|
|
57
60
|
repair,
|
|
61
|
+
route,
|
|
58
62
|
migrate,
|
|
59
63
|
monitor,
|
|
60
64
|
explain,
|
|
61
65
|
guard,
|
|
62
66
|
doctor,
|
|
67
|
+
eval_cmd,
|
|
63
68
|
tune,
|
|
64
69
|
watch,
|
|
65
70
|
claude_cmd,
|
|
@@ -67,6 +72,7 @@ for mod in [
|
|
|
67
72
|
mcp_cmd,
|
|
68
73
|
hook_cmd,
|
|
69
74
|
quickstart,
|
|
75
|
+
skills,
|
|
70
76
|
]:
|
|
71
77
|
mod.register(app)
|
|
72
78
|
|