agentpack-cli 0.3.9__tar.gz → 0.3.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/PKG-INFO +148 -2
  2. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/README.md +147 -1
  3. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/pyproject.toml +1 -1
  4. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/__init__.py +1 -1
  5. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/cli.py +6 -0
  6. agentpack_cli-0.3.11/src/agentpack/commands/eval_cmd.py +264 -0
  7. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/mcp_cmd.py +1 -1
  8. agentpack_cli-0.3.11/src/agentpack/commands/route.py +29 -0
  9. agentpack_cli-0.3.11/src/agentpack/commands/skills.py +46 -0
  10. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/tune.py +24 -0
  11. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/config.py +20 -0
  12. agentpack_cli-0.3.11/src/agentpack/core/evals.py +939 -0
  13. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/antigravity.py +4 -3
  14. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/claude.py +3 -2
  15. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/codex.py +4 -3
  16. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/cursor.py +8 -6
  17. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/mcp_server.py +46 -0
  18. agentpack_cli-0.3.11/src/agentpack/router/__init__.py +23 -0
  19. agentpack_cli-0.3.11/src/agentpack/router/discovery.py +106 -0
  20. agentpack_cli-0.3.11/src/agentpack/router/models.py +67 -0
  21. agentpack_cli-0.3.11/src/agentpack/router/parser.py +240 -0
  22. agentpack_cli-0.3.11/src/agentpack/router/prompt_builder.py +87 -0
  23. agentpack_cli-0.3.11/src/agentpack/router/scoring.py +110 -0
  24. agentpack_cli-0.3.11/src/agentpack/router/service.py +156 -0
  25. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/.gitignore +0 -0
  26. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/LICENSE +0 -0
  27. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/__init__.py +0 -0
  28. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/antigravity.py +0 -0
  29. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/base.py +0 -0
  30. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/claude.py +0 -0
  31. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/codex.py +0 -0
  32. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/cursor.py +0 -0
  33. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/detect.py +0 -0
  34. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/generic.py +0 -0
  35. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/adapters/windsurf.py +0 -0
  36. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/__init__.py +0 -0
  37. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/dependency_graph.py +0 -0
  38. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/go_imports.py +0 -0
  39. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/java_imports.py +0 -0
  40. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/js_ts_imports.py +0 -0
  41. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/monorepo.py +0 -0
  42. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/naming_signals.py +0 -0
  43. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/python_imports.py +0 -0
  44. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/ranking.py +0 -0
  45. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/repo_map.py +0 -0
  46. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/role_inference.py +0 -0
  47. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/rust_imports.py +0 -0
  48. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/symbols.py +0 -0
  49. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/task_classifier.py +0 -0
  50. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/analysis/tests.py +0 -0
  51. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/application/__init__.py +0 -0
  52. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/application/pack_service.py +0 -0
  53. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/__init__.py +0 -0
  54. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/_shared.py +0 -0
  55. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/benchmark.py +0 -0
  56. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/claude_cmd.py +0 -0
  57. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/diff.py +0 -0
  58. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/doctor.py +0 -0
  59. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/explain.py +0 -0
  60. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/guard.py +0 -0
  61. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/hook_cmd.py +0 -0
  62. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/ignore_cmd.py +0 -0
  63. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/init.py +0 -0
  64. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/install.py +0 -0
  65. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/migrate.py +0 -0
  66. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/monitor.py +0 -0
  67. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/pack.py +0 -0
  68. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/quickstart.py +0 -0
  69. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/repair.py +0 -0
  70. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/scan.py +0 -0
  71. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/stats.py +0 -0
  72. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/status.py +0 -0
  73. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/summarize.py +0 -0
  74. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/commands/watch.py +0 -0
  75. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/__init__.py +0 -0
  76. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/bootstrap.py +0 -0
  77. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/cache.py +0 -0
  78. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/context_pack.py +0 -0
  79. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/diff.py +0 -0
  80. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/git.py +0 -0
  81. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/git_hooks.py +0 -0
  82. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/global_install.py +0 -0
  83. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/ignore.py +0 -0
  84. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/merkle.py +0 -0
  85. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/models.py +0 -0
  86. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/redactor.py +0 -0
  87. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/scanner.py +0 -0
  88. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/snapshot.py +0 -0
  89. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/task_freshness.py +0 -0
  90. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/token_estimator.py +0 -0
  91. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/core/vscode_tasks.py +0 -0
  92. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/data/agentpack.md +0 -0
  93. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/__init__.py +0 -0
  94. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/installers/windsurf.py +0 -0
  95. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/__init__.py +0 -0
  96. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/agents.py +0 -0
  97. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/git_hooks.py +0 -0
  98. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/global_install.py +0 -0
  99. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/platform.py +0 -0
  100. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/integrations/vscode_tasks.py +0 -0
  101. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/renderers/__init__.py +0 -0
  102. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/renderers/compact.py +0 -0
  103. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/renderers/markdown.py +0 -0
  104. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/renderers/receipts.py +0 -0
  105. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/session/__init__.py +0 -0
  106. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/session/state.py +0 -0
  107. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/summaries/__init__.py +0 -0
  108. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/summaries/base.py +0 -0
  109. {agentpack_cli-0.3.9 → agentpack_cli-0.3.11}/src/agentpack/summaries/offline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentpack-cli
3
- Version: 0.3.9
3
+ Version: 0.3.11
4
4
  Summary: Local context engine for AI coding agents that ranks relevant files and builds task-focused context packs.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -40,13 +40,14 @@ Description-Content-Type: text/markdown
40
40
  # AgentPack
41
41
 
42
42
  [![PyPI version](https://img.shields.io/pypi/v/agentpack-cli.svg)](https://pypi.org/project/agentpack-cli/)
43
+ [![PyPI Downloads](https://static.pepy.tech/personalized-badge/agentpack-cli?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/agentpack-cli)
43
44
  [![npm version](https://img.shields.io/npm/v/@vishal2612200/agentpack.svg)](https://www.npmjs.com/package/@vishal2612200/agentpack)
44
45
  [![npm downloads](https://img.shields.io/npm/dm/@vishal2612200/agentpack.svg)](https://www.npmjs.com/package/@vishal2612200/agentpack)
45
46
  [![Python versions](https://img.shields.io/pypi/pyversions/agentpack-cli.svg)](https://pypi.org/project/agentpack-cli/)
46
47
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
47
48
  [![CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml/badge.svg)](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
48
49
 
49
- > **Status: alpha (v0.3.9).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
50
+ > **Status: alpha (v0.3.11).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
50
51
  >
51
52
  > **Platform note:** macOS, Linux, and Windows are supported. Windows support targets PowerShell plus Git for Windows. `cmd.exe` and bare Git setups are not a supported path yet.
52
53
 
@@ -64,6 +65,7 @@ Use AgentPack when a repo is too large to paste and you want faster, more consis
64
65
  - [Quality Bar](#quality-bar)
65
66
  - [Download Stats](#download-stats)
66
67
  - [Debugging Selection](#debugging-selection)
68
+ - [Task Router](#task-router)
67
69
  - [Supported Integrations](#supported-integrations)
68
70
  - [Commands](#commands)
69
71
  - [Architecture](#architecture)
@@ -78,6 +80,7 @@ Use AgentPack when a repo is too large to paste and you want faster, more consis
78
80
  - **Local code intelligence**: extracts roles, domains, entrypoints, definitions, dependencies, env reads, side effects, and external systems using static analysis.
79
81
  - **Semantic repo map**: adds a compact module-level map before file context so agents orient faster.
80
82
  - **Freshness and deltas**: records task source, git state, snapshot hashes, selected-file deltas, stale-context warnings, MCP auto-refresh signals, and a machine-readable `agentpack:freshness` block in markdown fallback artifacts.
83
+ - **Task router**: MCP and CLI surfaces route a task to relevant files, scoped rules, installed skills, suggested commands, and safety warnings without executing skills automatically.
81
84
  - **Agent integrations**: installs Claude Code, Cursor, Windsurf, Codex, Antigravity, VS Code tasks, git hooks, and MCP configuration.
82
85
  - **Local and measurable**: no API calls for scan, summarize, rank, pack, stats, or benchmark; quality is measured with expected-file evals.
83
86
 
@@ -291,6 +294,40 @@ agentpack guard --agent auto --repair-stale --refresh-context
291
294
 
292
295
  `guard` checks pack freshness, task freshness, repo snapshot freshness, and installed agent rules/hooks. With `--repair-stale --refresh-context`, it repairs stale AgentPack rule files and refreshes missing or stale context before returning success. `agentpack pack` also self-heals stale AgentPack rule blocks for the active agent, so older installs that still run `pack` get upgraded opportunistically.
293
296
 
297
+ ## Task Router
298
+
299
+ AgentPack Router is the MCP-first path for agents that need a task map before loading full context. It returns:
300
+
301
+ - files to read first
302
+ - repo and tool rules to apply
303
+ - installed skills to consider
304
+ - commands to consider, never executed automatically
305
+ - safety warnings for external side-effect skills
306
+ - an agent-ready prompt block
307
+
308
+ Use MCP when available:
309
+
310
+ ```text
311
+ route_task("fix flaky payment webhook test")
312
+ ```
313
+
314
+ Use CLI for inspection or scripting:
315
+
316
+ ```bash
317
+ agentpack skills scan
318
+ agentpack skills index
319
+ agentpack route --task "fix flaky payment webhook test"
320
+ agentpack route --task "fix flaky payment webhook test" --format json
321
+ ```
322
+
323
+ Router reads skills and rules from `.claude/skills/`, `~/.claude/skills/`, `~/.codex/skills/`, `~/.agents/skills/`, `.agentpack/skills/`, `.cursor/rules/`, `AGENTS.md`, `CLAUDE.md`, and `GEMINI.md`. Rules are mandatory scoped instructions; skills are optional recommendations. The local `.agentpack/skills_index.json` stores metadata only and omits raw skill/rule bodies.
324
+
325
+ Safety defaults:
326
+
327
+ - skills are recommended, not executed
328
+ - suggested commands are returned as strings with reasons
329
+ - external side-effect skills, such as deploy or cloud mutation checklists, trigger a safety warning and are not selected unless explicitly allowed in config
330
+
294
331
  ## Before / After Agent Behavior
295
332
 
296
333
  Without AgentPack:
@@ -597,10 +634,14 @@ Command map:
597
634
  | `agentpack install` | Refresh or add an agent integration without changing project state |
598
635
  | `agentpack repair` | Restore missing or drifted integration files |
599
636
  | `agentpack pack` | Generate a ranked context pack for one task |
637
+ | `agentpack route` | Route a task to files, rules, skills, commands, and safety warnings |
638
+ | `agentpack skills scan` | Print discovered local/global skills and rules |
639
+ | `agentpack skills index` | Write `.agentpack/skills_index.json` metadata for faster routing |
600
640
  | `agentpack watch` | Keep the context pack fresh while you work |
601
641
  | `agentpack doctor` | Audit hooks, agent files, CLI path, and repo health |
602
642
  | `agentpack explain` | Understand why a file was selected or omitted |
603
643
  | `agentpack benchmark` | Measure recall, precision, and misses against real tasks |
644
+ | `agentpack eval` | Run deterministic failure evals with tests, diff limits, and taxonomy labels |
604
645
  | `agentpack tune` | Suggest fixes from recent pack metrics and benchmark misses |
605
646
  | `agentpack status` | Inspect current pack freshness and metadata |
606
647
  | `agentpack diff` | Show what changed between context snapshots |
@@ -893,6 +934,32 @@ This keeps unrelated dirty files from consuming the whole context budget while p
893
934
 
894
935
  ---
895
936
 
937
+ ### `agentpack route`
938
+
939
+ Route a task without writing context files. This is the CLI debug/admin surface for the same router used by MCP `route_task`.
940
+
941
+ ```bash
942
+ agentpack route --task "fix flaky payment webhook test"
943
+ agentpack route --task "fix flaky payment webhook test" --format json
944
+ ```
945
+
946
+ Output includes relevant files, applied rules, recommended skills, suggested commands, safety warnings, and an agent prompt. It uses the existing AgentPack file ranker in memory and does not write `.agentpack/context.md`.
947
+
948
+ ---
949
+
950
+ ### `agentpack skills`
951
+
952
+ Inspect or index installed skills and rule files.
953
+
954
+ ```bash
955
+ agentpack skills scan
956
+ agentpack skills index
957
+ ```
958
+
959
+ `scan` prints discovered artifacts. `index` writes `.agentpack/skills_index.json` with metadata only; raw skill and rule bodies are omitted from the index.
960
+
961
+ ---
962
+
896
963
  ### `agentpack quickstart`
897
964
 
898
965
  Show the shortest useful path for the current repo.
@@ -984,6 +1051,9 @@ Register in Claude Code settings (`~/.claude/settings.json`):
984
1051
 
985
1052
  | Tool | Description |
986
1053
  |---|---|
1054
+ | `route_task(task)` | Read-only task router. Returns relevant files, applied rules, recommended skills, suggested commands, safety warnings, and an agent prompt as JSON. |
1055
+ | `get_skills()` | Return discovered skill/rule inventory as JSON. |
1056
+ | `explain_route(task)` | Return route JSON with positive skill score reasons for debugging router choices. |
987
1057
  | `start_task(task, mode, budget, max_tokens)` | Recommended MCP-first entry point. Writes `.agentpack/task.md`, generates a ranked pack, and returns packed markdown. |
988
1058
  | `pack_context(task, mode, budget, max_tokens)` | Generate a ranked context pack. If `task` is provided, writes it to `.agentpack/task.md`; if omitted, reads `task.md` or infers from git. |
989
1059
  | `get_context()` | Return the latest pack. If `.agentpack/task.md` or the repo snapshot differs from the packed metadata, it auto-refreshes before returning; otherwise it prepends a freshness header. |
@@ -1211,6 +1281,82 @@ This command does not pretend a pack is correct. It gives the next thing to insp
1211
1281
 
1212
1282
  ---
1213
1283
 
1284
+ ### `agentpack eval`
1285
+
1286
+ Run deterministic failure evals. AgentPack does not run the coding agent and
1287
+ does not use an LLM judge; it verifies the current or replayed worktree with
1288
+ commands and diff policies.
1289
+
1290
+ ```bash
1291
+ agentpack eval --init
1292
+ # edit .agentpack/evals.toml with real failures and checks
1293
+ agentpack eval
1294
+ agentpack eval --case auth-timeout --prove-targets
1295
+ agentpack eval --capture auth-timeout --failure-class context --check "pytest tests/test_auth.py -q"
1296
+ agentpack eval --watch --until-pass
1297
+ agentpack eval --replay --prove-targets
1298
+ agentpack eval --variant baseline
1299
+ agentpack eval --variant agentpack
1300
+ agentpack eval --compare-variants baseline:agentpack
1301
+ agentpack eval --ci-template
1302
+ agentpack eval --report
1303
+ ```
1304
+
1305
+ Example case:
1306
+
1307
+ ```toml
1308
+ [[cases]]
1309
+ id = "auth-timeout"
1310
+ task = "fix auth token timeout"
1311
+ failure_class = "context"
1312
+ failure_source = "agent_failed"
1313
+ base_ref = "HEAD"
1314
+ patch_file = ".agentpack/evals/auth-timeout.patch"
1315
+ required_changed_files = ["src/auth/token.py"]
1316
+ forbidden_changed_files = ["src/db/**"]
1317
+ max_changed_files = 5
1318
+ max_changed_lines = 250
1319
+ agent = "codex"
1320
+ context_file = ".agentpack/context.md"
1321
+ context_hash = "..."
1322
+ selected_files = ["src/auth/token.py", "tests/test_auth.py"]
1323
+
1324
+ [[cases.checks]]
1325
+ name = "tests"
1326
+ command = "pytest tests/test_auth.py -q"
1327
+ timeout_s = 120
1328
+ retries = 1 # optional, marks pass-after-fail checks as flaky
1329
+ ```
1330
+
1331
+ Use `eval` after an agent run: capture the real failure, add deterministic
1332
+ checks such as tests, typecheck, lint, schema validation, API contract tests,
1333
+ diff size, forbidden files, or golden outputs, then rerun until the harness
1334
+ passes. The model can propose; the harness must verify.
1335
+
1336
+ For hands-free local iteration, keep `agentpack eval --watch --until-pass`
1337
+ running in a terminal while the agent or developer edits. It reruns when the
1338
+ case file, patch artifacts, golden files, or git diff content changes and stops
1339
+ when all deterministic checks pass. `--capture` stores the current patch under
1340
+ `.agentpack/evals/<case-id>.patch` plus context metadata; `--replay` checks out
1341
+ `base_ref` into an isolated git worktree, applies that patch, and runs the same
1342
+ deterministic checks there. To measure AgentPack's contribution, run the same
1343
+ case with `--variant baseline` and then with `--variant agentpack`;
1344
+ `--compare-variants baseline:agentpack` reports which cases improved, regressed,
1345
+ stayed unchanged, or still need results from both variants. Use `--ci-template` to scaffold a
1346
+ GitHub Actions workflow for `benchmarks/evals.toml`.
1347
+
1348
+ Eval files are executable trust boundaries: commands in `checks.command` run
1349
+ locally and in CI. Review eval TOML from contributors with the same care as
1350
+ shell scripts or workflow files.
1351
+
1352
+ Captured patch artifacts are secret-scanned with the same local redactor used
1353
+ for context packs before they are written. If a patch line contains a real
1354
+ secret, the artifact stores `[REDACTED:<type>]` and the case records
1355
+ `patch_redaction_warnings`. Secret-bearing patches may replay with redacted
1356
+ values; replace secrets with safe fixture values when exact replay matters.
1357
+
1358
+ ---
1359
+
1214
1360
  ### `agentpack status`
1215
1361
 
1216
1362
  Check whether the context pack is stale.
@@ -1,13 +1,14 @@
1
1
  # AgentPack
2
2
 
3
3
  [![PyPI version](https://img.shields.io/pypi/v/agentpack-cli.svg)](https://pypi.org/project/agentpack-cli/)
4
+ [![PyPI Downloads](https://static.pepy.tech/personalized-badge/agentpack-cli?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/agentpack-cli)
4
5
  [![npm version](https://img.shields.io/npm/v/@vishal2612200/agentpack.svg)](https://www.npmjs.com/package/@vishal2612200/agentpack)
5
6
  [![npm downloads](https://img.shields.io/npm/dm/@vishal2612200/agentpack.svg)](https://www.npmjs.com/package/@vishal2612200/agentpack)
6
7
  [![Python versions](https://img.shields.io/pypi/pyversions/agentpack-cli.svg)](https://pypi.org/project/agentpack-cli/)
7
8
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
9
  [![CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml/badge.svg)](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
9
10
 
10
- > **Status: alpha (v0.3.9).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
11
+ > **Status: alpha (v0.3.11).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Public benchmark proof exists for the current suite, but broader repo coverage is still growing. API may change before 1.0.
11
12
  >
12
13
  > **Platform note:** macOS, Linux, and Windows are supported. Windows support targets PowerShell plus Git for Windows. `cmd.exe` and bare Git setups are not a supported path yet.
13
14
 
@@ -25,6 +26,7 @@ Use AgentPack when a repo is too large to paste and you want faster, more consis
25
26
  - [Quality Bar](#quality-bar)
26
27
  - [Download Stats](#download-stats)
27
28
  - [Debugging Selection](#debugging-selection)
29
+ - [Task Router](#task-router)
28
30
  - [Supported Integrations](#supported-integrations)
29
31
  - [Commands](#commands)
30
32
  - [Architecture](#architecture)
@@ -39,6 +41,7 @@ Use AgentPack when a repo is too large to paste and you want faster, more consis
39
41
  - **Local code intelligence**: extracts roles, domains, entrypoints, definitions, dependencies, env reads, side effects, and external systems using static analysis.
40
42
  - **Semantic repo map**: adds a compact module-level map before file context so agents orient faster.
41
43
  - **Freshness and deltas**: records task source, git state, snapshot hashes, selected-file deltas, stale-context warnings, MCP auto-refresh signals, and a machine-readable `agentpack:freshness` block in markdown fallback artifacts.
44
+ - **Task router**: MCP and CLI surfaces route a task to relevant files, scoped rules, installed skills, suggested commands, and safety warnings without executing skills automatically.
42
45
  - **Agent integrations**: installs Claude Code, Cursor, Windsurf, Codex, Antigravity, VS Code tasks, git hooks, and MCP configuration.
43
46
  - **Local and measurable**: no API calls for scan, summarize, rank, pack, stats, or benchmark; quality is measured with expected-file evals.
44
47
 
@@ -252,6 +255,40 @@ agentpack guard --agent auto --repair-stale --refresh-context
252
255
 
253
256
  `guard` checks pack freshness, task freshness, repo snapshot freshness, and installed agent rules/hooks. With `--repair-stale --refresh-context`, it repairs stale AgentPack rule files and refreshes missing or stale context before returning success. `agentpack pack` also self-heals stale AgentPack rule blocks for the active agent, so older installs that still run `pack` get upgraded opportunistically.
254
257
 
258
+ ## Task Router
259
+
260
+ AgentPack Router is the MCP-first path for agents that need a task map before loading full context. It returns:
261
+
262
+ - files to read first
263
+ - repo and tool rules to apply
264
+ - installed skills to consider
265
+ - commands to consider, never executed automatically
266
+ - safety warnings for external side-effect skills
267
+ - an agent-ready prompt block
268
+
269
+ Use MCP when available:
270
+
271
+ ```text
272
+ route_task("fix flaky payment webhook test")
273
+ ```
274
+
275
+ Use CLI for inspection or scripting:
276
+
277
+ ```bash
278
+ agentpack skills scan
279
+ agentpack skills index
280
+ agentpack route --task "fix flaky payment webhook test"
281
+ agentpack route --task "fix flaky payment webhook test" --format json
282
+ ```
283
+
284
+ Router reads skills and rules from `.claude/skills/`, `~/.claude/skills/`, `~/.codex/skills/`, `~/.agents/skills/`, `.agentpack/skills/`, `.cursor/rules/`, `AGENTS.md`, `CLAUDE.md`, and `GEMINI.md`. Rules are mandatory scoped instructions; skills are optional recommendations. The local `.agentpack/skills_index.json` stores metadata only and omits raw skill/rule bodies.
285
+
286
+ Safety defaults:
287
+
288
+ - skills are recommended, not executed
289
+ - suggested commands are returned as strings with reasons
290
+ - external side-effect skills, such as deploy or cloud mutation checklists, trigger a safety warning and are not selected unless explicitly allowed in config
291
+
255
292
  ## Before / After Agent Behavior
256
293
 
257
294
  Without AgentPack:
@@ -558,10 +595,14 @@ Command map:
558
595
  | `agentpack install` | Refresh or add an agent integration without changing project state |
559
596
  | `agentpack repair` | Restore missing or drifted integration files |
560
597
  | `agentpack pack` | Generate a ranked context pack for one task |
598
+ | `agentpack route` | Route a task to files, rules, skills, commands, and safety warnings |
599
+ | `agentpack skills scan` | Print discovered local/global skills and rules |
600
+ | `agentpack skills index` | Write `.agentpack/skills_index.json` metadata for faster routing |
561
601
  | `agentpack watch` | Keep the context pack fresh while you work |
562
602
  | `agentpack doctor` | Audit hooks, agent files, CLI path, and repo health |
563
603
  | `agentpack explain` | Understand why a file was selected or omitted |
564
604
  | `agentpack benchmark` | Measure recall, precision, and misses against real tasks |
605
+ | `agentpack eval` | Run deterministic failure evals with tests, diff limits, and taxonomy labels |
565
606
  | `agentpack tune` | Suggest fixes from recent pack metrics and benchmark misses |
566
607
  | `agentpack status` | Inspect current pack freshness and metadata |
567
608
  | `agentpack diff` | Show what changed between context snapshots |
@@ -854,6 +895,32 @@ This keeps unrelated dirty files from consuming the whole context budget while p
854
895
 
855
896
  ---
856
897
 
898
+ ### `agentpack route`
899
+
900
+ Route a task without writing context files. This is the CLI debug/admin surface for the same router used by MCP `route_task`.
901
+
902
+ ```bash
903
+ agentpack route --task "fix flaky payment webhook test"
904
+ agentpack route --task "fix flaky payment webhook test" --format json
905
+ ```
906
+
907
+ Output includes relevant files, applied rules, recommended skills, suggested commands, safety warnings, and an agent prompt. It uses the existing AgentPack file ranker in memory and does not write `.agentpack/context.md`.
908
+
909
+ ---
910
+
911
+ ### `agentpack skills`
912
+
913
+ Inspect or index installed skills and rule files.
914
+
915
+ ```bash
916
+ agentpack skills scan
917
+ agentpack skills index
918
+ ```
919
+
920
+ `scan` prints discovered artifacts. `index` writes `.agentpack/skills_index.json` with metadata only; raw skill and rule bodies are omitted from the index.
921
+
922
+ ---
923
+
857
924
  ### `agentpack quickstart`
858
925
 
859
926
  Show the shortest useful path for the current repo.
@@ -945,6 +1012,9 @@ Register in Claude Code settings (`~/.claude/settings.json`):
945
1012
 
946
1013
  | Tool | Description |
947
1014
  |---|---|
1015
+ | `route_task(task)` | Read-only task router. Returns relevant files, applied rules, recommended skills, suggested commands, safety warnings, and an agent prompt as JSON. |
1016
+ | `get_skills()` | Return discovered skill/rule inventory as JSON. |
1017
+ | `explain_route(task)` | Return route JSON with positive skill score reasons for debugging router choices. |
948
1018
  | `start_task(task, mode, budget, max_tokens)` | Recommended MCP-first entry point. Writes `.agentpack/task.md`, generates a ranked pack, and returns packed markdown. |
949
1019
  | `pack_context(task, mode, budget, max_tokens)` | Generate a ranked context pack. If `task` is provided, writes it to `.agentpack/task.md`; if omitted, reads `task.md` or infers from git. |
950
1020
  | `get_context()` | Return the latest pack. If `.agentpack/task.md` or the repo snapshot differs from the packed metadata, it auto-refreshes before returning; otherwise it prepends a freshness header. |
@@ -1172,6 +1242,82 @@ This command does not pretend a pack is correct. It gives the next thing to insp
1172
1242
 
1173
1243
  ---
1174
1244
 
1245
+ ### `agentpack eval`
1246
+
1247
+ Run deterministic failure evals. AgentPack does not run the coding agent and
1248
+ does not use an LLM judge; it verifies the current or replayed worktree with
1249
+ commands and diff policies.
1250
+
1251
+ ```bash
1252
+ agentpack eval --init
1253
+ # edit .agentpack/evals.toml with real failures and checks
1254
+ agentpack eval
1255
+ agentpack eval --case auth-timeout --prove-targets
1256
+ agentpack eval --capture auth-timeout --failure-class context --check "pytest tests/test_auth.py -q"
1257
+ agentpack eval --watch --until-pass
1258
+ agentpack eval --replay --prove-targets
1259
+ agentpack eval --variant baseline
1260
+ agentpack eval --variant agentpack
1261
+ agentpack eval --compare-variants baseline:agentpack
1262
+ agentpack eval --ci-template
1263
+ agentpack eval --report
1264
+ ```
1265
+
1266
+ Example case:
1267
+
1268
+ ```toml
1269
+ [[cases]]
1270
+ id = "auth-timeout"
1271
+ task = "fix auth token timeout"
1272
+ failure_class = "context"
1273
+ failure_source = "agent_failed"
1274
+ base_ref = "HEAD"
1275
+ patch_file = ".agentpack/evals/auth-timeout.patch"
1276
+ required_changed_files = ["src/auth/token.py"]
1277
+ forbidden_changed_files = ["src/db/**"]
1278
+ max_changed_files = 5
1279
+ max_changed_lines = 250
1280
+ agent = "codex"
1281
+ context_file = ".agentpack/context.md"
1282
+ context_hash = "..."
1283
+ selected_files = ["src/auth/token.py", "tests/test_auth.py"]
1284
+
1285
+ [[cases.checks]]
1286
+ name = "tests"
1287
+ command = "pytest tests/test_auth.py -q"
1288
+ timeout_s = 120
1289
+ retries = 1 # optional, marks pass-after-fail checks as flaky
1290
+ ```
1291
+
1292
+ Use `eval` after an agent run: capture the real failure, add deterministic
1293
+ checks such as tests, typecheck, lint, schema validation, API contract tests,
1294
+ diff size, forbidden files, or golden outputs, then rerun until the harness
1295
+ passes. The model can propose; the harness must verify.
1296
+
1297
+ For hands-free local iteration, keep `agentpack eval --watch --until-pass`
1298
+ running in a terminal while the agent or developer edits. It reruns when the
1299
+ case file, patch artifacts, golden files, or git diff content changes and stops
1300
+ when all deterministic checks pass. `--capture` stores the current patch under
1301
+ `.agentpack/evals/<case-id>.patch` plus context metadata; `--replay` checks out
1302
+ `base_ref` into an isolated git worktree, applies that patch, and runs the same
1303
+ deterministic checks there. To measure AgentPack's contribution, run the same
1304
+ case with `--variant baseline` and then with `--variant agentpack`;
1305
+ `--compare-variants baseline:agentpack` reports which cases improved, regressed,
1306
+ stayed unchanged, or still need results from both variants. Use `--ci-template` to scaffold a
1307
+ GitHub Actions workflow for `benchmarks/evals.toml`.
1308
+
1309
+ Eval files are executable trust boundaries: commands in `checks.command` run
1310
+ locally and in CI. Review eval TOML from contributors with the same care as
1311
+ shell scripts or workflow files.
1312
+
1313
+ Captured patch artifacts are secret-scanned with the same local redactor used
1314
+ for context packs before they are written. If a patch line contains a real
1315
+ secret, the artifact stores `[REDACTED:<type>]` and the case records
1316
+ `patch_redaction_warnings`. Secret-bearing patches may replay with redacted
1317
+ values; replace secrets with safe fixture values when exact replay matters.
1318
+
1319
+ ---
1320
+
1175
1321
  ### `agentpack status`
1176
1322
 
1177
1323
  Check whether the context pack is stale.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "agentpack-cli"
3
- version = "0.3.9"
3
+ version = "0.3.11"
4
4
  description = "Local context engine for AI coding agents that ranks relevant files and builds task-focused context packs."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -1,3 +1,3 @@
1
1
  """AgentPack — task-aware context packing for AI coding agents."""
2
2
 
3
- __version__ = "0.3.9"
3
+ __version__ = "0.3.11"
@@ -6,6 +6,7 @@ from agentpack.commands import (
6
6
  claude_cmd,
7
7
  diff,
8
8
  doctor,
9
+ eval_cmd,
9
10
  explain,
10
11
  guard,
11
12
  hook_cmd,
@@ -18,7 +19,9 @@ from agentpack.commands import (
18
19
  pack,
19
20
  quickstart,
20
21
  repair,
22
+ route,
21
23
  scan,
24
+ skills,
22
25
  stats,
23
26
  status,
24
27
  summarize,
@@ -55,11 +58,13 @@ for mod in [
55
58
  pack,
56
59
  install,
57
60
  repair,
61
+ route,
58
62
  migrate,
59
63
  monitor,
60
64
  explain,
61
65
  guard,
62
66
  doctor,
67
+ eval_cmd,
63
68
  tune,
64
69
  watch,
65
70
  claude_cmd,
@@ -67,6 +72,7 @@ for mod in [
67
72
  mcp_cmd,
68
73
  hook_cmd,
69
74
  quickstart,
75
+ skills,
70
76
  ]:
71
77
  mod.register(app)
72
78