agentpack-cli 0.1.22__tar.gz → 0.1.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/PKG-INFO +21 -3
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/README.md +20 -2
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/pyproject.toml +1 -1
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/__init__.py +1 -1
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/ranking.py +27 -3
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/application/pack_service.py +121 -10
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/benchmark.py +108 -19
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/pack.py +11 -5
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/context_pack.py +14 -1
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/git.py +32 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/models.py +4 -1
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/mcp_server.py +33 -1
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/renderers/markdown.py +20 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/.gitignore +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/LICENSE +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/antigravity.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/base.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/claude.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/codex.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/cursor.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/detect.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/generic.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/adapters/windsurf.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/dependency_graph.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/go_imports.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/java_imports.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/js_ts_imports.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/python_imports.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/rust_imports.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/symbols.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/analysis/tests.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/application/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/cli.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/_shared.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/claude_cmd.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/diff.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/doctor.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/explain.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/hook_cmd.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/init.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/install.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/mcp_cmd.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/monitor.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/quickstart.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/scan.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/stats.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/status.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/summarize.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/commands/watch.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/bootstrap.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/cache.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/config.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/diff.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/git_hooks.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/global_install.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/ignore.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/merkle.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/redactor.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/scanner.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/snapshot.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/token_estimator.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/core/vscode_tasks.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/data/agentpack.md +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/installers/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/installers/antigravity.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/installers/claude.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/installers/codex.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/installers/cursor.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/installers/windsurf.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/integrations/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/integrations/git_hooks.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/integrations/global_install.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/integrations/vscode_tasks.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/renderers/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/renderers/compact.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/renderers/receipts.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/session/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/session/state.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/summaries/__init__.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/summaries/base.py +0 -0
- {agentpack_cli-0.1.22 → agentpack_cli-0.1.24}/src/agentpack/summaries/offline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentpack-cli
|
|
3
|
-
Version: 0.1.22
|
|
3
|
+
Version: 0.1.24
|
|
4
4
|
Summary: Task-aware context packing for AI coding agents — Claude, Cursor, Windsurf, Codex, and Antigravity
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
|
|
|
44
44
|
[](https://opensource.org/licenses/MIT)
|
|
45
45
|
[](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
46
46
|
|
|
47
|
-
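> **Status: alpha (v0.1.22).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.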
|
|
47
|
+
> **Status: alpha (v0.1.24).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
|
|
48
48
|
>
|
|
49
49
|
> **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
|
|
50
50
|
|
|
@@ -278,6 +278,17 @@ Requires Python 3.10+.
|
|
|
278
278
|
|
|
279
279
|
> **PyPI note:** The package is `agentpack-cli` (the name `agentpack` was already taken). The CLI command is still `agentpack`.
|
|
280
280
|
|
|
281
|
+
### npm wrapper
|
|
282
|
+
|
|
283
|
+
AgentPack can also be installed from npm:
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
npm install -g @vishal2612200/agentpack
|
|
287
|
+
agentpack --version
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
The npm package is a thin Node.js wrapper around the Python CLI. It requires Node.js 18+ and Python 3.10+, then installs the matching `agentpack-cli` PyPI package into a per-version virtual environment on first run. This keeps the implementation single-source while giving JavaScript-heavy teams a familiar install path.
|
|
291
|
+
|
|
281
292
|
---
|
|
282
293
|
|
|
283
294
|
## Start Once, Then Work Normally
|
|
@@ -885,6 +896,7 @@ Mode comparison: fix auth token expiry
|
|
|
885
896
|
[[cases]]
|
|
886
897
|
task = "fix auth token expiry"
|
|
887
898
|
mode = "balanced"
|
|
899
|
+
task_type = "backend-api"
|
|
888
900
|
expected_files = [
|
|
889
901
|
"src/auth/token.py",
|
|
890
902
|
"src/auth/session.py",
|
|
@@ -898,6 +910,8 @@ expected_files = [
|
|
|
898
910
|
|
|
899
911
|
Use `--misses` when recall is low. It prints each expected file that was not selected with status, rank, score, and scoring reasons, which helps separate ignored files, budget cuts, low scores, and missing dependency signals.
|
|
900
912
|
|
|
913
|
+
Add `task_type` to group results by workflow area. Benchmark summaries report average precision, recall, F1, and token noise by type, so a repo can show "backend-api is good, frontend-web is noisy" instead of hiding that under one aggregate.
|
|
914
|
+
|
|
901
915
|
---
|
|
902
916
|
|
|
903
917
|
### `agentpack scan`
|
|
@@ -938,7 +952,7 @@ agentpack benchmark --compare --misses
|
|
|
938
952
|
|
|
939
953
|
`--sample-fixtures` runs bundled FastAPI, Next.js, and mixed Python/TypeScript fixture evals from an AgentPack source checkout. It is a smoke test, not a claim about your repo.
|
|
940
954
|
|
|
941
|
-
For an 8+ usefulness signal, use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
|
|
955
|
+
For an 8+ usefulness signal, use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, `task_type` labels, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
|
|
942
956
|
|
|
943
957
|
---
|
|
944
958
|
|
|
@@ -1566,7 +1580,10 @@ Useful checks before opening a PR:
|
|
|
1566
1580
|
|
|
1567
1581
|
```bash
|
|
1568
1582
|
pytest
|
|
1583
|
+
python -m ruff check src tests
|
|
1569
1584
|
python -m build
|
|
1585
|
+
npm test --prefix npm
|
|
1586
|
+
(cd npm && npm pack --dry-run)
|
|
1570
1587
|
agentpack benchmark --sample-fixtures --misses
|
|
1571
1588
|
```
|
|
1572
1589
|
|
|
@@ -1577,6 +1594,7 @@ Good contribution areas:
|
|
|
1577
1594
|
- Better symbol extraction for Go, Rust, Java, and Kotlin
|
|
1578
1595
|
- More precise import/dependency resolution for framework-heavy repos
|
|
1579
1596
|
- Ranking regressions with `expected_files` cases that reproduce misses
|
|
1597
|
+
- npm wrapper improvements that preserve the Python CLI as the source of truth
|
|
1580
1598
|
|
|
1581
1599
|
Please include tests for ranking changes. A good ranking PR usually adds one focused unit test and one scenario in `tests/test_ranking_evals.py`.
|
|
1582
1600
|
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
[](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
7
7
|
|
|
8
|
-
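> **Status: alpha (v0.1.22).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.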
|
|
8
|
+
> **Status: alpha (v0.1.24).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
|
|
9
9
|
>
|
|
10
10
|
> **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
|
|
11
11
|
|
|
@@ -239,6 +239,17 @@ Requires Python 3.10+.
|
|
|
239
239
|
|
|
240
240
|
> **PyPI note:** The package is `agentpack-cli` (the name `agentpack` was already taken). The CLI command is still `agentpack`.
|
|
241
241
|
|
|
242
|
+
### npm wrapper
|
|
243
|
+
|
|
244
|
+
AgentPack can also be installed from npm:
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
npm install -g @vishal2612200/agentpack
|
|
248
|
+
agentpack --version
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
The npm package is a thin Node.js wrapper around the Python CLI. It requires Node.js 18+ and Python 3.10+, then installs the matching `agentpack-cli` PyPI package into a per-version virtual environment on first run. This keeps the implementation single-source while giving JavaScript-heavy teams a familiar install path.
|
|
252
|
+
|
|
242
253
|
---
|
|
243
254
|
|
|
244
255
|
## Start Once, Then Work Normally
|
|
@@ -846,6 +857,7 @@ Mode comparison: fix auth token expiry
|
|
|
846
857
|
[[cases]]
|
|
847
858
|
task = "fix auth token expiry"
|
|
848
859
|
mode = "balanced"
|
|
860
|
+
task_type = "backend-api"
|
|
849
861
|
expected_files = [
|
|
850
862
|
"src/auth/token.py",
|
|
851
863
|
"src/auth/session.py",
|
|
@@ -859,6 +871,8 @@ expected_files = [
|
|
|
859
871
|
|
|
860
872
|
Use `--misses` when recall is low. It prints each expected file that was not selected with status, rank, score, and scoring reasons, which helps separate ignored files, budget cuts, low scores, and missing dependency signals.
|
|
861
873
|
|
|
874
|
+
Add `task_type` to group results by workflow area. Benchmark summaries report average precision, recall, F1, and token noise by type, so a repo can show "backend-api is good, frontend-web is noisy" instead of hiding that under one aggregate.
|
|
875
|
+
|
|
862
876
|
---
|
|
863
877
|
|
|
864
878
|
### `agentpack scan`
|
|
@@ -899,7 +913,7 @@ agentpack benchmark --compare --misses
|
|
|
899
913
|
|
|
900
914
|
`--sample-fixtures` runs bundled FastAPI, Next.js, and mixed Python/TypeScript fixture evals from an AgentPack source checkout. It is a smoke test, not a claim about your repo.
|
|
901
915
|
|
|
902
|
-
For an 8+ usefulness signal, use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
|
|
916
|
+
For an 8+ usefulness signal, use `benchmark.toml` with real third-party or customer-style repos: 5-20 historical tasks, `task_type` labels, the files actually changed for each task, and `--compare` results for recall, F1, rank@K, and token noise. That is better than trusting generic benchmarks because it tells you whether AgentPack selects the files that matter in code the package has never seen.
|
|
903
917
|
|
|
904
918
|
---
|
|
905
919
|
|
|
@@ -1527,7 +1541,10 @@ Useful checks before opening a PR:
|
|
|
1527
1541
|
|
|
1528
1542
|
```bash
|
|
1529
1543
|
pytest
|
|
1544
|
+
python -m ruff check src tests
|
|
1530
1545
|
python -m build
|
|
1546
|
+
npm test --prefix npm
|
|
1547
|
+
(cd npm && npm pack --dry-run)
|
|
1531
1548
|
agentpack benchmark --sample-fixtures --misses
|
|
1532
1549
|
```
|
|
1533
1550
|
|
|
@@ -1538,6 +1555,7 @@ Good contribution areas:
|
|
|
1538
1555
|
- Better symbol extraction for Go, Rust, Java, and Kotlin
|
|
1539
1556
|
- More precise import/dependency resolution for framework-heavy repos
|
|
1540
1557
|
- Ranking regressions with `expected_files` cases that reproduce misses
|
|
1558
|
+
- npm wrapper improvements that preserve the Python CLI as the source of truth
|
|
1541
1559
|
|
|
1542
1560
|
Please include tests for ranking changes. A good ranking PR usually adds one focused unit test and one scenario in `tests/test_ranking_evals.py`.
|
|
1543
1561
|
|
|
@@ -16,6 +16,16 @@ _STOPWORDS = {
|
|
|
16
16
|
"use", "using", "used", "how", "what", "when", "where", "why",
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
+
_GENERIC_TASK_TERMS = {
|
|
20
|
+
"add", "added", "change", "changed", "changes", "clean", "cleanup",
|
|
21
|
+
"code", "commit", "context", "debug", "dev", "development", "doc",
|
|
22
|
+
"docs", "eval", "evals", "feature", "fix", "freshness", "general",
|
|
23
|
+
"impl", "implement", "implementation", "improve", "issue", "metric", "metrics",
|
|
24
|
+
"noise", "noisy", "package", "pack", "packs", "release", "repo",
|
|
25
|
+
"source", "sync", "task", "tasks", "test", "tests", "update", "use",
|
|
26
|
+
"useful", "usefulness", "version", "workflow", "workflows",
|
|
27
|
+
}
|
|
28
|
+
|
|
19
29
|
_CONCEPT_MAP: dict[str, frozenset[str]] = {
|
|
20
30
|
# rate limiting
|
|
21
31
|
"rate": frozenset({"throttle", "ratelimit", "leaky", "bucket", "debounce", "backoff", "quota"}),
|
|
@@ -219,15 +229,18 @@ def extract_keyword_weights(task: str) -> dict[str, float]:
|
|
|
219
229
|
continue
|
|
220
230
|
if word in _STOPWORDS:
|
|
221
231
|
continue
|
|
222
|
-
|
|
232
|
+
literal_weight = 0.25 if word in _GENERIC_TASK_TERMS else 1.0
|
|
233
|
+
_add_keyword_weight(keyword_weights, word, literal_weight)
|
|
223
234
|
if word in _VARIANTS:
|
|
224
|
-
|
|
235
|
+
variant = _VARIANTS[word]
|
|
236
|
+
variant_weight = 0.25 if variant in _GENERIC_TASK_TERMS else min(0.75, literal_weight)
|
|
237
|
+
_add_keyword_weight(keyword_weights, variant, variant_weight)
|
|
225
238
|
|
|
226
239
|
# Expand via concept map one level only. Expanded concepts are weaker than
|
|
227
240
|
# literal task words so broad terms like "task" do not dominate ranking.
|
|
228
241
|
expanded: dict[str, float] = {}
|
|
229
242
|
for kw in keyword_weights:
|
|
230
|
-
if kw in _CONCEPT_MAP:
|
|
243
|
+
if kw in _CONCEPT_MAP and kw not in _GENERIC_TASK_TERMS:
|
|
231
244
|
for synonym in _CONCEPT_MAP[kw]:
|
|
232
245
|
_add_keyword_weight(expanded, synonym, 0.35)
|
|
233
246
|
if synonym in _VARIANTS:
|
|
@@ -237,6 +250,17 @@ def extract_keyword_weights(task: str) -> dict[str, float]:
|
|
|
237
250
|
return keyword_weights
|
|
238
251
|
|
|
239
252
|
|
|
253
|
+
def generic_task_term_ratio(task: str) -> float:
|
|
254
|
+
words = [
|
|
255
|
+
word for word in re.split(r"[^a-zA-Z0-9]+", task.lower())
|
|
256
|
+
if len(word) >= 3 and word not in _STOPWORDS
|
|
257
|
+
]
|
|
258
|
+
if not words:
|
|
259
|
+
return 0.0
|
|
260
|
+
generic = sum(1 for word in words if word in _GENERIC_TASK_TERMS)
|
|
261
|
+
return generic / len(words)
|
|
262
|
+
|
|
263
|
+
|
|
240
264
|
def extract_keywords(task: str) -> set[str]:
|
|
241
265
|
return set(extract_keyword_weights(task))
|
|
242
266
|
|
|
@@ -22,6 +22,7 @@ from agentpack.analysis.ranking import (
|
|
|
22
22
|
enrich_keyword_weights_from_files,
|
|
23
23
|
boost_paired_tests,
|
|
24
24
|
boost_cross_layer_related,
|
|
25
|
+
generic_task_term_ratio,
|
|
25
26
|
)
|
|
26
27
|
from agentpack.analysis.tests import find_related_tests
|
|
27
28
|
from agentpack.analysis import dependency_graph as dep_graph_mod
|
|
@@ -37,6 +38,7 @@ class PackRequest:
|
|
|
37
38
|
budget: int
|
|
38
39
|
since: str | None
|
|
39
40
|
refresh: bool
|
|
41
|
+
task_source: str = "explicit"
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
@dataclass
|
|
@@ -57,6 +59,7 @@ class ChangeSet:
|
|
|
57
59
|
all_changed: set[str]
|
|
58
60
|
git_staged: set[str]
|
|
59
61
|
recently_modified: list[str]
|
|
62
|
+
source: str
|
|
60
63
|
current_snap: dict[str, Any] = field(default_factory=dict)
|
|
61
64
|
|
|
62
65
|
|
|
@@ -64,6 +67,7 @@ class ChangeSet:
|
|
|
64
67
|
class RankResult:
|
|
65
68
|
"""Result of keyword extraction and file scoring."""
|
|
66
69
|
keywords: set[str]
|
|
70
|
+
generic_ratio: float
|
|
67
71
|
scored: list[tuple[Any, float, list[str]]]
|
|
68
72
|
|
|
69
73
|
|
|
@@ -80,6 +84,8 @@ class PackPlan:
|
|
|
80
84
|
git_staged: set[str]
|
|
81
85
|
recently_modified: list[str]
|
|
82
86
|
keywords: set[str]
|
|
87
|
+
generic_task_ratio: float
|
|
88
|
+
changed_files_source: str
|
|
83
89
|
scored: list[tuple[Any, float, list[str]]]
|
|
84
90
|
selected: list[SelectedFile]
|
|
85
91
|
receipts: list[Receipt]
|
|
@@ -119,6 +125,7 @@ class ChangeDetector:
|
|
|
119
125
|
all_changed=changed_from_snap | git_changed,
|
|
120
126
|
git_staged=git_staged,
|
|
121
127
|
recently_modified=recently_modified,
|
|
128
|
+
source=_change_source(root, since, changed_from_snap, git_changed),
|
|
122
129
|
current_snap=current_snap,
|
|
123
130
|
)
|
|
124
131
|
|
|
@@ -140,6 +147,7 @@ class FileRanker:
|
|
|
140
147
|
keyword_weights = extract_keyword_weights(task)
|
|
141
148
|
keyword_weights = enrich_keyword_weights_from_files(keyword_weights, changes.all_changed, packable)
|
|
142
149
|
keywords = set(keyword_weights)
|
|
150
|
+
generic_ratio = generic_task_term_ratio(task)
|
|
143
151
|
all_paths = {f.path for f in packable}
|
|
144
152
|
|
|
145
153
|
for fi in packable:
|
|
@@ -165,7 +173,7 @@ class FileRanker:
|
|
|
165
173
|
)
|
|
166
174
|
scored = boost_cross_layer_related(scored, keyword_weights, weights=cfg.scoring)
|
|
167
175
|
scored = boost_paired_tests(scored, weights=cfg.scoring)
|
|
168
|
-
return RankResult(keywords=keywords, scored=scored)
|
|
176
|
+
return RankResult(keywords=keywords, generic_ratio=generic_ratio, scored=scored)
|
|
169
177
|
|
|
170
178
|
|
|
171
179
|
class PackPlanner:
|
|
@@ -217,8 +225,8 @@ class PackPlanner:
|
|
|
217
225
|
budget=effective_budget,
|
|
218
226
|
max_file_tokens=cfg.context.max_file_tokens,
|
|
219
227
|
keywords=rank_result.keywords,
|
|
220
|
-
min_summary_score=cfg.context.min_summary_score,
|
|
221
|
-
max_summary_files=_summary_cap_for_mode(cfg, request.mode),
|
|
228
|
+
min_summary_score=_summary_score_floor(cfg, rank_result.generic_ratio),
|
|
229
|
+
max_summary_files=_summary_cap_for_mode(cfg, request.mode, rank_result.generic_ratio),
|
|
222
230
|
)
|
|
223
231
|
phase_times["select"] = time.perf_counter() - t0
|
|
224
232
|
|
|
@@ -233,6 +241,8 @@ class PackPlanner:
|
|
|
233
241
|
git_staged=changes.git_staged,
|
|
234
242
|
recently_modified=changes.recently_modified,
|
|
235
243
|
keywords=rank_result.keywords,
|
|
244
|
+
generic_task_ratio=rank_result.generic_ratio,
|
|
245
|
+
changed_files_source=changes.source,
|
|
236
246
|
scored=rank_result.scored,
|
|
237
247
|
selected=selected,
|
|
238
248
|
receipts=receipts,
|
|
@@ -279,6 +289,13 @@ class PackService:
|
|
|
279
289
|
saving_pct = (1 - packed_tokens / all_tokens) * 100 if all_tokens > 0 else 0.0
|
|
280
290
|
|
|
281
291
|
all_redaction_warnings = [w for sf in plan.selected for w in sf.redaction_warnings]
|
|
292
|
+
freshness = _build_freshness_metadata(
|
|
293
|
+
root,
|
|
294
|
+
request=request,
|
|
295
|
+
plan=plan,
|
|
296
|
+
snapshot_root_hash=plan.current_snap["root_hash"],
|
|
297
|
+
)
|
|
298
|
+
freshness_warnings = _freshness_warnings(root, request, freshness)
|
|
282
299
|
|
|
283
300
|
pack_obj = ContextPack(
|
|
284
301
|
task=request.task,
|
|
@@ -294,6 +311,8 @@ class PackService:
|
|
|
294
311
|
receipts=plan.receipts if cfg.context.include_receipts else [],
|
|
295
312
|
redaction_warnings=all_redaction_warnings,
|
|
296
313
|
stale=False,
|
|
314
|
+
freshness=freshness,
|
|
315
|
+
freshness_warnings=freshness_warnings,
|
|
297
316
|
)
|
|
298
317
|
|
|
299
318
|
adapter = AdapterRegistry.get(request.agent, cfg)
|
|
@@ -312,6 +331,8 @@ class PackService:
|
|
|
312
331
|
mode=request.mode,
|
|
313
332
|
budget=plan.budget,
|
|
314
333
|
token_estimate=packed_tokens,
|
|
334
|
+
freshness=freshness,
|
|
335
|
+
freshness_warnings=freshness_warnings,
|
|
315
336
|
)
|
|
316
337
|
excluded_receipts = [r for r in plan.receipts if r.action == "excluded"]
|
|
317
338
|
# Budget-cut: files that scored OK but didn't fit — more useful signal than "score too low"
|
|
@@ -359,14 +380,104 @@ def _sf_tokens(sf: SelectedFile) -> int:
|
|
|
359
380
|
return estimate_tokens("\n".join(parts)) if parts else 50
|
|
360
381
|
|
|
361
382
|
|
|
362
|
-
def _summary_cap_for_mode(cfg: Any, mode: str) -> int:
|
|
383
|
+
def _summary_score_floor(cfg: Any, generic_ratio: float) -> float:
|
|
384
|
+
floor = cfg.context.min_summary_score
|
|
385
|
+
if generic_ratio >= 0.5:
|
|
386
|
+
return floor + 15
|
|
387
|
+
if generic_ratio >= 0.35:
|
|
388
|
+
return floor + 8
|
|
389
|
+
return floor
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def _summary_cap_for_mode(cfg: Any, mode: str, generic_ratio: float = 0.0) -> int:
|
|
363
393
|
if mode == "minimal":
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
394
|
+
cap = cfg.context.max_summary_files_minimal
|
|
395
|
+
elif mode == "balanced":
|
|
396
|
+
cap = cfg.context.max_summary_files_balanced
|
|
397
|
+
elif mode == "deep":
|
|
398
|
+
cap = cfg.context.max_summary_files_deep
|
|
399
|
+
else:
|
|
400
|
+
cap = 0
|
|
401
|
+
if cap > 0 and generic_ratio >= 0.5:
|
|
402
|
+
return max(8, cap // 2)
|
|
403
|
+
if cap > 0 and generic_ratio >= 0.35:
|
|
404
|
+
return max(12, int(cap * 0.75))
|
|
405
|
+
return cap
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _change_source(root: Path, since: str | None, snapshot_changed: set[str], git_changed: set[str]) -> str:
|
|
409
|
+
if not git.is_git_repo(root):
|
|
410
|
+
return "snapshot diff"
|
|
411
|
+
if since:
|
|
412
|
+
return f"git diff since {since} + snapshot diff"
|
|
413
|
+
if git_changed and snapshot_changed:
|
|
414
|
+
return "git working tree + snapshot diff"
|
|
415
|
+
if git_changed:
|
|
416
|
+
return "git working tree"
|
|
417
|
+
if snapshot_changed:
|
|
418
|
+
return "snapshot diff"
|
|
419
|
+
return "no live changes; ranking used task keywords and history"
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def _task_md_body(root: Path) -> str | None:
|
|
423
|
+
task_md_path = root / ".agentpack" / "task.md"
|
|
424
|
+
if not task_md_path.exists():
|
|
425
|
+
return None
|
|
426
|
+
try:
|
|
427
|
+
content = task_md_path.read_text(encoding="utf-8").strip()
|
|
428
|
+
except OSError:
|
|
429
|
+
return None
|
|
430
|
+
lines = [ln for ln in content.splitlines() if ln.strip() and not ln.startswith("#")]
|
|
431
|
+
body = lines[0].strip() if lines else ""
|
|
432
|
+
placeholder = "Write or update the current coding task here."
|
|
433
|
+
if body and placeholder not in body:
|
|
434
|
+
return body
|
|
435
|
+
return None
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _build_freshness_metadata(
|
|
439
|
+
root: Path,
|
|
440
|
+
*,
|
|
441
|
+
request: PackRequest,
|
|
442
|
+
plan: PackPlan,
|
|
443
|
+
snapshot_root_hash: str,
|
|
444
|
+
) -> dict[str, Any]:
|
|
445
|
+
dirty = git.dirty_files(root) if git.is_git_repo(root) else set()
|
|
446
|
+
metadata: dict[str, Any] = {
|
|
447
|
+
"generated_at": datetime.now(timezone.utc).isoformat(),
|
|
448
|
+
"task_source": request.task_source,
|
|
449
|
+
"changed_files_source": plan.changed_files_source,
|
|
450
|
+
"snapshot_root_hash": snapshot_root_hash,
|
|
451
|
+
"generic_task_ratio": round(plan.generic_task_ratio, 3),
|
|
452
|
+
"dirty_files_count": len(dirty),
|
|
453
|
+
}
|
|
454
|
+
if git.is_git_repo(root):
|
|
455
|
+
metadata["git_sha"] = git.current_sha(root)
|
|
456
|
+
metadata["git_branch"] = git.current_branch(root)
|
|
457
|
+
if dirty:
|
|
458
|
+
metadata["dirty_files_sample"] = sorted(dirty)[:8]
|
|
459
|
+
task_md = _task_md_body(root)
|
|
460
|
+
if task_md:
|
|
461
|
+
metadata["task_md"] = task_md
|
|
462
|
+
return metadata
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _freshness_warnings(root: Path, request: PackRequest, freshness: dict[str, Any]) -> list[str]:
|
|
466
|
+
warnings: list[str] = []
|
|
467
|
+
task_md = freshness.get("task_md")
|
|
468
|
+
if task_md and task_md != request.task:
|
|
469
|
+
warnings.append(
|
|
470
|
+
".agentpack/task.md differs from the packed task; rerun with --task auto if task.md should win."
|
|
471
|
+
)
|
|
472
|
+
if freshness.get("changed_files_source") == "no live changes; ranking used task keywords and history":
|
|
473
|
+
warnings.append("No live changed files were detected; treat selected files as keyword-based hints.")
|
|
474
|
+
if freshness.get("generic_task_ratio", 0) >= 0.5:
|
|
475
|
+
warnings.append("Task terms are broad/generic; pack tightened weak-summary selection.")
|
|
476
|
+
saved_sha = freshness.get("git_sha")
|
|
477
|
+
current_sha = git.current_sha(root) if git.is_git_repo(root) else None
|
|
478
|
+
if saved_sha and current_sha and saved_sha != current_sha:
|
|
479
|
+
warnings.append("Git HEAD changed since this pack was generated.")
|
|
480
|
+
return warnings
|
|
370
481
|
|
|
371
482
|
|
|
372
483
|
def _load_last_record(metrics_path: Path) -> dict[str, Any] | None:
|
|
@@ -23,6 +23,7 @@ class BenchmarkCase:
|
|
|
23
23
|
task: str
|
|
24
24
|
mode: str = "balanced"
|
|
25
25
|
expected_files: list[str] = field(default_factory=list)
|
|
26
|
+
task_type: str = "general"
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
@dataclass
|
|
@@ -60,43 +61,54 @@ def _sample_fixture_cases(fixtures_root: Path) -> list[FixtureCase]:
|
|
|
60
61
|
"py_fastapi_app",
|
|
61
62
|
"fix FastAPI auth token validation",
|
|
62
63
|
["src/app/auth.py", "tests/test_auth.py"],
|
|
64
|
+
"backend-api",
|
|
63
65
|
),
|
|
64
66
|
(
|
|
65
67
|
"py_fastapi_app",
|
|
66
68
|
"add user profile API endpoint",
|
|
67
69
|
["src/app/main.py", "src/app/users.py", "tests/test_users.py"],
|
|
70
|
+
"backend-api",
|
|
68
71
|
),
|
|
69
72
|
(
|
|
70
73
|
"nextjs_app",
|
|
71
74
|
"fix Next.js auth helper and API client",
|
|
72
75
|
["src/lib/auth.ts", "src/lib/api.ts"],
|
|
76
|
+
"frontend-web",
|
|
73
77
|
),
|
|
74
78
|
(
|
|
75
79
|
"nextjs_app",
|
|
76
80
|
"debug dashboard page data loading",
|
|
77
81
|
["src/app/page.tsx", "src/lib/api.ts"],
|
|
82
|
+
"frontend-web",
|
|
78
83
|
),
|
|
79
84
|
(
|
|
80
85
|
"mixed_repo",
|
|
81
86
|
"fix TypeScript API serialization utility",
|
|
82
87
|
["src/ts/api.ts", "src/ts/utils.ts"],
|
|
88
|
+
"typescript",
|
|
83
89
|
),
|
|
84
90
|
(
|
|
85
91
|
"mixed_repo",
|
|
86
92
|
"fix Python utility parsing edge case",
|
|
87
93
|
["src/py/utils.py"],
|
|
94
|
+
"python",
|
|
88
95
|
),
|
|
89
96
|
]
|
|
90
97
|
|
|
91
98
|
cases: list[FixtureCase] = []
|
|
92
|
-
for fixture, task, expected_files in specs:
|
|
99
|
+
for fixture, task, expected_files, task_type in specs:
|
|
93
100
|
fixture_root = fixtures_root / fixture
|
|
94
101
|
if fixture_root.exists():
|
|
95
102
|
cases.append(
|
|
96
103
|
FixtureCase(
|
|
97
104
|
fixture=fixture,
|
|
98
105
|
root=fixture_root,
|
|
99
|
-
case=BenchmarkCase(task=task, mode="balanced", expected_files=expected_files),
|
|
106
|
+
case=BenchmarkCase(
|
|
107
|
+
task=task,
|
|
108
|
+
mode="balanced",
|
|
109
|
+
expected_files=expected_files,
|
|
110
|
+
task_type=task_type,
|
|
111
|
+
),
|
|
100
112
|
)
|
|
101
113
|
)
|
|
102
114
|
return cases
|
|
@@ -115,6 +127,7 @@ def _load_cases(path: Path) -> list[BenchmarkCase]:
|
|
|
115
127
|
task=raw["task"],
|
|
116
128
|
mode=raw.get("mode", "balanced"),
|
|
117
129
|
expected_files=raw.get("expected_files", []),
|
|
130
|
+
task_type=raw.get("task_type", "general"),
|
|
118
131
|
))
|
|
119
132
|
return cases
|
|
120
133
|
|
|
@@ -136,13 +149,15 @@ def _scaffold_cases(root: Path) -> Path:
|
|
|
136
149
|
'[[cases]]\n'
|
|
137
150
|
'task = "fix auth token expiry"\n'
|
|
138
151
|
'mode = "balanced"\n'
|
|
152
|
+
'task_type = "backend-api"\n'
|
|
139
153
|
'# expected_files = [\n'
|
|
140
154
|
'# "src/auth/token.py",\n'
|
|
141
155
|
'# "src/auth/session.py",\n'
|
|
142
156
|
'# ]\n\n'
|
|
143
157
|
'[[cases]]\n'
|
|
144
158
|
'task = "add rate limiting to API endpoints"\n'
|
|
145
|
-
'mode = "balanced"\n'
|
|
159
|
+
'mode = "balanced"\n'
|
|
160
|
+
'task_type = "backend-api"\n',
|
|
146
161
|
encoding="utf-8",
|
|
147
162
|
)
|
|
148
163
|
return out
|
|
@@ -170,7 +185,7 @@ def _load_history_cases(root: Path, n: int) -> list[BenchmarkCase]:
|
|
|
170
185
|
break
|
|
171
186
|
except json.JSONDecodeError:
|
|
172
187
|
pass
|
|
173
|
-
return [BenchmarkCase(task=t, mode=m) for t, m in seen]
|
|
188
|
+
return [BenchmarkCase(task=t, mode=m, task_type="history") for t, m in seen]
|
|
174
189
|
|
|
175
190
|
|
|
176
191
|
def _random_baseline(
|
|
@@ -268,20 +283,20 @@ def _run_case(root: Path, case: BenchmarkCase) -> CaseResult:
|
|
|
268
283
|
for expected_path in sorted(expected_set - selected_set):
|
|
269
284
|
fi = all_file_map.get(expected_path)
|
|
270
285
|
scored_info = scored_map.get(expected_path)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
status = "ranked but not selected" if scored_info else "not scored"
|
|
286
|
+
status = _miss_status(
|
|
287
|
+
fi=fi,
|
|
288
|
+
expected_path=expected_path,
|
|
289
|
+
receipt_map=receipt_map,
|
|
290
|
+
scored_info=scored_info,
|
|
291
|
+
changed_files_source=plan.changed_files_source,
|
|
292
|
+
)
|
|
279
293
|
missed_expected.append({
|
|
280
294
|
"path": expected_path,
|
|
281
295
|
"status": status,
|
|
282
296
|
"rank": scored_info["rank"] if scored_info else None,
|
|
283
297
|
"score": round(scored_info["score"], 1) if scored_info else None,
|
|
284
298
|
"reasons": scored_info["reasons"][:4] if scored_info else [],
|
|
299
|
+
"basis": plan.changed_files_source,
|
|
285
300
|
})
|
|
286
301
|
else:
|
|
287
302
|
missed_expected = []
|
|
@@ -320,12 +335,37 @@ def _precision_recall(result: CaseResult) -> tuple[float, float, float]:
|
|
|
320
335
|
return p, r, f1
|
|
321
336
|
|
|
322
337
|
|
|
338
|
+
def _miss_status(
|
|
339
|
+
*,
|
|
340
|
+
fi: Any,
|
|
341
|
+
expected_path: str,
|
|
342
|
+
receipt_map: dict[str, str],
|
|
343
|
+
scored_info: dict[str, Any] | None,
|
|
344
|
+
changed_files_source: str,
|
|
345
|
+
) -> str:
|
|
346
|
+
suffix = ""
|
|
347
|
+
if changed_files_source.startswith("no live changes"):
|
|
348
|
+
suffix = "; no live changed-file signal"
|
|
349
|
+
if fi is None:
|
|
350
|
+
return "not found in scanned files"
|
|
351
|
+
if fi.ignored or fi.binary:
|
|
352
|
+
return "ignored or binary"
|
|
353
|
+
if expected_path in receipt_map:
|
|
354
|
+
return receipt_map[expected_path] + suffix
|
|
355
|
+
if scored_info:
|
|
356
|
+
if scored_info["score"] <= 0:
|
|
357
|
+
return "scored too low" + suffix
|
|
358
|
+
return "ranked but not selected" + suffix
|
|
359
|
+
return "not scored" + suffix
|
|
360
|
+
|
|
361
|
+
|
|
323
362
|
def _persist_result(root: Path, result: CaseResult) -> None:
|
|
324
363
|
out = root / ".agentpack" / "benchmark_results.jsonl"
|
|
325
364
|
p, r, f1 = _precision_recall(result) if result.case.expected_files else (None, None, None)
|
|
326
365
|
record = {
|
|
327
366
|
"ts": datetime.now(timezone.utc).isoformat(),
|
|
328
367
|
"task": result.case.task,
|
|
368
|
+
"task_type": result.case.task_type,
|
|
329
369
|
"mode": result.case.mode,
|
|
330
370
|
"packed_tokens": result.packed_tokens,
|
|
331
371
|
"raw_tokens": result.raw_tokens,
|
|
@@ -356,7 +396,10 @@ def _print_case_detail(result: CaseResult, show_misses: bool = False) -> None:
|
|
|
356
396
|
has_gt = bool(result.case.expected_files)
|
|
357
397
|
p, r, f1 = _precision_recall(result) if has_gt else (0.0, 0.0, 0.0)
|
|
358
398
|
|
|
359
|
-
console.print(
|
|
399
|
+
console.print(
|
|
400
|
+
f"\n[bold cyan]{result.case.task}[/] "
|
|
401
|
+
f"[dim]mode={result.case.mode} type={result.case.task_type}[/]"
|
|
402
|
+
)
|
|
360
403
|
|
|
361
404
|
tbl = Table(box=box.SIMPLE, show_header=False, padding=(0, 2))
|
|
362
405
|
tbl.add_column(style="dim")
|
|
@@ -467,6 +510,42 @@ def _print_summary_table(results: list[CaseResult]) -> None:
|
|
|
467
510
|
console.print(tbl)
|
|
468
511
|
|
|
469
512
|
|
|
513
|
+
def _print_task_type_summary(results: list[CaseResult]) -> None:
    """Print a table of averaged precision/recall/F1/noise per task type.

    Only results whose case lists expected files are counted. Nothing is
    printed when no result qualifies.
    """
    by_type: dict[str, list[CaseResult]] = {}
    for res in results:
        if not res.case.expected_files:
            continue
        by_type.setdefault(res.case.task_type, []).append(res)
    if not by_type:
        return

    summary = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
    summary.add_column("task type", max_width=28)
    for heading in ("cases", "avg P", "avg R", "avg F1", "avg noise"):
        summary.add_column(heading, justify="right")

    for task_type in sorted(by_type):
        members = by_type[task_type]
        count = len(members)
        scores = [_precision_recall(member) for member in members]
        mean_p = sum(score[0] for score in scores) / count
        mean_r = sum(score[1] for score in scores) / count
        mean_f1 = sum(score[2] for score in scores) / count

        # Noise is optional per result; average only the values that exist.
        noise = [member.noise_pct for member in members if member.noise_pct is not None]
        noise_cell = f"{sum(noise) / len(noise):.0f}%" if noise else "-"

        summary.add_row(
            task_type,
            str(count),
            f"{mean_p:.1%}",
            f"{mean_r:.1%}",
            f"{mean_f1:.1%}",
            noise_cell,
        )

    console.print("\n[bold]By Task Type[/]")
    console.print(summary)
|
|
547
|
+
|
|
548
|
+
|
|
470
549
|
def _print_miss_details(results: list[CaseResult]) -> None:
|
|
471
550
|
rows = [miss | {"task": result.case.task[:30]} for result in results for miss in result.missed_expected]
|
|
472
551
|
if not rows:
|
|
@@ -588,11 +667,12 @@ def register(app: typer.Typer) -> None:
|
|
|
588
667
|
FixtureCase(
|
|
589
668
|
fixture=fixture_case.fixture,
|
|
590
669
|
root=fixture_case.root,
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
670
|
+
case=BenchmarkCase(
|
|
671
|
+
task=fixture_case.case.task,
|
|
672
|
+
mode=fixture_mode,
|
|
673
|
+
expected_files=fixture_case.case.expected_files,
|
|
674
|
+
task_type=fixture_case.case.task_type,
|
|
675
|
+
),
|
|
596
676
|
)
|
|
597
677
|
)
|
|
598
678
|
fixture_cases = expanded_fixtures
|
|
@@ -625,6 +705,7 @@ def register(app: typer.Typer) -> None:
|
|
|
625
705
|
else:
|
|
626
706
|
console.print("\n[bold]Summary[/]")
|
|
627
707
|
_print_fixture_summary_table(results)
|
|
708
|
+
_print_task_type_summary(results)
|
|
628
709
|
if misses:
|
|
629
710
|
_print_miss_details(results)
|
|
630
711
|
return
|
|
@@ -654,7 +735,14 @@ def register(app: typer.Typer) -> None:
|
|
|
654
735
|
expanded: list[BenchmarkCase] = []
|
|
655
736
|
for c in bench_cases:
|
|
656
737
|
for m in ("minimal", "balanced", "deep"):
|
|
657
|
-
expanded.append(
|
|
738
|
+
expanded.append(
|
|
739
|
+
BenchmarkCase(
|
|
740
|
+
task=c.task,
|
|
741
|
+
mode=m,
|
|
742
|
+
expected_files=c.expected_files,
|
|
743
|
+
task_type=c.task_type,
|
|
744
|
+
)
|
|
745
|
+
)
|
|
658
746
|
bench_cases = expanded
|
|
659
747
|
|
|
660
748
|
console.print(f"\n[bold]Running {len(bench_cases)} benchmark case(s)...[/]\n")
|
|
@@ -686,5 +774,6 @@ def register(app: typer.Typer) -> None:
|
|
|
686
774
|
_print_case_detail(r, show_misses=misses)
|
|
687
775
|
console.print("\n[bold]Summary[/]")
|
|
688
776
|
_print_summary_table(results)
|
|
777
|
+
_print_task_type_summary(results)
|
|
689
778
|
if misses:
|
|
690
779
|
_print_miss_details(results)
|
|
@@ -33,7 +33,7 @@ def register(app: typer.Typer) -> None:
|
|
|
33
33
|
raise typer.Exit(1)
|
|
34
34
|
|
|
35
35
|
resolved_agent = _resolve_agent(agent)
|
|
36
|
-
resolved_task =
|
|
36
|
+
resolved_task, task_source = _resolve_task_with_source(task)
|
|
37
37
|
|
|
38
38
|
if watch or session:
|
|
39
39
|
_pack_watch(agent=resolved_agent, task=resolved_task, mode=mode, budget=budget,
|
|
@@ -48,6 +48,7 @@ def register(app: typer.Typer) -> None:
|
|
|
48
48
|
budget=budget,
|
|
49
49
|
since=since,
|
|
50
50
|
refresh=refresh,
|
|
51
|
+
task_source=task_source,
|
|
51
52
|
))
|
|
52
53
|
_print_pack_summary(result)
|
|
53
54
|
|
|
@@ -62,8 +63,13 @@ def _resolve_agent(agent: str) -> str:
|
|
|
62
63
|
|
|
63
64
|
|
|
64
65
|
def _resolve_task(task: str) -> str:
    """Return only the resolved task text, dropping the source label."""
    text, _ = _resolve_task_with_source(task)
    return text
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _resolve_task_with_source(task: str) -> tuple[str, str]:
|
|
65
71
|
if task != "auto":
|
|
66
|
-
return task
|
|
72
|
+
return task, "explicit"
|
|
67
73
|
root = _root()
|
|
68
74
|
# task.md takes priority over all git heuristics
|
|
69
75
|
task_md_path = root / ".agentpack" / "task.md"
|
|
@@ -74,10 +80,10 @@ def _resolve_task(task: str) -> str:
|
|
|
74
80
|
_PLACEHOLDER = "Write or update the current coding task here."
|
|
75
81
|
if body and _PLACEHOLDER not in body:
|
|
76
82
|
console.print(f"[dim]Auto task (task.md): {body}[/]")
|
|
77
|
-
return body
|
|
83
|
+
return body, "task.md"
|
|
78
84
|
inferred, source = git.infer_task_with_source(root)
|
|
79
85
|
console.print(f"[dim]Auto task ({source}): {inferred}[/]")
|
|
80
|
-
return inferred
|
|
86
|
+
return inferred, source
|
|
81
87
|
|
|
82
88
|
|
|
83
89
|
def _print_pack_summary(result: PackResult) -> None:
|
|
@@ -224,7 +230,7 @@ def _pack_watch(
|
|
|
224
230
|
def _run_pack() -> None:
|
|
225
231
|
result = PackService().run(PackRequest(
|
|
226
232
|
root=root, agent=agent, task=task, mode=mode, budget=budget,
|
|
227
|
-
since=since, refresh=False,
|
|
233
|
+
since=since, refresh=False, task_source="watch",
|
|
228
234
|
))
|
|
229
235
|
_print_pack_summary(result)
|
|
230
236
|
|
|
@@ -55,17 +55,30 @@ def save_pack_metadata(
|
|
|
55
55
|
mode: str,
|
|
56
56
|
budget: int,
|
|
57
57
|
token_estimate: int = 0,
|
|
58
|
+
freshness: dict[str, Any] | None = None,
|
|
59
|
+
freshness_warnings: list[str] | None = None,
|
|
58
60
|
) -> None:
|
|
61
|
+
generated_at = (
|
|
62
|
+
freshness.get("generated_at")
|
|
63
|
+
if freshness and freshness.get("generated_at")
|
|
64
|
+
else datetime.now(timezone.utc).isoformat()
|
|
65
|
+
)
|
|
59
66
|
meta = {
|
|
60
67
|
"context_path": context_path,
|
|
61
|
-
"generated_at":
|
|
68
|
+
"generated_at": generated_at,
|
|
62
69
|
"snapshot_root_hash": snapshot_root_hash,
|
|
63
70
|
"task": task,
|
|
64
71
|
"agent": agent,
|
|
65
72
|
"mode": mode,
|
|
66
73
|
"budget": budget,
|
|
67
74
|
"token_estimate": token_estimate,
|
|
75
|
+
"freshness": freshness or {},
|
|
76
|
+
"freshness_warnings": freshness_warnings or [],
|
|
68
77
|
}
|
|
78
|
+
if freshness:
|
|
79
|
+
for key in ("git_sha", "git_branch", "task_source", "changed_files_source"):
|
|
80
|
+
if key in freshness:
|
|
81
|
+
meta[key] = freshness[key]
|
|
69
82
|
_metadata_path(root).write_text(json.dumps(meta, indent=2))
|
|
70
83
|
|
|
71
84
|
|
|
@@ -74,6 +74,38 @@ def changed_files_since(root: Path, ref: str) -> set[str]:
|
|
|
74
74
|
return result
|
|
75
75
|
|
|
76
76
|
|
|
77
|
+
def current_sha(root: Path) -> str | None:
    """Return the output of ``git rev-parse HEAD`` for *root*, or None.

    None is returned when ``_run`` yields nothing, and also when the output
    is only whitespace, so callers never receive an empty string in place of
    a SHA (matching how ``current_branch`` treats an empty result).
    """
    out = _run(["git", "rev-parse", "HEAD"], root)
    sha = out.strip() if out else ""
    return sha or None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def current_branch(root: Path) -> str | None:
    """Return the abbreviated ref name of HEAD for *root*, or None.

    None is returned when the command yields nothing, when the name is empty
    after trimming, or when the name is the literal ``HEAD``, which
    ``--abbrev-ref`` reports when no branch is checked out.
    """
    out = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"], root)
    name = out.strip() if out else ""
    if name in ("", "HEAD"):
        return None
    return name
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def dirty_files(root: Path) -> set[str]:
    """Return the paths listed by ``git status --short`` (tracked and untracked).

    A short-format entry is ``XY <path>``: two status columns, one space,
    then the path. Either status column may be a blank, so the line must not
    be left-stripped before slicing -- doing so turns `` M a.py`` into
    ``M a.py`` and cuts the first characters off the path.

    For renames (``old -> new``) only the new path is returned. Surrounding
    double quotes, which git adds around paths containing whitespace or
    special characters, are removed; backslash escapes inside such a path
    are NOT decoded.

    Returns:
        The set of paths, empty when ``_run`` yields no output.
    """
    out = _run(["git", "status", "--short"], root)
    if not out:
        return set()

    paths: set[str] = set()
    for line in out.splitlines():
        entry = line.rstrip()
        if not entry.strip():
            continue

        if len(entry) > 3 and entry[2] == " ":
            # Intact "XY path" layout: the path starts after the separator.
            raw_path = entry[3:]
        else:
            # The leading blank status column was lost (e.g. the whole output
            # was stripped upstream), so the status is the first token.
            # A line with no second token carries no path and is skipped.
            _status, _sep, raw_path = entry.strip().partition(" ")

        raw_path = raw_path.strip()
        if " -> " in raw_path:
            raw_path = raw_path.rsplit(" -> ", 1)[1]
        if len(raw_path) >= 2 and raw_path[0] == raw_path[-1] == '"':
            raw_path = raw_path[1:-1]
        if raw_path:
            paths.add(raw_path)
    return paths
|
|
107
|
+
|
|
108
|
+
|
|
77
109
|
def file_churn_counts(root: Path, max_commits: int = 200) -> dict[str, int]:
|
|
78
110
|
"""Return commit count per file from the last max_commits commits.
|
|
79
111
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from typing import Literal
|
|
3
|
-
from
|
|
3
|
+
from typing import Any
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class ScanResult(BaseModel):
|
|
@@ -81,6 +82,8 @@ class ContextPack(BaseModel):
|
|
|
81
82
|
receipts: list[Receipt]
|
|
82
83
|
redaction_warnings: list[str] = []
|
|
83
84
|
stale: bool = False
|
|
85
|
+
freshness: dict[str, Any] = Field(default_factory=dict)
|
|
86
|
+
freshness_warnings: list[str] = Field(default_factory=list)
|
|
84
87
|
|
|
85
88
|
|
|
86
89
|
class DependencyNode(BaseModel):
|
|
@@ -27,6 +27,7 @@ import json
|
|
|
27
27
|
import sys
|
|
28
28
|
from pathlib import Path
|
|
29
29
|
|
|
30
|
+
from agentpack.core import git
|
|
30
31
|
from agentpack.core.token_estimator import estimate_tokens
|
|
31
32
|
|
|
32
33
|
|
|
@@ -110,15 +111,46 @@ def _get_context_impl(root: Path) -> str:
|
|
|
110
111
|
|
|
111
112
|
generated_at = metadata.get("generated_at", "unknown") if metadata else "unknown"
|
|
112
113
|
token_estimate = metadata.get("token_estimate", 0) if metadata else 0
|
|
114
|
+
stale_reasons: list[str] = []
|
|
113
115
|
|
|
114
116
|
if metadata is None or snapshot is None or metadata.get("snapshot_root_hash") != snapshot.get("root_hash"):
|
|
115
|
-
|
|
117
|
+
stale_reasons.append("repo snapshot changed")
|
|
118
|
+
if metadata:
|
|
119
|
+
saved_sha = metadata.get("git_sha") or (metadata.get("freshness") or {}).get("git_sha")
|
|
120
|
+
current_sha = git.current_sha(root) if git.is_git_repo(root) else None
|
|
121
|
+
if saved_sha and current_sha and saved_sha != current_sha:
|
|
122
|
+
stale_reasons.append("git HEAD changed")
|
|
123
|
+
task_md = _task_md_body(root)
|
|
124
|
+
if task_md and task_md != metadata.get("task"):
|
|
125
|
+
stale_reasons.append(".agentpack/task.md differs")
|
|
126
|
+
|
|
127
|
+
if stale_reasons:
|
|
128
|
+
reason_text = ", ".join(stale_reasons)
|
|
129
|
+
header = (
|
|
130
|
+
f"> **Stale context** — {reason_text} since last pack "
|
|
131
|
+
f"(generated: {generated_at}). Run pack_context() to refresh.\n\n"
|
|
132
|
+
)
|
|
116
133
|
else:
|
|
117
134
|
header = f"> Context is fresh (generated: {generated_at}, {token_estimate:,} tokens).\n\n"
|
|
118
135
|
|
|
119
136
|
return header + content
|
|
120
137
|
|
|
121
138
|
|
|
139
|
+
def _task_md_body(root: Path) -> str | None:
|
|
140
|
+
path = root / ".agentpack" / "task.md"
|
|
141
|
+
if not path.exists():
|
|
142
|
+
return None
|
|
143
|
+
try:
|
|
144
|
+
content = path.read_text(encoding="utf-8").strip()
|
|
145
|
+
except OSError:
|
|
146
|
+
return None
|
|
147
|
+
lines = [line for line in content.splitlines() if line.strip() and not line.startswith("#")]
|
|
148
|
+
body = lines[0].strip() if lines else ""
|
|
149
|
+
if body and "Write or update the current coding task here." not in body:
|
|
150
|
+
return body
|
|
151
|
+
return None
|
|
152
|
+
|
|
153
|
+
|
|
122
154
|
def _explain_file_impl(root: Path, path: str, task: str = "") -> str:
|
|
123
155
|
"""Testable core of the explain_file MCP tool."""
|
|
124
156
|
from agentpack.application.pack_service import PackPlanner, PackRequest, _sf_tokens
|
|
@@ -71,6 +71,26 @@ def render_claude(pack: ContextPack) -> str:
|
|
|
71
71
|
|
|
72
72
|
sections.append("## Task")
|
|
73
73
|
sections.append("")
|
|
74
|
+
|
|
75
|
+
if pack.freshness or pack.freshness_warnings:
|
|
76
|
+
sections.append("## Freshness")
|
|
77
|
+
sections.append("")
|
|
78
|
+
if pack.freshness_warnings:
|
|
79
|
+
sections.append("> **Refresh recommended:** " + " ".join(pack.freshness_warnings))
|
|
80
|
+
sections.append("")
|
|
81
|
+
for label, key in (
|
|
82
|
+
("Generated", "generated_at"),
|
|
83
|
+
("Git branch", "git_branch"),
|
|
84
|
+
("Git SHA", "git_sha"),
|
|
85
|
+
("Task source", "task_source"),
|
|
86
|
+
("Changed-file source", "changed_files_source"),
|
|
87
|
+
("Snapshot hash", "snapshot_root_hash"),
|
|
88
|
+
("Dirty files at pack time", "dirty_files_count"),
|
|
89
|
+
):
|
|
90
|
+
value = pack.freshness.get(key)
|
|
91
|
+
if value is not None:
|
|
92
|
+
sections.append(f"- **{label}:** {value}")
|
|
93
|
+
sections.append("")
|
|
74
94
|
sections.append(pack.task)
|
|
75
95
|
sections.append("")
|
|
76
96
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|