agentpack-cli 0.1.20__tar.gz → 0.1.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/PKG-INFO +19 -9
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/README.md +18 -8
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/pyproject.toml +1 -1
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/__init__.py +1 -1
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/ranking.py +84 -32
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/application/pack_service.py +59 -5
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/doctor.py +34 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/explain.py +2 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/pack.py +41 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/stats.py +33 -11
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/config.py +8 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/context_pack.py +15 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/.gitignore +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/LICENSE +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/antigravity.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/base.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/claude.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/codex.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/cursor.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/detect.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/generic.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/windsurf.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/dependency_graph.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/go_imports.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/java_imports.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/js_ts_imports.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/python_imports.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/rust_imports.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/symbols.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/tests.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/application/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/cli.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/_shared.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/benchmark.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/claude_cmd.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/diff.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/hook_cmd.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/init.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/install.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/mcp_cmd.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/monitor.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/scan.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/status.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/summarize.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/watch.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/bootstrap.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/cache.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/diff.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/git.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/git_hooks.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/global_install.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/ignore.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/merkle.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/models.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/redactor.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/scanner.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/snapshot.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/token_estimator.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/vscode_tasks.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/data/agentpack.md +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/antigravity.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/claude.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/codex.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/cursor.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/windsurf.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/integrations/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/integrations/git_hooks.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/integrations/global_install.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/integrations/vscode_tasks.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/mcp_server.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/renderers/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/renderers/compact.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/renderers/markdown.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/renderers/receipts.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/session/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/session/state.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/summaries/__init__.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/summaries/base.py +0 -0
- {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/summaries/offline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentpack-cli
|
|
3
|
-
Version: 0.1.20
|
|
3
|
+
Version: 0.1.21
|
|
4
4
|
Summary: Token-aware context packing for AI coding agents — Claude, Cursor, Windsurf, and Codex
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
|
|
|
44
44
|
[](https://opensource.org/licenses/MIT)
|
|
45
45
|
[](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
46
46
|
|
|
47
|
-
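> **Status: alpha (v0.1.20).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.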
|
|
47
|
+
> **Status: alpha (v0.1.21).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
|
|
48
48
|
>
|
|
49
49
|
> **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
|
|
50
50
|
|
|
@@ -72,8 +72,9 @@ AgentPack solves this with a one-time offline analysis pass:
|
|
|
72
72
|
|
|
73
73
|
1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
|
|
74
74
|
2. **On each task** — uses git diff + import graph traversal + keyword scoring to rank every file by relevance to what you're working on.
|
|
75
|
-
3. **Packs a tight context document** — changed files get full content, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
|
|
76
|
-
4. **
|
|
75
|
+
3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
|
|
76
|
+
4. **Explains pack quality** — noisy-pack diagnostics, score receipts, and token-precision metrics show when the pack is broad and where token noise lives.
|
|
77
|
+
5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
|
|
77
78
|
|
|
78
79
|
The result: your agent starts every session with a focused, accurate picture of the relevant code — without you doing anything after opt-in.
|
|
79
80
|
|
|
@@ -546,6 +547,7 @@ Some checks failed. Run the suggested commands above to fix.
|
|
|
546
547
|
The new checks in `doctor`:
|
|
547
548
|
- **Local vs global hooks**: warns when Claude hooks are only in the per-project `.claude/settings.json` — context won't auto-inject in other repos
|
|
548
549
|
- **Slash command presence**: checks both local (`.claude/commands/`) and global (`~/.claude/commands/`) installations
|
|
550
|
+
- **Source checkout mismatch**: warns when you're inside an AgentPack source checkout but the `agentpack` executable imports the installed site-packages copy. Use `PYTHONPATH=src python -m agentpack.cli ...` or `pip install -e .` for local development.
|
|
549
551
|
|
|
550
552
|
---
|
|
551
553
|
|
|
@@ -632,9 +634,11 @@ Options:
|
|
|
632
634
|
|
|
633
635
|
| Mode | What's included |
|
|
634
636
|
|------|----------------|
|
|
635
|
-
| `minimal` | Changed files + direct configs |
|
|
636
|
-
| `balanced` | Changed files + deps + reverse deps + tests + summaries |
|
|
637
|
-
| `deep` | Everything in balanced + docs + more full-content files |
|
|
637
|
+
| `minimal` | Changed files + direct configs, with a small summary cap |
|
|
638
|
+
| `balanced` | Changed files + deps + reverse deps + tests + capped summaries |
|
|
639
|
+
| `deep` | Everything in balanced + docs + more full-content files, uncapped summaries |
|
|
640
|
+
|
|
641
|
+
`pack` also prints diagnostics when the pack looks noisy: very short task text, no changed files, mostly filename matches, mostly summaries, many symbol matches, weak summaries excluded by the score floor, or summaries excluded by the mode cap.
|
|
638
642
|
|
|
639
643
|
---
|
|
640
644
|
|
|
@@ -832,7 +836,9 @@ Show session state, token statistics, and selection accuracy for the last pack.
|
|
|
832
836
|
agentpack stats
|
|
833
837
|
```
|
|
834
838
|
|
|
835
|
-
When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files
|
|
839
|
+
When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files from the latest pack and avg recall/precision/F1 over the last 10 runs.
|
|
840
|
+
|
|
841
|
+
Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
|
|
836
842
|
|
|
837
843
|
---
|
|
838
844
|
|
|
@@ -917,7 +923,7 @@ agentpack monitor --clear
|
|
|
917
923
|
| Large unrelated file | −50 |
|
|
918
924
|
| Ignored/binary | −100 |
|
|
919
925
|
|
|
920
|
-
Keyword scoring uses concept synonym expansion — "rate limiting"
|
|
926
|
+
Keyword scoring uses weighted concept synonym expansion — literal task terms are strongest, normalized variants are slightly weaker, and broad concept synonyms are weaker again. "rate limiting" still expands to `throttle`, `leaky`, `bucket`, `quota`, but broad expansions no longer dominate literal task terms. Matching is token-based, so `task` does not accidentally match every `tasks.py`.
|
|
921
927
|
|
|
922
928
|
---
|
|
923
929
|
|
|
@@ -934,6 +940,10 @@ ignore_file = ".agentignore"
|
|
|
934
940
|
default_budget = 25000
|
|
935
941
|
default_mode = "balanced"
|
|
936
942
|
max_file_tokens = 4000
|
|
943
|
+
min_summary_score = 60
|
|
944
|
+
max_summary_files_minimal = 15
|
|
945
|
+
max_summary_files_balanced = 40
|
|
946
|
+
max_summary_files_deep = 0
|
|
937
947
|
include_tests = true
|
|
938
948
|
include_configs = true
|
|
939
949
|
include_receipts = true
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
[](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
|
|
7
7
|
|
|
8
|
-
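> **Status: alpha (v0.1.20).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.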
|
|
8
|
+
> **Status: alpha (v0.1.21).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
|
|
9
9
|
>
|
|
10
10
|
> **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
|
|
11
11
|
|
|
@@ -33,8 +33,9 @@ AgentPack solves this with a one-time offline analysis pass:
|
|
|
33
33
|
|
|
34
34
|
1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
|
|
35
35
|
2. **On each task** — uses git diff + import graph traversal + keyword scoring to rank every file by relevance to what you're working on.
|
|
36
|
-
3. **Packs a tight context document** — changed files get full content, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
|
|
37
|
-
4. **
|
|
36
|
+
3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
|
|
37
|
+
4. **Explains pack quality** — noisy-pack diagnostics, score receipts, and token-precision metrics show when the pack is broad and where token noise lives.
|
|
38
|
+
5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
|
|
38
39
|
|
|
39
40
|
The result: your agent starts every session with a focused, accurate picture of the relevant code — without you doing anything after opt-in.
|
|
40
41
|
|
|
@@ -507,6 +508,7 @@ Some checks failed. Run the suggested commands above to fix.
|
|
|
507
508
|
The new checks in `doctor`:
|
|
508
509
|
- **Local vs global hooks**: warns when Claude hooks are only in the per-project `.claude/settings.json` — context won't auto-inject in other repos
|
|
509
510
|
- **Slash command presence**: checks both local (`.claude/commands/`) and global (`~/.claude/commands/`) installations
|
|
511
|
+
- **Source checkout mismatch**: warns when you're inside an AgentPack source checkout but the `agentpack` executable imports the installed site-packages copy. Use `PYTHONPATH=src python -m agentpack.cli ...` or `pip install -e .` for local development.
|
|
510
512
|
|
|
511
513
|
---
|
|
512
514
|
|
|
@@ -593,9 +595,11 @@ Options:
|
|
|
593
595
|
|
|
594
596
|
| Mode | What's included |
|
|
595
597
|
|------|----------------|
|
|
596
|
-
| `minimal` | Changed files + direct configs |
|
|
597
|
-
| `balanced` | Changed files + deps + reverse deps + tests + summaries |
|
|
598
|
-
| `deep` | Everything in balanced + docs + more full-content files |
|
|
598
|
+
| `minimal` | Changed files + direct configs, with a small summary cap |
|
|
599
|
+
| `balanced` | Changed files + deps + reverse deps + tests + capped summaries |
|
|
600
|
+
| `deep` | Everything in balanced + docs + more full-content files, uncapped summaries |
|
|
601
|
+
|
|
602
|
+
`pack` also prints diagnostics when the pack looks noisy: very short task text, no changed files, mostly filename matches, mostly summaries, many symbol matches, weak summaries excluded by the score floor, or summaries excluded by the mode cap.
|
|
599
603
|
|
|
600
604
|
---
|
|
601
605
|
|
|
@@ -793,7 +797,9 @@ Show session state, token statistics, and selection accuracy for the last pack.
|
|
|
793
797
|
agentpack stats
|
|
794
798
|
```
|
|
795
799
|
|
|
796
|
-
When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files
|
|
800
|
+
When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files from the latest pack and avg recall/precision/F1 over the last 10 runs.
|
|
801
|
+
|
|
802
|
+
Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
|
|
797
803
|
|
|
798
804
|
---
|
|
799
805
|
|
|
@@ -878,7 +884,7 @@ agentpack monitor --clear
|
|
|
878
884
|
| Large unrelated file | −50 |
|
|
879
885
|
| Ignored/binary | −100 |
|
|
880
886
|
|
|
881
|
-
Keyword scoring uses concept synonym expansion — "rate limiting"
|
|
887
|
+
Keyword scoring uses weighted concept synonym expansion — literal task terms are strongest, normalized variants are slightly weaker, and broad concept synonyms are weaker again. "rate limiting" still expands to `throttle`, `leaky`, `bucket`, `quota`, but broad expansions no longer dominate literal task terms. Matching is token-based, so `task` does not accidentally match every `tasks.py`.
|
|
882
888
|
|
|
883
889
|
---
|
|
884
890
|
|
|
@@ -895,6 +901,10 @@ ignore_file = ".agentignore"
|
|
|
895
901
|
default_budget = 25000
|
|
896
902
|
default_mode = "balanced"
|
|
897
903
|
max_file_tokens = 4000
|
|
904
|
+
min_summary_score = 60
|
|
905
|
+
max_summary_files_minimal = 15
|
|
906
|
+
max_summary_files_balanced = 40
|
|
907
|
+
max_summary_files_deep = 0
|
|
898
908
|
include_tests = true
|
|
899
909
|
include_configs = true
|
|
900
910
|
include_receipts = true
|
|
@@ -181,29 +181,38 @@ CONFIG_NAMES = {
|
|
|
181
181
|
_DEFAULT_WEIGHTS = ScoringWeights()
|
|
182
182
|
|
|
183
183
|
|
|
184
|
-
def
|
|
184
|
+
def _add_keyword_weight(weights: dict[str, float], keyword: str, weight: float) -> None:
|
|
185
|
+
weights[keyword] = max(weights.get(keyword, 0.0), weight)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def extract_keyword_weights(task: str) -> dict[str, float]:
|
|
185
189
|
words = re.split(r"[^a-zA-Z0-9]+", task.lower())
|
|
186
|
-
|
|
190
|
+
keyword_weights: dict[str, float] = {}
|
|
187
191
|
for word in words:
|
|
188
192
|
if len(word) < 3:
|
|
189
193
|
continue
|
|
190
194
|
if word in _STOPWORDS:
|
|
191
195
|
continue
|
|
192
|
-
|
|
196
|
+
_add_keyword_weight(keyword_weights, word, 1.0)
|
|
193
197
|
if word in _VARIANTS:
|
|
194
|
-
|
|
198
|
+
_add_keyword_weight(keyword_weights, _VARIANTS[word], 0.75)
|
|
195
199
|
|
|
196
|
-
#
|
|
197
|
-
|
|
198
|
-
|
|
200
|
+
# Expand via concept map one level only. Expanded concepts are weaker than
|
|
201
|
+
# literal task words so broad terms like "task" do not dominate ranking.
|
|
202
|
+
expanded: dict[str, float] = {}
|
|
203
|
+
for kw in keyword_weights:
|
|
199
204
|
if kw in _CONCEPT_MAP:
|
|
200
205
|
for synonym in _CONCEPT_MAP[kw]:
|
|
201
|
-
expanded.
|
|
202
|
-
# also apply _VARIANTS to expanded terms
|
|
206
|
+
_add_keyword_weight(expanded, synonym, 0.35)
|
|
203
207
|
if synonym in _VARIANTS:
|
|
204
|
-
expanded
|
|
205
|
-
|
|
206
|
-
|
|
208
|
+
_add_keyword_weight(expanded, _VARIANTS[synonym], 0.35)
|
|
209
|
+
for kw, weight in expanded.items():
|
|
210
|
+
_add_keyword_weight(keyword_weights, kw, weight)
|
|
211
|
+
return keyword_weights
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def extract_keywords(task: str) -> set[str]:
|
|
215
|
+
return set(extract_keyword_weights(task))
|
|
207
216
|
|
|
208
217
|
|
|
209
218
|
def enrich_keywords_from_files(
|
|
@@ -255,21 +264,62 @@ def enrich_keywords_from_files(
|
|
|
255
264
|
return keywords | set(top)
|
|
256
265
|
|
|
257
266
|
|
|
258
|
-
def
|
|
259
|
-
|
|
260
|
-
|
|
267
|
+
def enrich_keyword_weights_from_files(
|
|
268
|
+
keyword_weights: dict[str, float],
|
|
269
|
+
changed_paths: set[str],
|
|
270
|
+
files: list[FileInfo],
|
|
271
|
+
max_new_keywords: int = 20,
|
|
272
|
+
) -> dict[str, float]:
|
|
273
|
+
enriched = dict(keyword_weights)
|
|
274
|
+
enriched_keywords = enrich_keywords_from_files(set(keyword_weights), changed_paths, files, max_new_keywords)
|
|
275
|
+
for keyword in enriched_keywords - set(keyword_weights):
|
|
276
|
+
enriched[keyword] = 0.5
|
|
277
|
+
return enriched
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _tokens_for_match(text: str) -> set[str]:
|
|
281
|
+
"""Return identifier-ish tokens for exact keyword matching."""
|
|
282
|
+
spaced = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", text)
|
|
283
|
+
raw_tokens = re.split(r"[^a-zA-Z0-9]+", spaced.lower())
|
|
284
|
+
return {tok for tok in raw_tokens if tok}
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _keyword_token_weights(keywords: set[str] | dict[str, float]) -> dict[str, float]:
|
|
288
|
+
if isinstance(keywords, dict):
|
|
289
|
+
items = keywords.items()
|
|
290
|
+
else:
|
|
291
|
+
items = ((keyword, 1.0) for keyword in keywords)
|
|
292
|
+
|
|
293
|
+
token_weights: dict[str, float] = {}
|
|
294
|
+
for keyword, weight in items:
|
|
295
|
+
for token in _tokens_for_match(keyword):
|
|
296
|
+
if len(token) >= 3:
|
|
297
|
+
token_weights[token] = max(token_weights.get(token, 0.0), weight)
|
|
298
|
+
return token_weights
|
|
299
|
+
|
|
261
300
|
|
|
301
|
+
def _match_weight(text: str, keywords: set[str] | dict[str, float]) -> float:
|
|
302
|
+
token_weights = _keyword_token_weights(keywords)
|
|
303
|
+
matches = _tokens_for_match(text) & set(token_weights)
|
|
304
|
+
return max((token_weights[token] for token in matches), default=0.0)
|
|
262
305
|
|
|
263
|
-
def _content_matches_keywords(text: str, keywords: set[str]) -> int:
|
|
264
|
-
text_lower = text.lower()
|
|
265
|
-
return sum(1 for kw in keywords if kw in text_lower)
|
|
266
306
|
|
|
307
|
+
def _path_matches_keywords(path: str, keywords: set[str] | dict[str, float]) -> float:
|
|
308
|
+
return _match_weight(path, keywords)
|
|
267
309
|
|
|
268
|
-
|
|
310
|
+
|
|
311
|
+
def _content_matches_keywords(text: str, keywords: set[str] | dict[str, float]) -> tuple[int, float]:
|
|
312
|
+
token_weights = _keyword_token_weights(keywords)
|
|
313
|
+
text_tokens = _tokens_for_match(text)
|
|
314
|
+
matches = text_tokens & set(token_weights)
|
|
315
|
+
return len(matches), sum(token_weights[token] for token in matches)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _symbol_matches_keywords(symbols: list[str], keywords: set[str] | dict[str, float]) -> float:
|
|
319
|
+
best_weight = 0.0
|
|
269
320
|
for sym in symbols:
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
return False
|
|
321
|
+
best_weight = max(best_weight, _match_weight(sym, keywords))
|
|
322
|
+
return best_weight
|
|
273
323
|
|
|
274
324
|
|
|
275
325
|
def score_files(
|
|
@@ -278,7 +328,7 @@ def score_files(
|
|
|
278
328
|
staged_paths: set[str],
|
|
279
329
|
recently_modified: list[str],
|
|
280
330
|
dep_graph: "DependencyGraph | dict",
|
|
281
|
-
keywords: set[str],
|
|
331
|
+
keywords: set[str] | dict[str, float],
|
|
282
332
|
include_tests: bool = True,
|
|
283
333
|
include_configs: bool = True,
|
|
284
334
|
weights: ScoringWeights | None = None,
|
|
@@ -315,8 +365,9 @@ def score_files(
|
|
|
315
365
|
score += w.staged
|
|
316
366
|
reasons.append("staged")
|
|
317
367
|
|
|
318
|
-
|
|
319
|
-
|
|
368
|
+
filename_weight = _path_matches_keywords(fi.path, keywords)
|
|
369
|
+
if filename_weight > 0:
|
|
370
|
+
score += w.filename_keyword * filename_weight
|
|
320
371
|
reasons.append("filename keyword match")
|
|
321
372
|
|
|
322
373
|
node = dep_graph.get(fi.path)
|
|
@@ -327,27 +378,28 @@ def score_files(
|
|
|
327
378
|
(s["name"] if isinstance(s, dict) else s.name)
|
|
328
379
|
for s in raw_syms
|
|
329
380
|
]
|
|
330
|
-
|
|
331
|
-
|
|
381
|
+
symbol_weight = _symbol_matches_keywords(sym_names, keywords)
|
|
382
|
+
if symbol_weight > 0:
|
|
383
|
+
score += w.symbol_keyword * symbol_weight
|
|
332
384
|
reasons.append("symbol keyword match")
|
|
333
385
|
|
|
334
386
|
if fi.content is not None:
|
|
335
|
-
hits = _content_matches_keywords(fi.content, keywords)
|
|
387
|
+
hits, hit_weight = _content_matches_keywords(fi.content, keywords)
|
|
336
388
|
if hits > 0:
|
|
337
|
-
score += min(w.content_keyword_max,
|
|
389
|
+
score += min(w.content_keyword_max, hit_weight * w.content_keyword_per_hit)
|
|
338
390
|
reasons.append(f"content keyword match ({hits})")
|
|
339
391
|
elif fi.abs_path.exists():
|
|
340
392
|
try:
|
|
341
393
|
text = fi.abs_path.read_text(errors="replace")
|
|
342
|
-
hits = _content_matches_keywords(text, keywords)
|
|
394
|
+
hits, hit_weight = _content_matches_keywords(text, keywords)
|
|
343
395
|
if hits > 0:
|
|
344
|
-
score += min(w.content_keyword_max,
|
|
396
|
+
score += min(w.content_keyword_max, hit_weight * w.content_keyword_per_hit)
|
|
345
397
|
reasons.append(f"content keyword match ({hits})")
|
|
346
398
|
except OSError:
|
|
347
399
|
pass
|
|
348
400
|
|
|
349
401
|
for dep_path in node.imports:
|
|
350
|
-
if dep_path in changed_paths or _path_matches_keywords(dep_path, keywords):
|
|
402
|
+
if dep_path in changed_paths or _path_matches_keywords(dep_path, keywords) > 0:
|
|
351
403
|
score += w.direct_dep
|
|
352
404
|
reasons.append("direct dependency of changed file")
|
|
353
405
|
break
|
|
@@ -16,7 +16,12 @@ from agentpack.core import git
|
|
|
16
16
|
from agentpack.core.context_pack import select_files, save_pack_metadata
|
|
17
17
|
from agentpack.core.models import ContextPack, DependencyGraph, FileInfo, ScanResult, SelectedFile, Receipt
|
|
18
18
|
from agentpack.core.token_estimator import estimate_tokens
|
|
19
|
-
from agentpack.analysis.ranking import
|
|
19
|
+
from agentpack.analysis.ranking import (
|
|
20
|
+
score_files,
|
|
21
|
+
extract_keyword_weights,
|
|
22
|
+
enrich_keyword_weights_from_files,
|
|
23
|
+
boost_paired_tests,
|
|
24
|
+
)
|
|
20
25
|
from agentpack.analysis.tests import find_related_tests
|
|
21
26
|
from agentpack.analysis import dependency_graph as dep_graph_mod
|
|
22
27
|
from agentpack.summaries.base import build_all_summaries
|
|
@@ -131,8 +136,9 @@ class FileRanker:
|
|
|
131
136
|
root: Path | None = None,
|
|
132
137
|
) -> RankResult:
|
|
133
138
|
from agentpack.core import git as _git
|
|
134
|
-
|
|
135
|
-
|
|
139
|
+
keyword_weights = extract_keyword_weights(task)
|
|
140
|
+
keyword_weights = enrich_keyword_weights_from_files(keyword_weights, changes.all_changed, packable)
|
|
141
|
+
keywords = set(keyword_weights)
|
|
136
142
|
all_paths = {f.path for f in packable}
|
|
137
143
|
|
|
138
144
|
for fi in packable:
|
|
@@ -149,7 +155,7 @@ class FileRanker:
|
|
|
149
155
|
staged_paths=changes.git_staged,
|
|
150
156
|
recently_modified=changes.recently_modified,
|
|
151
157
|
dep_graph=dep_graph,
|
|
152
|
-
keywords=
|
|
158
|
+
keywords=keyword_weights,
|
|
153
159
|
include_tests=cfg.context.include_tests,
|
|
154
160
|
include_configs=cfg.context.include_configs,
|
|
155
161
|
weights=cfg.scoring,
|
|
@@ -209,6 +215,8 @@ class PackPlanner:
|
|
|
209
215
|
budget=effective_budget,
|
|
210
216
|
max_file_tokens=cfg.context.max_file_tokens,
|
|
211
217
|
keywords=rank_result.keywords,
|
|
218
|
+
min_summary_score=cfg.context.min_summary_score,
|
|
219
|
+
max_summary_files=_summary_cap_for_mode(cfg, request.mode),
|
|
212
220
|
)
|
|
213
221
|
phase_times["select"] = time.perf_counter() - t0
|
|
214
222
|
|
|
@@ -317,6 +325,8 @@ class PackService:
|
|
|
317
325
|
selected_count=len(plan.selected),
|
|
318
326
|
changed_count=len(plan.all_changed),
|
|
319
327
|
selected_paths=[sf.path for sf in plan.selected],
|
|
328
|
+
selected_tokens={sf.path: _sf_tokens(sf) for sf in plan.selected},
|
|
329
|
+
selected_modes={sf.path: sf.include_mode for sf in plan.selected},
|
|
320
330
|
selected_hints=[{"path": sf.path, "why": sf.reasons[0] if sf.reasons else ""} for sf in plan.selected[:8]],
|
|
321
331
|
current_changed=plan.all_changed,
|
|
322
332
|
excluded_count=len(excluded_receipts),
|
|
@@ -347,6 +357,16 @@ def _sf_tokens(sf: SelectedFile) -> int:
|
|
|
347
357
|
return estimate_tokens("\n".join(parts)) if parts else 50
|
|
348
358
|
|
|
349
359
|
|
|
360
|
+
def _summary_cap_for_mode(cfg: Any, mode: str) -> int:
|
|
361
|
+
if mode == "minimal":
|
|
362
|
+
return cfg.context.max_summary_files_minimal
|
|
363
|
+
if mode == "balanced":
|
|
364
|
+
return cfg.context.max_summary_files_balanced
|
|
365
|
+
if mode == "deep":
|
|
366
|
+
return cfg.context.max_summary_files_deep
|
|
367
|
+
return 0
|
|
368
|
+
|
|
369
|
+
|
|
350
370
|
def _load_last_record(metrics_path: Path) -> dict[str, Any] | None:
|
|
351
371
|
"""Return the most recent metrics record that has selected_paths."""
|
|
352
372
|
if not metrics_path.exists():
|
|
@@ -390,11 +410,41 @@ def _compute_selection_accuracy(
|
|
|
390
410
|
recall = len(hits) / len(actual_changed)
|
|
391
411
|
precision = len(hits) / len(prev_selected)
|
|
392
412
|
f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0
|
|
393
|
-
|
|
413
|
+
result = {
|
|
394
414
|
"selection_recall": round(recall, 3),
|
|
395
415
|
"selection_precision": round(precision, 3),
|
|
396
416
|
"selection_f1": round(f1, 3),
|
|
397
417
|
}
|
|
418
|
+
token_map = prev.get("selected_tokens") or {}
|
|
419
|
+
if isinstance(token_map, dict):
|
|
420
|
+
total_tokens = sum(v for v in token_map.values() if isinstance(v, int | float))
|
|
421
|
+
hit_tokens = sum(
|
|
422
|
+
token_map.get(path, 0)
|
|
423
|
+
for path in hits
|
|
424
|
+
if isinstance(token_map.get(path, 0), int | float)
|
|
425
|
+
)
|
|
426
|
+
if total_tokens > 0:
|
|
427
|
+
token_precision = hit_tokens / total_tokens
|
|
428
|
+
result["selection_token_precision"] = round(token_precision, 3)
|
|
429
|
+
result["selection_noise_pct"] = round((1 - token_precision) * 100, 1)
|
|
430
|
+
mode_map = prev.get("selected_modes") or {}
|
|
431
|
+
if isinstance(mode_map, dict):
|
|
432
|
+
for mode in ("full", "symbols", "summary"):
|
|
433
|
+
mode_paths = {path for path, value in mode_map.items() if value == mode}
|
|
434
|
+
mode_total = sum(
|
|
435
|
+
token_map.get(path, 0)
|
|
436
|
+
for path in mode_paths
|
|
437
|
+
if isinstance(token_map.get(path, 0), int | float)
|
|
438
|
+
)
|
|
439
|
+
if mode_total <= 0:
|
|
440
|
+
continue
|
|
441
|
+
mode_hit_tokens = sum(
|
|
442
|
+
token_map.get(path, 0)
|
|
443
|
+
for path in mode_paths & hits
|
|
444
|
+
if isinstance(token_map.get(path, 0), int | float)
|
|
445
|
+
)
|
|
446
|
+
result[f"selection_token_precision_{mode}"] = round(mode_hit_tokens / mode_total, 3)
|
|
447
|
+
return result
|
|
398
448
|
|
|
399
449
|
|
|
400
450
|
def _record_metrics(
|
|
@@ -409,6 +459,8 @@ def _record_metrics(
|
|
|
409
459
|
selected_count: int,
|
|
410
460
|
changed_count: int,
|
|
411
461
|
selected_paths: list[str],
|
|
462
|
+
selected_tokens: dict[str, int],
|
|
463
|
+
selected_modes: dict[str, str],
|
|
412
464
|
current_changed: set[str],
|
|
413
465
|
selected_hints: list[dict] | None = None,
|
|
414
466
|
excluded_count: int = 0,
|
|
@@ -428,6 +480,8 @@ def _record_metrics(
|
|
|
428
480
|
"excluded_files": excluded_count,
|
|
429
481
|
"excluded_paths": excluded_paths or [],
|
|
430
482
|
"selected_paths": selected_paths,
|
|
483
|
+
"selected_tokens": selected_tokens,
|
|
484
|
+
"selected_modes": selected_modes,
|
|
431
485
|
"selected_hints": selected_hints or [],
|
|
432
486
|
"phases": {k: round(v, 3) for k, v in phase_times.items()},
|
|
433
487
|
"total_s": round(sum(phase_times.values()), 3),
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import os
|
|
4
4
|
import shutil
|
|
5
5
|
import subprocess
|
|
6
|
+
import sys
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
|
|
8
9
|
import typer
|
|
@@ -37,6 +38,15 @@ def register(app: typer.Typer) -> None:
|
|
|
37
38
|
console.print(" [red]✗[/] agentpack not on PATH — run: pipx install agentpack-cli")
|
|
38
39
|
ok = False
|
|
39
40
|
|
|
41
|
+
try:
|
|
42
|
+
root = _root()
|
|
43
|
+
warning = _source_checkout_warning(root, Path(__file__), sys.executable, binary)
|
|
44
|
+
if warning:
|
|
45
|
+
console.print(f" [yellow]![/] {warning}")
|
|
46
|
+
ok = False
|
|
47
|
+
except Exception:
|
|
48
|
+
pass
|
|
49
|
+
|
|
40
50
|
# --- Git template hooks ---
|
|
41
51
|
console.print("\n[bold]Git template hooks (~/.git-templates/hooks/)[/]")
|
|
42
52
|
hooks_dir = _GIT_TEMPLATE_DIR / "hooks"
|
|
@@ -234,6 +244,30 @@ def _check_agent_file(root: Path, filename: str, agent: str) -> None:
|
|
|
234
244
|
console.print(f" [dim]-[/] {filename} not present (optional)")
|
|
235
245
|
|
|
236
246
|
|
|
247
|
+
def _source_checkout_warning(
|
|
248
|
+
root: Path,
|
|
249
|
+
package_file: Path,
|
|
250
|
+
executable: str,
|
|
251
|
+
binary: str | None,
|
|
252
|
+
) -> str | None:
|
|
253
|
+
source_pkg = root / "src" / "agentpack"
|
|
254
|
+
if not source_pkg.exists():
|
|
255
|
+
return None
|
|
256
|
+
try:
|
|
257
|
+
package_path = package_file.resolve()
|
|
258
|
+
source_path = source_pkg.resolve()
|
|
259
|
+
except OSError:
|
|
260
|
+
return None
|
|
261
|
+
if package_path.is_relative_to(source_path):
|
|
262
|
+
return None
|
|
263
|
+
binary_text = f" via {binary}" if binary else ""
|
|
264
|
+
return (
|
|
265
|
+
"source checkout detected, but CLI imports installed package "
|
|
266
|
+
f"at {package_path}{binary_text}. Use `PYTHONPATH=src python -m agentpack.cli ...` "
|
|
267
|
+
"or install editable with `pip install -e .`."
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
|
|
237
271
|
def _print_summary(ok: bool) -> None:
|
|
238
272
|
console.print("")
|
|
239
273
|
if ok:
|
|
@@ -167,6 +167,8 @@ def register(app: typer.Typer) -> None:
|
|
|
167
167
|
budget=deep_budget,
|
|
168
168
|
max_file_tokens=cfg.context.max_file_tokens,
|
|
169
169
|
keywords=plan.keywords,
|
|
170
|
+
min_summary_score=cfg.context.min_summary_score,
|
|
171
|
+
max_summary_files=0,
|
|
170
172
|
)
|
|
171
173
|
deep_selected_paths = {
|
|
172
174
|
r.path for r in deep_receipts if r.action in ("included", "summarized")
|
|
@@ -133,6 +133,16 @@ def _print_pack_summary(result: PackResult) -> None:
|
|
|
133
133
|
console.print()
|
|
134
134
|
console.print(Columns([stats, files_tbl], equal=False, expand=False))
|
|
135
135
|
|
|
136
|
+
diagnostics = _pack_diagnostics(result)
|
|
137
|
+
if diagnostics:
|
|
138
|
+
diag_text = "\n".join(f" [yellow]![/] {line}" for line in diagnostics)
|
|
139
|
+
console.print(Panel(
|
|
140
|
+
diag_text,
|
|
141
|
+
title="[bold yellow]Pack diagnostics[/]",
|
|
142
|
+
border_style="yellow",
|
|
143
|
+
padding=(0, 1),
|
|
144
|
+
))
|
|
145
|
+
|
|
136
146
|
if changed_files:
|
|
137
147
|
console.print(f"\n[bold]Changed files[/] ({len(changed_files)}):")
|
|
138
148
|
console.print(changed_lines)
|
|
@@ -161,6 +171,37 @@ def _print_pack_summary(result: PackResult) -> None:
|
|
|
161
171
|
console.print()
|
|
162
172
|
|
|
163
173
|
|
|
174
|
+
def _pack_diagnostics(result: PackResult) -> list[str]:
|
|
175
|
+
selected = result.pack.selected_files
|
|
176
|
+
receipts = result.pack.receipts
|
|
177
|
+
diagnostics: list[str] = []
|
|
178
|
+
summary_count = sum(1 for sf in selected if sf.include_mode == "summary")
|
|
179
|
+
filename_matches = sum(1 for sf in selected if "filename keyword match" in sf.reasons)
|
|
180
|
+
symbol_matches = sum(1 for sf in selected if "symbol keyword match" in sf.reasons)
|
|
181
|
+
score_floor_excluded = sum(1 for r in receipts if r.reason == "summary score below floor")
|
|
182
|
+
summary_cap_excluded = sum(1 for r in receipts if r.reason == "summary cap reached")
|
|
183
|
+
|
|
184
|
+
task_words = [
|
|
185
|
+
part for part in result.pack.task.replace("_", " ").replace("-", " ").split()
|
|
186
|
+
if len(part) >= 3
|
|
187
|
+
]
|
|
188
|
+
if len(task_words) <= 3:
|
|
189
|
+
diagnostics.append("Task is very short; add subsystem, file, or symptom words for better precision.")
|
|
190
|
+
if not result.changed_files:
|
|
191
|
+
diagnostics.append("No changed files detected; pack relies mostly on task keywords and cached summaries.")
|
|
192
|
+
if selected and filename_matches / len(selected) >= 0.6:
|
|
193
|
+
diagnostics.append("Most selected files matched by filename; task terms may be broad.")
|
|
194
|
+
if selected and summary_count / len(selected) >= 0.7:
|
|
195
|
+
diagnostics.append("Pack is mostly summaries; use minimal mode or a more specific task for edit work.")
|
|
196
|
+
if symbol_matches > 25:
|
|
197
|
+
diagnostics.append(f"Many symbol matches selected ({symbol_matches}); inspect repeated task terms with explain.")
|
|
198
|
+
if score_floor_excluded:
|
|
199
|
+
diagnostics.append(f"{score_floor_excluded} weak summaries excluded by score floor.")
|
|
200
|
+
if summary_cap_excluded:
|
|
201
|
+
diagnostics.append(f"{summary_cap_excluded} summaries excluded by mode cap.")
|
|
202
|
+
return diagnostics[:5]
|
|
203
|
+
|
|
204
|
+
|
|
164
205
|
def _pack_watch(
|
|
165
206
|
agent: str,
|
|
166
207
|
task: str,
|
|
@@ -45,13 +45,8 @@ def register(app: typer.Typer) -> None:
|
|
|
45
45
|
+ content.count("Included as: **symbols**")
|
|
46
46
|
)
|
|
47
47
|
|
|
48
|
-
full_files = [f for f in scan_result.packable
|
|
49
|
-
if f.estimated_tokens <= cfg.context.max_file_tokens]
|
|
50
|
-
manual_estimate = min(after_ignore, sum(f.estimated_tokens for f in full_files[:20]))
|
|
51
|
-
vs_manual = (1 - packed / manual_estimate) * 100 if manual_estimate > 0 else 0
|
|
52
|
-
|
|
53
48
|
# --- Session info ---
|
|
54
|
-
from agentpack.session.state import load_session
|
|
49
|
+
from agentpack.session.state import load_session
|
|
55
50
|
session = load_session(root)
|
|
56
51
|
|
|
57
52
|
if session:
|
|
@@ -80,9 +75,19 @@ def register(app: typer.Typer) -> None:
|
|
|
80
75
|
except Exception:
|
|
81
76
|
pass
|
|
82
77
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
78
|
+
if meta:
|
|
79
|
+
context_path_obj = root / meta.get("context_path", "")
|
|
80
|
+
if context_path_obj.exists():
|
|
81
|
+
top_files = _parse_top_files(context_path_obj)
|
|
82
|
+
|
|
83
|
+
token_by_path = {f.path: f.estimated_tokens for f in scan_result.packable}
|
|
84
|
+
top_estimate = sum(token_by_path.get(path, 0) for path, _mode, _why in top_files[:20])
|
|
85
|
+
if top_estimate <= 0:
|
|
86
|
+
full_files = [f for f in scan_result.packable
|
|
87
|
+
if f.estimated_tokens <= cfg.context.max_file_tokens]
|
|
88
|
+
top_estimate = sum(f.estimated_tokens for f in full_files[:20])
|
|
89
|
+
top_estimate = min(after_ignore, top_estimate)
|
|
90
|
+
vs_top_files = (1 - packed / top_estimate) * 100 if top_estimate > 0 else 0
|
|
86
91
|
|
|
87
92
|
# --- Token table ---
|
|
88
93
|
token_tbl = Table(title="Last Context", box=box.SIMPLE, show_header=False, padding=(0, 2))
|
|
@@ -92,7 +97,7 @@ def register(app: typer.Typer) -> None:
|
|
|
92
97
|
token_tbl.add_row("after ignore", f"{after_ignore:,}")
|
|
93
98
|
token_tbl.add_row("packed tokens", f"{packed:,}")
|
|
94
99
|
token_tbl.add_row("vs raw repo", f"[green]{saving:.1f}% smaller[/]")
|
|
95
|
-
token_tbl.add_row("vs
|
|
100
|
+
token_tbl.add_row("vs top-20 full files", f"[green]{vs_top_files:.1f}% smaller[/]")
|
|
96
101
|
token_tbl.add_row("files ignored", f"{ignored_count:,}")
|
|
97
102
|
token_tbl.add_row("files full", f"{included_count:,}")
|
|
98
103
|
token_tbl.add_row("files summarized", f"{summarized_count:,}")
|
|
@@ -115,17 +120,34 @@ def register(app: typer.Typer) -> None:
|
|
|
115
120
|
avg_recall = sum(r["selection_recall"] for r in accuracy_rows) / len(accuracy_rows)
|
|
116
121
|
avg_precision = sum(r["selection_precision"] for r in accuracy_rows) / len(accuracy_rows)
|
|
117
122
|
avg_f1 = sum(r["selection_f1"] for r in accuracy_rows) / len(accuracy_rows)
|
|
123
|
+
token_rows = [r for r in accuracy_rows if "selection_token_precision" in r]
|
|
124
|
+
avg_token_precision = (
|
|
125
|
+
sum(r["selection_token_precision"] for r in token_rows) / len(token_rows)
|
|
126
|
+
if token_rows else None
|
|
127
|
+
)
|
|
128
|
+
mode_token_precision: dict[str, float] = {}
|
|
129
|
+
for mode in ("full", "symbols", "summary"):
|
|
130
|
+
key = f"selection_token_precision_{mode}"
|
|
131
|
+
rows = [r for r in accuracy_rows if key in r]
|
|
132
|
+
if rows:
|
|
133
|
+
mode_token_precision[mode] = sum(r[key] for r in rows) / len(rows)
|
|
118
134
|
console.print()
|
|
119
135
|
acc_tbl = Table(title=f"Selection Accuracy (last {len(accuracy_rows)} runs)", box=box.SIMPLE, show_header=False, padding=(0, 2))
|
|
120
136
|
acc_tbl.add_column(style="dim")
|
|
121
137
|
acc_tbl.add_column(justify="right", style="bold")
|
|
122
138
|
acc_tbl.add_row("avg recall", f"{avg_recall:.1%}")
|
|
123
139
|
acc_tbl.add_row("avg precision", f"{avg_precision:.1%}")
|
|
140
|
+
if avg_token_precision is not None:
|
|
141
|
+
acc_tbl.add_row("avg token precision", f"{avg_token_precision:.1%}")
|
|
142
|
+
for mode, value in mode_token_precision.items():
|
|
143
|
+
acc_tbl.add_row(f"{mode} token precision", f"{value:.1%}")
|
|
124
144
|
acc_tbl.add_row("avg F1", f"{avg_f1:.1%}")
|
|
125
145
|
console.print(acc_tbl)
|
|
126
146
|
console.print("[dim]recall = how many changed files were in the previous pack[/]")
|
|
147
|
+
if avg_token_precision is not None:
|
|
148
|
+
console.print("[dim]token precision = share of previous pack tokens spent on files later changed[/]")
|
|
127
149
|
|
|
128
|
-
console.print("[dim]'
|
|
150
|
+
console.print("[dim]'top-20 full files' = raw full contents for top included files, capped at 20[/]")
|
|
129
151
|
|
|
130
152
|
|
|
131
153
|
def _load_accuracy_rows(metrics_path: Path, n: int = 10) -> list[dict]:
|
|
@@ -22,6 +22,10 @@ class ContextConfig(BaseModel):
|
|
|
22
22
|
default_budget: int = 25000
|
|
23
23
|
default_mode: str = "balanced"
|
|
24
24
|
max_file_tokens: int = 4000
|
|
25
|
+
min_summary_score: float = 60
|
|
26
|
+
max_summary_files_minimal: int = 15
|
|
27
|
+
max_summary_files_balanced: int = 40
|
|
28
|
+
max_summary_files_deep: int = 0
|
|
25
29
|
include_tests: bool = True
|
|
26
30
|
include_configs: bool = True
|
|
27
31
|
include_receipts: bool = True
|
|
@@ -91,6 +95,10 @@ exclude_globs = []
|
|
|
91
95
|
default_budget = 25000 # token budget per pack
|
|
92
96
|
default_mode = "balanced" # minimal | balanced | deep
|
|
93
97
|
max_file_tokens = 4000 # files larger than this are summarised, not inlined
|
|
98
|
+
min_summary_score = 60 # unchanged summary files below this score are excluded
|
|
99
|
+
max_summary_files_minimal = 15 # 0 = no cap
|
|
100
|
+
max_summary_files_balanced = 40 # 0 = no cap
|
|
101
|
+
max_summary_files_deep = 0 # deep mode stays uncapped
|
|
94
102
|
include_tests = true
|
|
95
103
|
include_configs = true
|
|
96
104
|
include_receipts = true
|
|
@@ -124,11 +124,14 @@ def select_files(
|
|
|
124
124
|
budget: int,
|
|
125
125
|
max_file_tokens: int,
|
|
126
126
|
keywords: set[str] | None = None,
|
|
127
|
+
min_summary_score: float = 0,
|
|
128
|
+
max_summary_files: int = 0,
|
|
127
129
|
) -> tuple[list[SelectedFile], list[Receipt]]:
|
|
128
130
|
opts = _MODE_WEIGHTS[mode]
|
|
129
131
|
selected: list[SelectedFile] = []
|
|
130
132
|
receipts: list[Receipt] = []
|
|
131
133
|
tokens_used = 0
|
|
134
|
+
summaries_used = 0
|
|
132
135
|
kw = keywords or set()
|
|
133
136
|
|
|
134
137
|
for fi, score, reasons in sorted(scored, key=lambda x: -x[1]):
|
|
@@ -142,6 +145,12 @@ def select_files(
|
|
|
142
145
|
|
|
143
146
|
is_changed = fi.path in changed_paths
|
|
144
147
|
summary_data = summaries.get(fi.path)
|
|
148
|
+
will_be_summary = not is_changed and not (
|
|
149
|
+
opts["extra_full"] and fi.estimated_tokens <= max_file_tokens
|
|
150
|
+
)
|
|
151
|
+
if will_be_summary and score < min_summary_score:
|
|
152
|
+
receipts.append(Receipt(path=fi.path, action="excluded", reason="summary score below floor"))
|
|
153
|
+
continue
|
|
145
154
|
|
|
146
155
|
# Determine inclusion mode
|
|
147
156
|
if is_changed and fi.estimated_tokens <= max_file_tokens:
|
|
@@ -163,11 +172,17 @@ def select_files(
|
|
|
163
172
|
content = None
|
|
164
173
|
tok = min(fi.estimated_tokens, 200)
|
|
165
174
|
|
|
175
|
+
if mode_str == "summary" and max_summary_files > 0 and summaries_used >= max_summary_files:
|
|
176
|
+
receipts.append(Receipt(path=fi.path, action="excluded", reason="summary cap reached"))
|
|
177
|
+
continue
|
|
178
|
+
|
|
166
179
|
if tokens_used + tok > budget:
|
|
167
180
|
receipts.append(Receipt(path=fi.path, action="excluded", reason="budget exhausted"))
|
|
168
181
|
continue
|
|
169
182
|
|
|
170
183
|
tokens_used += tok
|
|
184
|
+
if mode_str == "summary":
|
|
185
|
+
summaries_used += 1
|
|
171
186
|
|
|
172
187
|
# Build symbol list
|
|
173
188
|
syms: list[Symbol] = []
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|