agentpack-cli 0.1.20__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/PKG-INFO +19 -9
  2. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/README.md +18 -8
  3. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/pyproject.toml +1 -1
  4. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/__init__.py +1 -1
  5. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/ranking.py +84 -32
  6. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/application/pack_service.py +59 -5
  7. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/doctor.py +34 -0
  8. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/explain.py +2 -0
  9. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/pack.py +41 -0
  10. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/stats.py +33 -11
  11. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/config.py +8 -0
  12. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/context_pack.py +15 -0
  13. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/.gitignore +0 -0
  14. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/LICENSE +0 -0
  15. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/__init__.py +0 -0
  16. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/antigravity.py +0 -0
  17. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/base.py +0 -0
  18. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/claude.py +0 -0
  19. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/codex.py +0 -0
  20. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/cursor.py +0 -0
  21. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/detect.py +0 -0
  22. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/generic.py +0 -0
  23. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/adapters/windsurf.py +0 -0
  24. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/__init__.py +0 -0
  25. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/dependency_graph.py +0 -0
  26. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/go_imports.py +0 -0
  27. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/java_imports.py +0 -0
  28. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/js_ts_imports.py +0 -0
  29. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/python_imports.py +0 -0
  30. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/rust_imports.py +0 -0
  31. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/symbols.py +0 -0
  32. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/analysis/tests.py +0 -0
  33. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/application/__init__.py +0 -0
  34. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/cli.py +0 -0
  35. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/__init__.py +0 -0
  36. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/_shared.py +0 -0
  37. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/benchmark.py +0 -0
  38. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/claude_cmd.py +0 -0
  39. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/diff.py +0 -0
  40. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/hook_cmd.py +0 -0
  41. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/init.py +0 -0
  42. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/install.py +0 -0
  43. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/mcp_cmd.py +0 -0
  44. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/monitor.py +0 -0
  45. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/scan.py +0 -0
  46. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/status.py +0 -0
  47. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/summarize.py +0 -0
  48. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/commands/watch.py +0 -0
  49. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/__init__.py +0 -0
  50. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/bootstrap.py +0 -0
  51. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/cache.py +0 -0
  52. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/diff.py +0 -0
  53. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/git.py +0 -0
  54. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/git_hooks.py +0 -0
  55. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/global_install.py +0 -0
  56. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/ignore.py +0 -0
  57. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/merkle.py +0 -0
  58. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/models.py +0 -0
  59. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/redactor.py +0 -0
  60. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/scanner.py +0 -0
  61. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/snapshot.py +0 -0
  62. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/token_estimator.py +0 -0
  63. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/core/vscode_tasks.py +0 -0
  64. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/data/agentpack.md +0 -0
  65. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/__init__.py +0 -0
  66. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/antigravity.py +0 -0
  67. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/claude.py +0 -0
  68. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/codex.py +0 -0
  69. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/cursor.py +0 -0
  70. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/installers/windsurf.py +0 -0
  71. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/integrations/__init__.py +0 -0
  72. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/integrations/git_hooks.py +0 -0
  73. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/integrations/global_install.py +0 -0
  74. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/integrations/vscode_tasks.py +0 -0
  75. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/mcp_server.py +0 -0
  76. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/renderers/__init__.py +0 -0
  77. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/renderers/compact.py +0 -0
  78. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/renderers/markdown.py +0 -0
  79. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/renderers/receipts.py +0 -0
  80. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/session/__init__.py +0 -0
  81. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/session/state.py +0 -0
  82. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/summaries/__init__.py +0 -0
  83. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/summaries/base.py +0 -0
  84. {agentpack_cli-0.1.20 → agentpack_cli-0.1.21}/src/agentpack/summaries/offline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentpack-cli
3
- Version: 0.1.20
3
+ Version: 0.1.21
4
4
  Summary: Token-aware context packing for AI coding agents — Claude, Cursor, Windsurf, and Codex
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
44
44
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
45
45
  [![CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml/badge.svg)](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
46
46
 
47
- > **Status: alpha (v0.1.20).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
47
+ > **Status: alpha (v0.1.21).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
48
48
  >
49
49
  > **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
50
50
 
@@ -72,8 +72,9 @@ AgentPack solves this with a one-time offline analysis pass:
72
72
 
73
73
  1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
74
74
  2. **On each task** — uses git diff + import graph traversal + keyword scoring to rank every file by relevance to what you're working on.
75
- 3. **Packs a tight context document** — changed files get full content, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
76
- 4. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
75
+ 3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
76
+ 4. **Explains pack quality** — noisy-pack diagnostics, score receipts, and token-precision metrics show when the pack is broad and where token noise lives.
77
+ 5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
77
78
 
78
79
  The result: your agent starts every session with a focused, accurate picture of the relevant code — without you doing anything after opt-in.
79
80
 
@@ -546,6 +547,7 @@ Some checks failed. Run the suggested commands above to fix.
546
547
  The new checks in `doctor`:
547
548
  - **Local vs global hooks**: warns when Claude hooks are only in the per-project `.claude/settings.json` — context won't auto-inject in other repos
548
549
  - **Slash command presence**: checks both local (`.claude/commands/`) and global (`~/.claude/commands/`) installations
550
+ - **Source checkout mismatch**: warns when you're inside an AgentPack source checkout but the `agentpack` executable imports the installed site-packages copy. Use `PYTHONPATH=src python -m agentpack.cli ...` or `pip install -e .` for local development.
549
551
 
550
552
  ---
551
553
 
@@ -632,9 +634,11 @@ Options:
632
634
 
633
635
  | Mode | What's included |
634
636
  |------|----------------|
635
- | `minimal` | Changed files + direct configs only |
636
- | `balanced` | Changed files + deps + reverse deps + tests + summaries |
637
- | `deep` | Everything in balanced + docs + more full-content files |
637
+ | `minimal` | Changed files + direct configs, with a small summary cap |
638
+ | `balanced` | Changed files + deps + reverse deps + tests + capped summaries |
639
+ | `deep` | Everything in balanced + docs + more full-content files, uncapped summaries |
640
+
641
+ `pack` also prints diagnostics when the pack looks noisy: very short task text, no changed files, mostly filename matches, mostly summaries, many symbol matches, weak summaries excluded by the score floor, or summaries excluded by the mode cap.
638
642
 
639
643
  ---
640
644
 
@@ -832,7 +836,9 @@ Show session state, token statistics, and selection accuracy for the last pack.
832
836
  agentpack stats
833
837
  ```
834
838
 
835
- When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files by score and avg recall/precision/F1 over the last 10 runs.
839
+ When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files from the latest pack and avg recall/precision/F1 over the last 10 runs.
840
+
841
+ Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
836
842
 
837
843
  ---
838
844
 
@@ -917,7 +923,7 @@ agentpack monitor --clear
917
923
  | Large unrelated file | −50 |
918
924
  | Ignored/binary | −100 |
919
925
 
920
- Keyword scoring uses concept synonym expansion — "rate limiting" in the task expands to `throttle`, `leaky`, `bucket`, `quota` etc., so `leaky_bucket.py` ranks correctly even if the file name doesn't literally contain "rate".
926
+ Keyword scoring uses weighted concept synonym expansion — literal task terms are strongest, normalized variants are slightly weaker, and broad concept synonyms are weaker again. "rate limiting" still expands to `throttle`, `leaky`, `bucket`, `quota`, but broad expansions no longer dominate literal task terms. Matching is token-based, so `task` does not accidentally match every `tasks.py`.
921
927
 
922
928
  ---
923
929
 
@@ -934,6 +940,10 @@ ignore_file = ".agentignore"
934
940
  default_budget = 25000
935
941
  default_mode = "balanced"
936
942
  max_file_tokens = 4000
943
+ min_summary_score = 60
944
+ max_summary_files_minimal = 15
945
+ max_summary_files_balanced = 40
946
+ max_summary_files_deep = 0
937
947
  include_tests = true
938
948
  include_configs = true
939
949
  include_receipts = true
@@ -5,7 +5,7 @@
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
6
  [![CI](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml/badge.svg)](https://github.com/vishal2612200/agentpack/actions/workflows/ci.yml)
7
7
 
8
- > **Status: alpha (v0.1.20).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
8
+ > **Status: alpha (v0.1.21).** Works, tested, used in real sessions. Python and JavaScript/TypeScript are the best-supported languages. Not yet validated across a wide range of repos. API may change before 1.0.
9
9
  >
10
10
  > **Platform note:** macOS and Linux are fully supported. Windows support is not yet implemented (git hooks use POSIX shell; the Claude Code session hooks use `python3`/`rm -f`). Contributions welcome.
11
11
 
@@ -33,8 +33,9 @@ AgentPack solves this with a one-time offline analysis pass:
33
33
 
34
34
  1. **Scans your repo once** — builds a summary cache of every file (signatures, imports, responsibilities). No API calls. Takes a few seconds.
35
35
  2. **On each task** — uses git diff + import graph traversal + keyword scoring to rank every file by relevance to what you're working on.
36
- 3. **Packs a tight context document** — changed files get full content, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
37
- 4. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
36
+ 3. **Packs a tight context document** — changed files get full content, large changed files get relevant symbol bodies, dependencies get summaries, everything else gets dropped. Typically 8k–20k tokens for a 200-file repo.
37
+ 4. **Explains pack quality** — noisy-pack diagnostics, score receipts, and token-precision metrics show when the pack is broad and where token noise lives.
38
+ 5. **Stays current** — auto-repacks silently on commit, so next session starts fresh.
38
39
 
39
40
  The result: your agent starts every session with a focused, accurate picture of the relevant code — without you doing anything after opt-in.
40
41
 
@@ -507,6 +508,7 @@ Some checks failed. Run the suggested commands above to fix.
507
508
  The new checks in `doctor`:
508
509
  - **Local vs global hooks**: warns when Claude hooks are only in the per-project `.claude/settings.json` — context won't auto-inject in other repos
509
510
  - **Slash command presence**: checks both local (`.claude/commands/`) and global (`~/.claude/commands/`) installations
511
+ - **Source checkout mismatch**: warns when you're inside an AgentPack source checkout but the `agentpack` executable imports the installed site-packages copy. Use `PYTHONPATH=src python -m agentpack.cli ...` or `pip install -e .` for local development.
510
512
 
511
513
  ---
512
514
 
@@ -593,9 +595,11 @@ Options:
593
595
 
594
596
  | Mode | What's included |
595
597
  |------|----------------|
596
- | `minimal` | Changed files + direct configs only |
597
- | `balanced` | Changed files + deps + reverse deps + tests + summaries |
598
- | `deep` | Everything in balanced + docs + more full-content files |
598
+ | `minimal` | Changed files + direct configs, with a small summary cap |
599
+ | `balanced` | Changed files + deps + reverse deps + tests + capped summaries |
600
+ | `deep` | Everything in balanced + docs + more full-content files, uncapped summaries |
601
+
602
+ `pack` also prints diagnostics when the pack looks noisy: very short task text, no changed files, mostly filename matches, mostly summaries, many symbol matches, weak summaries excluded by the score floor, or summaries excluded by the mode cap.
599
603
 
600
604
  ---
601
605
 
@@ -793,7 +797,9 @@ Show session state, token statistics, and selection accuracy for the last pack.
793
797
  agentpack stats
794
798
  ```
795
799
 
796
- When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files by score and avg recall/precision/F1 over the last 10 runs.
800
+ When a session is active, shows session panel (agent, mode, started, refresh count) above token stats. Also lists top included files from the latest pack and avg recall/precision/F1 over the last 10 runs.
801
+
802
+ Newer metrics include token-weighted precision. File precision answers "how many selected files were later changed"; token precision answers "how many selected tokens were spent on files later changed." `stats` also breaks token precision down by inclusion mode (`full`, `symbols`, `summary`) so summary noise is visible.
797
803
 
798
804
  ---
799
805
 
@@ -878,7 +884,7 @@ agentpack monitor --clear
878
884
  | Large unrelated file | −50 |
879
885
  | Ignored/binary | −100 |
880
886
 
881
- Keyword scoring uses concept synonym expansion — "rate limiting" in the task expands to `throttle`, `leaky`, `bucket`, `quota` etc., so `leaky_bucket.py` ranks correctly even if the file name doesn't literally contain "rate".
887
+ Keyword scoring uses weighted concept synonym expansion — literal task terms are strongest, normalized variants are slightly weaker, and broad concept synonyms are weaker again. "rate limiting" still expands to `throttle`, `leaky`, `bucket`, `quota`, but broad expansions no longer dominate literal task terms. Matching is token-based, so `task` does not accidentally match every `tasks.py`.
882
888
 
883
889
  ---
884
890
 
@@ -895,6 +901,10 @@ ignore_file = ".agentignore"
895
901
  default_budget = 25000
896
902
  default_mode = "balanced"
897
903
  max_file_tokens = 4000
904
+ min_summary_score = 60
905
+ max_summary_files_minimal = 15
906
+ max_summary_files_balanced = 40
907
+ max_summary_files_deep = 0
898
908
  include_tests = true
899
909
  include_configs = true
900
910
  include_receipts = true
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "agentpack-cli"
3
- version = "0.1.20"
3
+ version = "0.1.21"
4
4
  description = "Token-aware context packing for AI coding agents — Claude, Cursor, Windsurf, and Codex"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -1,3 +1,3 @@
1
1
  """AgentPack — token-aware context packing for AI coding agents."""
2
2
 
3
- __version__ = "0.1.20"
3
+ __version__ = "0.1.21"
@@ -181,29 +181,38 @@ CONFIG_NAMES = {
181
181
  _DEFAULT_WEIGHTS = ScoringWeights()
182
182
 
183
183
 
184
- def extract_keywords(task: str) -> set[str]:
184
+ def _add_keyword_weight(weights: dict[str, float], keyword: str, weight: float) -> None:
185
+ weights[keyword] = max(weights.get(keyword, 0.0), weight)
186
+
187
+
188
+ def extract_keyword_weights(task: str) -> dict[str, float]:
185
189
  words = re.split(r"[^a-zA-Z0-9]+", task.lower())
186
- keywords: set[str] = set()
190
+ keyword_weights: dict[str, float] = {}
187
191
  for word in words:
188
192
  if len(word) < 3:
189
193
  continue
190
194
  if word in _STOPWORDS:
191
195
  continue
192
- keywords.add(word)
196
+ _add_keyword_weight(keyword_weights, word, 1.0)
193
197
  if word in _VARIANTS:
194
- keywords.add(_VARIANTS[word])
198
+ _add_keyword_weight(keyword_weights, _VARIANTS[word], 0.75)
195
199
 
196
- # expand via concept map (one level only no recursion to avoid explosion)
197
- expanded: set[str] = set()
198
- for kw in keywords:
200
+ # Expand via concept map one level only. Expanded concepts are weaker than
201
+ # literal task words so broad terms like "task" do not dominate ranking.
202
+ expanded: dict[str, float] = {}
203
+ for kw in keyword_weights:
199
204
  if kw in _CONCEPT_MAP:
200
205
  for synonym in _CONCEPT_MAP[kw]:
201
- expanded.add(synonym)
202
- # also apply _VARIANTS to expanded terms
206
+ _add_keyword_weight(expanded, synonym, 0.35)
203
207
  if synonym in _VARIANTS:
204
- expanded.add(_VARIANTS[synonym])
205
- keywords.update(expanded)
206
- return keywords
208
+ _add_keyword_weight(expanded, _VARIANTS[synonym], 0.35)
209
+ for kw, weight in expanded.items():
210
+ _add_keyword_weight(keyword_weights, kw, weight)
211
+ return keyword_weights
212
+
213
+
214
+ def extract_keywords(task: str) -> set[str]:
215
+ return set(extract_keyword_weights(task))
207
216
 
208
217
 
209
218
  def enrich_keywords_from_files(
@@ -255,21 +264,62 @@ def enrich_keywords_from_files(
255
264
  return keywords | set(top)
256
265
 
257
266
 
258
- def _path_matches_keywords(path: str, keywords: set[str]) -> bool:
259
- path_lower = path.lower()
260
- return any(kw in path_lower for kw in keywords)
267
+ def enrich_keyword_weights_from_files(
268
+ keyword_weights: dict[str, float],
269
+ changed_paths: set[str],
270
+ files: list[FileInfo],
271
+ max_new_keywords: int = 20,
272
+ ) -> dict[str, float]:
273
+ enriched = dict(keyword_weights)
274
+ enriched_keywords = enrich_keywords_from_files(set(keyword_weights), changed_paths, files, max_new_keywords)
275
+ for keyword in enriched_keywords - set(keyword_weights):
276
+ enriched[keyword] = 0.5
277
+ return enriched
278
+
279
+
280
+ def _tokens_for_match(text: str) -> set[str]:
281
+ """Return identifier-ish tokens for exact keyword matching."""
282
+ spaced = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", text)
283
+ raw_tokens = re.split(r"[^a-zA-Z0-9]+", spaced.lower())
284
+ return {tok for tok in raw_tokens if tok}
285
+
286
+
287
+ def _keyword_token_weights(keywords: set[str] | dict[str, float]) -> dict[str, float]:
288
+ if isinstance(keywords, dict):
289
+ items = keywords.items()
290
+ else:
291
+ items = ((keyword, 1.0) for keyword in keywords)
292
+
293
+ token_weights: dict[str, float] = {}
294
+ for keyword, weight in items:
295
+ for token in _tokens_for_match(keyword):
296
+ if len(token) >= 3:
297
+ token_weights[token] = max(token_weights.get(token, 0.0), weight)
298
+ return token_weights
299
+
261
300
 
301
+ def _match_weight(text: str, keywords: set[str] | dict[str, float]) -> float:
302
+ token_weights = _keyword_token_weights(keywords)
303
+ matches = _tokens_for_match(text) & set(token_weights)
304
+ return max((token_weights[token] for token in matches), default=0.0)
262
305
 
263
- def _content_matches_keywords(text: str, keywords: set[str]) -> int:
264
- text_lower = text.lower()
265
- return sum(1 for kw in keywords if kw in text_lower)
266
306
 
307
+ def _path_matches_keywords(path: str, keywords: set[str] | dict[str, float]) -> float:
308
+ return _match_weight(path, keywords)
267
309
 
268
- def _symbol_matches_keywords(symbols: list[str], keywords: set[str]) -> bool:
310
+
311
+ def _content_matches_keywords(text: str, keywords: set[str] | dict[str, float]) -> tuple[int, float]:
312
+ token_weights = _keyword_token_weights(keywords)
313
+ text_tokens = _tokens_for_match(text)
314
+ matches = text_tokens & set(token_weights)
315
+ return len(matches), sum(token_weights[token] for token in matches)
316
+
317
+
318
+ def _symbol_matches_keywords(symbols: list[str], keywords: set[str] | dict[str, float]) -> float:
319
+ best_weight = 0.0
269
320
  for sym in symbols:
270
- if any(kw in sym.lower() for kw in keywords):
271
- return True
272
- return False
321
+ best_weight = max(best_weight, _match_weight(sym, keywords))
322
+ return best_weight
273
323
 
274
324
 
275
325
  def score_files(
@@ -278,7 +328,7 @@ def score_files(
278
328
  staged_paths: set[str],
279
329
  recently_modified: list[str],
280
330
  dep_graph: "DependencyGraph | dict",
281
- keywords: set[str],
331
+ keywords: set[str] | dict[str, float],
282
332
  include_tests: bool = True,
283
333
  include_configs: bool = True,
284
334
  weights: ScoringWeights | None = None,
@@ -315,8 +365,9 @@ def score_files(
315
365
  score += w.staged
316
366
  reasons.append("staged")
317
367
 
318
- if _path_matches_keywords(fi.path, keywords):
319
- score += w.filename_keyword
368
+ filename_weight = _path_matches_keywords(fi.path, keywords)
369
+ if filename_weight > 0:
370
+ score += w.filename_keyword * filename_weight
320
371
  reasons.append("filename keyword match")
321
372
 
322
373
  node = dep_graph.get(fi.path)
@@ -327,27 +378,28 @@ def score_files(
327
378
  (s["name"] if isinstance(s, dict) else s.name)
328
379
  for s in raw_syms
329
380
  ]
330
- if _symbol_matches_keywords(sym_names, keywords):
331
- score += w.symbol_keyword
381
+ symbol_weight = _symbol_matches_keywords(sym_names, keywords)
382
+ if symbol_weight > 0:
383
+ score += w.symbol_keyword * symbol_weight
332
384
  reasons.append("symbol keyword match")
333
385
 
334
386
  if fi.content is not None:
335
- hits = _content_matches_keywords(fi.content, keywords)
387
+ hits, hit_weight = _content_matches_keywords(fi.content, keywords)
336
388
  if hits > 0:
337
- score += min(w.content_keyword_max, hits * w.content_keyword_per_hit)
389
+ score += min(w.content_keyword_max, hit_weight * w.content_keyword_per_hit)
338
390
  reasons.append(f"content keyword match ({hits})")
339
391
  elif fi.abs_path.exists():
340
392
  try:
341
393
  text = fi.abs_path.read_text(errors="replace")
342
- hits = _content_matches_keywords(text, keywords)
394
+ hits, hit_weight = _content_matches_keywords(text, keywords)
343
395
  if hits > 0:
344
- score += min(w.content_keyword_max, hits * w.content_keyword_per_hit)
396
+ score += min(w.content_keyword_max, hit_weight * w.content_keyword_per_hit)
345
397
  reasons.append(f"content keyword match ({hits})")
346
398
  except OSError:
347
399
  pass
348
400
 
349
401
  for dep_path in node.imports:
350
- if dep_path in changed_paths or _path_matches_keywords(dep_path, keywords):
402
+ if dep_path in changed_paths or _path_matches_keywords(dep_path, keywords) > 0:
351
403
  score += w.direct_dep
352
404
  reasons.append("direct dependency of changed file")
353
405
  break
@@ -16,7 +16,12 @@ from agentpack.core import git
16
16
  from agentpack.core.context_pack import select_files, save_pack_metadata
17
17
  from agentpack.core.models import ContextPack, DependencyGraph, FileInfo, ScanResult, SelectedFile, Receipt
18
18
  from agentpack.core.token_estimator import estimate_tokens
19
- from agentpack.analysis.ranking import score_files, extract_keywords, enrich_keywords_from_files, boost_paired_tests
19
+ from agentpack.analysis.ranking import (
20
+ score_files,
21
+ extract_keyword_weights,
22
+ enrich_keyword_weights_from_files,
23
+ boost_paired_tests,
24
+ )
20
25
  from agentpack.analysis.tests import find_related_tests
21
26
  from agentpack.analysis import dependency_graph as dep_graph_mod
22
27
  from agentpack.summaries.base import build_all_summaries
@@ -131,8 +136,9 @@ class FileRanker:
131
136
  root: Path | None = None,
132
137
  ) -> RankResult:
133
138
  from agentpack.core import git as _git
134
- keywords = extract_keywords(task)
135
- keywords = enrich_keywords_from_files(keywords, changes.all_changed, packable)
139
+ keyword_weights = extract_keyword_weights(task)
140
+ keyword_weights = enrich_keyword_weights_from_files(keyword_weights, changes.all_changed, packable)
141
+ keywords = set(keyword_weights)
136
142
  all_paths = {f.path for f in packable}
137
143
 
138
144
  for fi in packable:
@@ -149,7 +155,7 @@ class FileRanker:
149
155
  staged_paths=changes.git_staged,
150
156
  recently_modified=changes.recently_modified,
151
157
  dep_graph=dep_graph,
152
- keywords=keywords,
158
+ keywords=keyword_weights,
153
159
  include_tests=cfg.context.include_tests,
154
160
  include_configs=cfg.context.include_configs,
155
161
  weights=cfg.scoring,
@@ -209,6 +215,8 @@ class PackPlanner:
209
215
  budget=effective_budget,
210
216
  max_file_tokens=cfg.context.max_file_tokens,
211
217
  keywords=rank_result.keywords,
218
+ min_summary_score=cfg.context.min_summary_score,
219
+ max_summary_files=_summary_cap_for_mode(cfg, request.mode),
212
220
  )
213
221
  phase_times["select"] = time.perf_counter() - t0
214
222
 
@@ -317,6 +325,8 @@ class PackService:
317
325
  selected_count=len(plan.selected),
318
326
  changed_count=len(plan.all_changed),
319
327
  selected_paths=[sf.path for sf in plan.selected],
328
+ selected_tokens={sf.path: _sf_tokens(sf) for sf in plan.selected},
329
+ selected_modes={sf.path: sf.include_mode for sf in plan.selected},
320
330
  selected_hints=[{"path": sf.path, "why": sf.reasons[0] if sf.reasons else ""} for sf in plan.selected[:8]],
321
331
  current_changed=plan.all_changed,
322
332
  excluded_count=len(excluded_receipts),
@@ -347,6 +357,16 @@ def _sf_tokens(sf: SelectedFile) -> int:
347
357
  return estimate_tokens("\n".join(parts)) if parts else 50
348
358
 
349
359
 
360
+ def _summary_cap_for_mode(cfg: Any, mode: str) -> int:
361
+ if mode == "minimal":
362
+ return cfg.context.max_summary_files_minimal
363
+ if mode == "balanced":
364
+ return cfg.context.max_summary_files_balanced
365
+ if mode == "deep":
366
+ return cfg.context.max_summary_files_deep
367
+ return 0
368
+
369
+
350
370
  def _load_last_record(metrics_path: Path) -> dict[str, Any] | None:
351
371
  """Return the most recent metrics record that has selected_paths."""
352
372
  if not metrics_path.exists():
@@ -390,11 +410,41 @@ def _compute_selection_accuracy(
390
410
  recall = len(hits) / len(actual_changed)
391
411
  precision = len(hits) / len(prev_selected)
392
412
  f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0
393
- return {
413
+ result = {
394
414
  "selection_recall": round(recall, 3),
395
415
  "selection_precision": round(precision, 3),
396
416
  "selection_f1": round(f1, 3),
397
417
  }
418
+ token_map = prev.get("selected_tokens") or {}
419
+ if isinstance(token_map, dict):
420
+ total_tokens = sum(v for v in token_map.values() if isinstance(v, int | float))
421
+ hit_tokens = sum(
422
+ token_map.get(path, 0)
423
+ for path in hits
424
+ if isinstance(token_map.get(path, 0), int | float)
425
+ )
426
+ if total_tokens > 0:
427
+ token_precision = hit_tokens / total_tokens
428
+ result["selection_token_precision"] = round(token_precision, 3)
429
+ result["selection_noise_pct"] = round((1 - token_precision) * 100, 1)
430
+ mode_map = prev.get("selected_modes") or {}
431
+ if isinstance(mode_map, dict):
432
+ for mode in ("full", "symbols", "summary"):
433
+ mode_paths = {path for path, value in mode_map.items() if value == mode}
434
+ mode_total = sum(
435
+ token_map.get(path, 0)
436
+ for path in mode_paths
437
+ if isinstance(token_map.get(path, 0), int | float)
438
+ )
439
+ if mode_total <= 0:
440
+ continue
441
+ mode_hit_tokens = sum(
442
+ token_map.get(path, 0)
443
+ for path in mode_paths & hits
444
+ if isinstance(token_map.get(path, 0), int | float)
445
+ )
446
+ result[f"selection_token_precision_{mode}"] = round(mode_hit_tokens / mode_total, 3)
447
+ return result
398
448
 
399
449
 
400
450
  def _record_metrics(
@@ -409,6 +459,8 @@ def _record_metrics(
409
459
  selected_count: int,
410
460
  changed_count: int,
411
461
  selected_paths: list[str],
462
+ selected_tokens: dict[str, int],
463
+ selected_modes: dict[str, str],
412
464
  current_changed: set[str],
413
465
  selected_hints: list[dict] | None = None,
414
466
  excluded_count: int = 0,
@@ -428,6 +480,8 @@ def _record_metrics(
428
480
  "excluded_files": excluded_count,
429
481
  "excluded_paths": excluded_paths or [],
430
482
  "selected_paths": selected_paths,
483
+ "selected_tokens": selected_tokens,
484
+ "selected_modes": selected_modes,
431
485
  "selected_hints": selected_hints or [],
432
486
  "phases": {k: round(v, 3) for k, v in phase_times.items()},
433
487
  "total_s": round(sum(phase_times.values()), 3),
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import os
4
4
  import shutil
5
5
  import subprocess
6
+ import sys
6
7
  from pathlib import Path
7
8
 
8
9
  import typer
@@ -37,6 +38,15 @@ def register(app: typer.Typer) -> None:
37
38
  console.print(" [red]✗[/] agentpack not on PATH — run: pipx install agentpack-cli")
38
39
  ok = False
39
40
 
41
+ try:
42
+ root = _root()
43
+ warning = _source_checkout_warning(root, Path(__file__), sys.executable, binary)
44
+ if warning:
45
+ console.print(f" [yellow]![/] {warning}")
46
+ ok = False
47
+ except Exception:
48
+ pass
49
+
40
50
  # --- Git template hooks ---
41
51
  console.print("\n[bold]Git template hooks (~/.git-templates/hooks/)[/]")
42
52
  hooks_dir = _GIT_TEMPLATE_DIR / "hooks"
@@ -234,6 +244,30 @@ def _check_agent_file(root: Path, filename: str, agent: str) -> None:
234
244
  console.print(f" [dim]-[/] {filename} not present (optional)")
235
245
 
236
246
 
247
+ def _source_checkout_warning(
248
+ root: Path,
249
+ package_file: Path,
250
+ executable: str,
251
+ binary: str | None,
252
+ ) -> str | None:
253
+ source_pkg = root / "src" / "agentpack"
254
+ if not source_pkg.exists():
255
+ return None
256
+ try:
257
+ package_path = package_file.resolve()
258
+ source_path = source_pkg.resolve()
259
+ except OSError:
260
+ return None
261
+ if package_path.is_relative_to(source_path):
262
+ return None
263
+ binary_text = f" via {binary}" if binary else ""
264
+ return (
265
+ "source checkout detected, but CLI imports installed package "
266
+ f"at {package_path}{binary_text}. Use `PYTHONPATH=src python -m agentpack.cli ...` "
267
+ "or install editable with `pip install -e .`."
268
+ )
269
+
270
+
237
271
  def _print_summary(ok: bool) -> None:
238
272
  console.print("")
239
273
  if ok:
@@ -167,6 +167,8 @@ def register(app: typer.Typer) -> None:
167
167
  budget=deep_budget,
168
168
  max_file_tokens=cfg.context.max_file_tokens,
169
169
  keywords=plan.keywords,
170
+ min_summary_score=cfg.context.min_summary_score,
171
+ max_summary_files=0,
170
172
  )
171
173
  deep_selected_paths = {
172
174
  r.path for r in deep_receipts if r.action in ("included", "summarized")
@@ -133,6 +133,16 @@ def _print_pack_summary(result: PackResult) -> None:
133
133
  console.print()
134
134
  console.print(Columns([stats, files_tbl], equal=False, expand=False))
135
135
 
136
+ diagnostics = _pack_diagnostics(result)
137
+ if diagnostics:
138
+ diag_text = "\n".join(f" [yellow]![/] {line}" for line in diagnostics)
139
+ console.print(Panel(
140
+ diag_text,
141
+ title="[bold yellow]Pack diagnostics[/]",
142
+ border_style="yellow",
143
+ padding=(0, 1),
144
+ ))
145
+
136
146
  if changed_files:
137
147
  console.print(f"\n[bold]Changed files[/] ({len(changed_files)}):")
138
148
  console.print(changed_lines)
@@ -161,6 +171,37 @@ def _print_pack_summary(result: PackResult) -> None:
161
171
  console.print()
162
172
 
163
173
 
174
+ def _pack_diagnostics(result: PackResult) -> list[str]:
175
+ selected = result.pack.selected_files
176
+ receipts = result.pack.receipts
177
+ diagnostics: list[str] = []
178
+ summary_count = sum(1 for sf in selected if sf.include_mode == "summary")
179
+ filename_matches = sum(1 for sf in selected if "filename keyword match" in sf.reasons)
180
+ symbol_matches = sum(1 for sf in selected if "symbol keyword match" in sf.reasons)
181
+ score_floor_excluded = sum(1 for r in receipts if r.reason == "summary score below floor")
182
+ summary_cap_excluded = sum(1 for r in receipts if r.reason == "summary cap reached")
183
+
184
+ task_words = [
185
+ part for part in result.pack.task.replace("_", " ").replace("-", " ").split()
186
+ if len(part) >= 3
187
+ ]
188
+ if len(task_words) <= 3:
189
+ diagnostics.append("Task is very short; add subsystem, file, or symptom words for better precision.")
190
+ if not result.changed_files:
191
+ diagnostics.append("No changed files detected; pack relies mostly on task keywords and cached summaries.")
192
+ if selected and filename_matches / len(selected) >= 0.6:
193
+ diagnostics.append("Most selected files matched by filename; task terms may be broad.")
194
+ if selected and summary_count / len(selected) >= 0.7:
195
+ diagnostics.append("Pack is mostly summaries; use minimal mode or a more specific task for edit work.")
196
+ if symbol_matches > 25:
197
+ diagnostics.append(f"Many symbol matches selected ({symbol_matches}); inspect repeated task terms with explain.")
198
+ if score_floor_excluded:
199
+ diagnostics.append(f"{score_floor_excluded} weak summaries excluded by score floor.")
200
+ if summary_cap_excluded:
201
+ diagnostics.append(f"{summary_cap_excluded} summaries excluded by mode cap.")
202
+ return diagnostics[:5]
203
+
204
+
164
205
  def _pack_watch(
165
206
  agent: str,
166
207
  task: str,
@@ -45,13 +45,8 @@ def register(app: typer.Typer) -> None:
45
45
  + content.count("Included as: **symbols**")
46
46
  )
47
47
 
48
- full_files = [f for f in scan_result.packable
49
- if f.estimated_tokens <= cfg.context.max_file_tokens]
50
- manual_estimate = min(after_ignore, sum(f.estimated_tokens for f in full_files[:20]))
51
- vs_manual = (1 - packed / manual_estimate) * 100 if manual_estimate > 0 else 0
52
-
53
48
  # --- Session info ---
54
- from agentpack.session.state import load_session, CONTEXT_FILE
49
+ from agentpack.session.state import load_session
55
50
  session = load_session(root)
56
51
 
57
52
  if session:
@@ -80,9 +75,19 @@ def register(app: typer.Typer) -> None:
80
75
  except Exception:
81
76
  pass
82
77
 
83
- context_path_obj = root / CONTEXT_FILE
84
- if context_path_obj.exists():
85
- top_files = _parse_top_files(context_path_obj)
78
+ if meta:
79
+ context_path_obj = root / meta.get("context_path", "")
80
+ if context_path_obj.exists():
81
+ top_files = _parse_top_files(context_path_obj)
82
+
83
+ token_by_path = {f.path: f.estimated_tokens for f in scan_result.packable}
84
+ top_estimate = sum(token_by_path.get(path, 0) for path, _mode, _why in top_files[:20])
85
+ if top_estimate <= 0:
86
+ full_files = [f for f in scan_result.packable
87
+ if f.estimated_tokens <= cfg.context.max_file_tokens]
88
+ top_estimate = sum(f.estimated_tokens for f in full_files[:20])
89
+ top_estimate = min(after_ignore, top_estimate)
90
+ vs_top_files = (1 - packed / top_estimate) * 100 if top_estimate > 0 else 0
86
91
 
87
92
  # --- Token table ---
88
93
  token_tbl = Table(title="Last Context", box=box.SIMPLE, show_header=False, padding=(0, 2))
@@ -92,7 +97,7 @@ def register(app: typer.Typer) -> None:
92
97
  token_tbl.add_row("after ignore", f"{after_ignore:,}")
93
98
  token_tbl.add_row("packed tokens", f"{packed:,}")
94
99
  token_tbl.add_row("vs raw repo", f"[green]{saving:.1f}% smaller[/]")
95
- token_tbl.add_row("vs manual (~20 files)", f"[green]{vs_manual:.1f}% smaller[/]")
100
+ token_tbl.add_row("vs top-20 full files", f"[green]{vs_top_files:.1f}% smaller[/]")
96
101
  token_tbl.add_row("files ignored", f"{ignored_count:,}")
97
102
  token_tbl.add_row("files full", f"{included_count:,}")
98
103
  token_tbl.add_row("files summarized", f"{summarized_count:,}")
@@ -115,17 +120,34 @@ def register(app: typer.Typer) -> None:
115
120
  avg_recall = sum(r["selection_recall"] for r in accuracy_rows) / len(accuracy_rows)
116
121
  avg_precision = sum(r["selection_precision"] for r in accuracy_rows) / len(accuracy_rows)
117
122
  avg_f1 = sum(r["selection_f1"] for r in accuracy_rows) / len(accuracy_rows)
123
+ token_rows = [r for r in accuracy_rows if "selection_token_precision" in r]
124
+ avg_token_precision = (
125
+ sum(r["selection_token_precision"] for r in token_rows) / len(token_rows)
126
+ if token_rows else None
127
+ )
128
+ mode_token_precision: dict[str, float] = {}
129
+ for mode in ("full", "symbols", "summary"):
130
+ key = f"selection_token_precision_{mode}"
131
+ rows = [r for r in accuracy_rows if key in r]
132
+ if rows:
133
+ mode_token_precision[mode] = sum(r[key] for r in rows) / len(rows)
118
134
  console.print()
119
135
  acc_tbl = Table(title=f"Selection Accuracy (last {len(accuracy_rows)} runs)", box=box.SIMPLE, show_header=False, padding=(0, 2))
120
136
  acc_tbl.add_column(style="dim")
121
137
  acc_tbl.add_column(justify="right", style="bold")
122
138
  acc_tbl.add_row("avg recall", f"{avg_recall:.1%}")
123
139
  acc_tbl.add_row("avg precision", f"{avg_precision:.1%}")
140
+ if avg_token_precision is not None:
141
+ acc_tbl.add_row("avg token precision", f"{avg_token_precision:.1%}")
142
+ for mode, value in mode_token_precision.items():
143
+ acc_tbl.add_row(f"{mode} token precision", f"{value:.1%}")
124
144
  acc_tbl.add_row("avg F1", f"{avg_f1:.1%}")
125
145
  console.print(acc_tbl)
126
146
  console.print("[dim]recall = how many changed files were in the previous pack[/]")
147
+ if avg_token_precision is not None:
148
+ console.print("[dim]token precision = share of previous pack tokens spent on files later changed[/]")
127
149
 
128
- console.print("[dim]'manual' = hand-picking 20 most relevant full files[/]")
150
+ console.print("[dim]'top-20 full files' = raw full contents for top included files, capped at 20[/]")
129
151
 
130
152
 
131
153
  def _load_accuracy_rows(metrics_path: Path, n: int = 10) -> list[dict]:
@@ -22,6 +22,10 @@ class ContextConfig(BaseModel):
22
22
  default_budget: int = 25000
23
23
  default_mode: str = "balanced"
24
24
  max_file_tokens: int = 4000
25
+ min_summary_score: float = 60
26
+ max_summary_files_minimal: int = 15
27
+ max_summary_files_balanced: int = 40
28
+ max_summary_files_deep: int = 0
25
29
  include_tests: bool = True
26
30
  include_configs: bool = True
27
31
  include_receipts: bool = True
@@ -91,6 +95,10 @@ exclude_globs = []
91
95
  default_budget = 25000 # token budget per pack
92
96
  default_mode = "balanced" # minimal | balanced | deep
93
97
  max_file_tokens = 4000 # files larger than this are summarised, not inlined
98
+ min_summary_score = 60 # unchanged summary files below this score are excluded
99
+ max_summary_files_minimal = 15 # 0 = no cap
100
+ max_summary_files_balanced = 40 # 0 = no cap
101
+ max_summary_files_deep = 0 # deep mode stays uncapped
94
102
  include_tests = true
95
103
  include_configs = true
96
104
  include_receipts = true
@@ -124,11 +124,14 @@ def select_files(
124
124
  budget: int,
125
125
  max_file_tokens: int,
126
126
  keywords: set[str] | None = None,
127
+ min_summary_score: float = 0,
128
+ max_summary_files: int = 0,
127
129
  ) -> tuple[list[SelectedFile], list[Receipt]]:
128
130
  opts = _MODE_WEIGHTS[mode]
129
131
  selected: list[SelectedFile] = []
130
132
  receipts: list[Receipt] = []
131
133
  tokens_used = 0
134
+ summaries_used = 0
132
135
  kw = keywords or set()
133
136
 
134
137
  for fi, score, reasons in sorted(scored, key=lambda x: -x[1]):
@@ -142,6 +145,12 @@ def select_files(
142
145
 
143
146
  is_changed = fi.path in changed_paths
144
147
  summary_data = summaries.get(fi.path)
148
+ will_be_summary = not is_changed and not (
149
+ opts["extra_full"] and fi.estimated_tokens <= max_file_tokens
150
+ )
151
+ if will_be_summary and score < min_summary_score:
152
+ receipts.append(Receipt(path=fi.path, action="excluded", reason="summary score below floor"))
153
+ continue
145
154
 
146
155
  # Determine inclusion mode
147
156
  if is_changed and fi.estimated_tokens <= max_file_tokens:
@@ -163,11 +172,17 @@ def select_files(
163
172
  content = None
164
173
  tok = min(fi.estimated_tokens, 200)
165
174
 
175
+ if mode_str == "summary" and max_summary_files > 0 and summaries_used >= max_summary_files:
176
+ receipts.append(Receipt(path=fi.path, action="excluded", reason="summary cap reached"))
177
+ continue
178
+
166
179
  if tokens_used + tok > budget:
167
180
  receipts.append(Receipt(path=fi.path, action="excluded", reason="budget exhausted"))
168
181
  continue
169
182
 
170
183
  tokens_used += tok
184
+ if mode_str == "summary":
185
+ summaries_used += 1
171
186
 
172
187
  # Build symbol list
173
188
  syms: list[Symbol] = []
File without changes