repotrim 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repotrim-0.1.0/LICENSE +21 -0
- repotrim-0.1.0/PKG-INFO +155 -0
- repotrim-0.1.0/README.md +141 -0
- repotrim-0.1.0/__init__.py +3 -0
- repotrim-0.1.0/agents/__init__.py +6 -0
- repotrim-0.1.0/agents/report.py +64 -0
- repotrim-0.1.0/agents/reviewer.py +227 -0
- repotrim-0.1.0/agents/state.py +30 -0
- repotrim-0.1.0/agents/workflow.py +182 -0
- repotrim-0.1.0/config.py +233 -0
- repotrim-0.1.0/ignore_rules.py +57 -0
- repotrim-0.1.0/log_compressor.py +85 -0
- repotrim-0.1.0/main.py +241 -0
- repotrim-0.1.0/models.py +228 -0
- repotrim-0.1.0/packet_generator.py +204 -0
- repotrim-0.1.0/pyproject.toml +27 -0
- repotrim-0.1.0/ranker.py +8 -0
- repotrim-0.1.0/relevance_ranker.py +428 -0
- repotrim-0.1.0/repo_config.py +40 -0
- repotrim-0.1.0/reporter.py +95 -0
- repotrim-0.1.0/repotrim.egg-info/PKG-INFO +155 -0
- repotrim-0.1.0/repotrim.egg-info/SOURCES.txt +55 -0
- repotrim-0.1.0/repotrim.egg-info/dependency_links.txt +1 -0
- repotrim-0.1.0/repotrim.egg-info/entry_points.txt +2 -0
- repotrim-0.1.0/repotrim.egg-info/requires.txt +5 -0
- repotrim-0.1.0/repotrim.egg-info/top_level.txt +1 -0
- repotrim-0.1.0/scanner.py +199 -0
- repotrim-0.1.0/semantic.py +352 -0
- repotrim-0.1.0/setup.cfg +4 -0
- repotrim-0.1.0/task_analyzer.py +101 -0
- repotrim-0.1.0/tests/test_agent_plan.py +191 -0
- repotrim-0.1.0/tests/test_task_flow.py +472 -0
- repotrim-0.1.0/token_savings.py +72 -0
- repotrim-0.1.0/tokenizer.py +12 -0
- repotrim-0.1.0/utils.py +35 -0
repotrim-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sudhakhar Katta
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
repotrim-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: repotrim
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Create minimal context packets for AI coding agents.
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: rich>=13
|
|
10
|
+
Requires-Dist: typer>=0.9
|
|
11
|
+
Provides-Extra: semantic
|
|
12
|
+
Requires-Dist: sentence-transformers>=2.7; extra == "semantic"
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
RepoTrim turns a messy codebase into a focused context packet for AI coding agents.
|
|
16
|
+
|
|
17
|
+
# RepoTrim
|
|
18
|
+
|
|
19
|
+
RepoTrim is a local CLI context engine for Codex, Claude Code, Cursor, and other AI coding tools. It scans a repository, filters out junk and risky files, understands the task, ranks the files most likely to matter, optionally applies semantic search, and writes an agent-ready context packet.
|
|
20
|
+
|
|
21
|
+
## Why RepoTrim
|
|
22
|
+
|
|
23
|
+
Most AI coding sessions start with too much context or the wrong context.
|
|
24
|
+
|
|
25
|
+
| Before | After |
|
|
26
|
+
| --- | --- |
|
|
27
|
+
| Paste or attach a whole repo | Generate a focused packet |
|
|
28
|
+
| Generated files, caches, lock files, and duplicates add noise | Junk and generated paths are ignored |
|
|
29
|
+
| Secrets can slip into prompts | Secret-looking files are skipped |
|
|
30
|
+
| The agent guesses where to start | RepoTrim ranks task-relevant entry points |
|
|
31
|
+
| Context windows fill with unrelated files | Token count is usually reduced by 70-90% |
|
|
32
|
+
| Hard to audit what was sent | Packet, token savings, and agent reports are written to disk |
|
|
33
|
+
|
|
34
|
+
RepoTrim is a pre-step. Run it before opening Codex, Claude Code, or Cursor, then give the generated packet to the agent instead of the entire repository.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install repotrim
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Optional semantic search support:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install "repotrim[semantic]"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
repotrim task "restrict admin screens" --semantic
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Sample output:
|
|
55
|
+
|
|
56
|
+
```text
|
|
57
|
+
RepoTrim Task Report
|
|
58
|
+
|
|
59
|
+
Task:
|
|
60
|
+
restrict admin screens
|
|
61
|
+
|
|
62
|
+
Task type:
|
|
63
|
+
Access control / RBAC
|
|
64
|
+
|
|
65
|
+
Primary files:
|
|
66
|
+
1. lib/rbac.ts
|
|
67
|
+
Score: 142
|
|
68
|
+
Tokens: 420
|
|
69
|
+
Why:
|
|
70
|
+
- prioritized core RBAC policy file
|
|
71
|
+
|
|
72
|
+
2. middleware.ts
|
|
73
|
+
Score: 118
|
|
74
|
+
Tokens: 310
|
|
75
|
+
Why:
|
|
76
|
+
- prioritized middleware/protected route file
|
|
77
|
+
|
|
78
|
+
Semantic search: 12 chunks from lexical-fallback (repotrim-lexical-fallback)
|
|
79
|
+
context_packet.md written - paste into Claude or Codex to one-shot this.
|
|
80
|
+
|
|
81
|
+
RepoTrim Token Savings
|
|
82
|
+
----------------------
|
|
83
|
+
Files counted: 184
|
|
84
|
+
Full repo estimate: 154,963 tokens
|
|
85
|
+
Context packet estimate: 15,994 tokens
|
|
86
|
+
Tokens saved: 138,969 tokens
|
|
87
|
+
Reduction: 89.7%
|
|
88
|
+
Compression ratio: 9.7x smaller
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
RepoTrim writes the main packet to:
|
|
92
|
+
|
|
93
|
+
```text
|
|
94
|
+
.repotrim/context_packet.md
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## How It Works
|
|
98
|
+
|
|
99
|
+
Pipeline:
|
|
100
|
+
|
|
101
|
+
```text
|
|
102
|
+
task -> scan -> classify -> rank -> semantic boost -> token fit -> packet -> agent report
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Briefly:
|
|
106
|
+
|
|
107
|
+
1. Parse the coding task and classify the intent.
|
|
108
|
+
2. Scan the repo with shared ignore rules for caches, generated output, secrets, binaries, and oversized files.
|
|
109
|
+
3. Extract lightweight file metadata: language, symbols, imports, comments, size, and token estimate.
|
|
110
|
+
4. Rank files using deterministic task-aware scoring.
|
|
111
|
+
5. Optionally chunk and semantically search source files, then blend those matches into the ranking.
|
|
112
|
+
6. Fit selected files into a context budget.
|
|
113
|
+
7. Write `context_packet.md`, token savings data, and agent planning artifacts.
|
|
114
|
+
|
|
115
|
+
## Key Features
|
|
116
|
+
|
|
117
|
+
- Local-first: no repo upload required
|
|
118
|
+
- No AI API required for the core workflow
|
|
119
|
+
- Deterministic ranking by default
|
|
120
|
+
- Secret and generated-file filtering
|
|
121
|
+
- Task-aware file ranking for common engineering work
|
|
122
|
+
- Optional semantic search with local fallback
|
|
123
|
+
- Token savings report
|
|
124
|
+
- Agent plan report
|
|
125
|
+
- JSON output for automation
|
|
126
|
+
- Designed as a pre-step before Codex, Claude Code, and Cursor
|
|
127
|
+
|
|
128
|
+
## Who It's For
|
|
129
|
+
|
|
130
|
+
- Developers using AI agents on medium or large repositories
|
|
131
|
+
- Teams that want repeatable context selection instead of ad hoc file picking
|
|
132
|
+
- Maintainers who need to avoid sending secrets, caches, and generated output to AI tools
|
|
133
|
+
- Engineers building automation around AI-assisted code changes
|
|
134
|
+
- Anyone trying to keep agent prompts small enough to be useful
|
|
135
|
+
|
|
136
|
+
## Roadmap
|
|
137
|
+
|
|
138
|
+
- Better scoring for framework-specific project layouts
|
|
139
|
+
- Evaluation benchmark for ranking quality and token reduction
|
|
140
|
+
- Dedicated Codex, Claude Code, and Cursor output formats
|
|
141
|
+
|
|
142
|
+
## Contributing
|
|
143
|
+
|
|
144
|
+
Issues and pull requests are welcome. Keep changes focused, add tests for ranking or packet behavior, and include a real CLI example when changing user-facing output.
|
|
145
|
+
|
|
146
|
+
For local development:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
pip install -e .
|
|
150
|
+
pytest
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
MIT
|
repotrim-0.1.0/README.md
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
RepoTrim turns a messy codebase into a focused context packet for AI coding agents.
|
|
2
|
+
|
|
3
|
+
# RepoTrim
|
|
4
|
+
|
|
5
|
+
RepoTrim is a local CLI context engine for Codex, Claude Code, Cursor, and other AI coding tools. It scans a repository, filters out junk and risky files, understands the task, ranks the files most likely to matter, optionally applies semantic search, and writes an agent-ready context packet.
|
|
6
|
+
|
|
7
|
+
## Why RepoTrim
|
|
8
|
+
|
|
9
|
+
Most AI coding sessions start with too much context or the wrong context.
|
|
10
|
+
|
|
11
|
+
| Before | After |
|
|
12
|
+
| --- | --- |
|
|
13
|
+
| Paste or attach a whole repo | Generate a focused packet |
|
|
14
|
+
| Generated files, caches, lock files, and duplicates add noise | Junk and generated paths are ignored |
|
|
15
|
+
| Secrets can slip into prompts | Secret-looking files are skipped |
|
|
16
|
+
| The agent guesses where to start | RepoTrim ranks task-relevant entry points |
|
|
17
|
+
| Context windows fill with unrelated files | Token count is usually reduced by 70-90% |
|
|
18
|
+
| Hard to audit what was sent | Packet, token savings, and agent reports are written to disk |
|
|
19
|
+
|
|
20
|
+
RepoTrim is a pre-step. Run it before opening Codex, Claude Code, or Cursor, then give the generated packet to the agent instead of the entire repository.
|
|
21
|
+
|
|
22
|
+
## Install
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install repotrim
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Optional semantic search support:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install "repotrim[semantic]"
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
repotrim task "restrict admin screens" --semantic
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Sample output:
|
|
41
|
+
|
|
42
|
+
```text
|
|
43
|
+
RepoTrim Task Report
|
|
44
|
+
|
|
45
|
+
Task:
|
|
46
|
+
restrict admin screens
|
|
47
|
+
|
|
48
|
+
Task type:
|
|
49
|
+
Access control / RBAC
|
|
50
|
+
|
|
51
|
+
Primary files:
|
|
52
|
+
1. lib/rbac.ts
|
|
53
|
+
Score: 142
|
|
54
|
+
Tokens: 420
|
|
55
|
+
Why:
|
|
56
|
+
- prioritized core RBAC policy file
|
|
57
|
+
|
|
58
|
+
2. middleware.ts
|
|
59
|
+
Score: 118
|
|
60
|
+
Tokens: 310
|
|
61
|
+
Why:
|
|
62
|
+
- prioritized middleware/protected route file
|
|
63
|
+
|
|
64
|
+
Semantic search: 12 chunks from lexical-fallback (repotrim-lexical-fallback)
|
|
65
|
+
context_packet.md written - paste into Claude or Codex to one-shot this.
|
|
66
|
+
|
|
67
|
+
RepoTrim Token Savings
|
|
68
|
+
----------------------
|
|
69
|
+
Files counted: 184
|
|
70
|
+
Full repo estimate: 154,963 tokens
|
|
71
|
+
Context packet estimate: 15,994 tokens
|
|
72
|
+
Tokens saved: 138,969 tokens
|
|
73
|
+
Reduction: 89.7%
|
|
74
|
+
Compression ratio: 9.7x smaller
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
RepoTrim writes the main packet to:
|
|
78
|
+
|
|
79
|
+
```text
|
|
80
|
+
.repotrim/context_packet.md
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## How It Works
|
|
84
|
+
|
|
85
|
+
Pipeline:
|
|
86
|
+
|
|
87
|
+
```text
|
|
88
|
+
task -> scan -> classify -> rank -> semantic boost -> token fit -> packet -> agent report
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Briefly:
|
|
92
|
+
|
|
93
|
+
1. Parse the coding task and classify the intent.
|
|
94
|
+
2. Scan the repo with shared ignore rules for caches, generated output, secrets, binaries, and oversized files.
|
|
95
|
+
3. Extract lightweight file metadata: language, symbols, imports, comments, size, and token estimate.
|
|
96
|
+
4. Rank files using deterministic task-aware scoring.
|
|
97
|
+
5. Optionally chunk and semantically search source files, then blend those matches into the ranking.
|
|
98
|
+
6. Fit selected files into a context budget.
|
|
99
|
+
7. Write `context_packet.md`, token savings data, and agent planning artifacts.
|
|
100
|
+
|
|
101
|
+
## Key Features
|
|
102
|
+
|
|
103
|
+
- Local-first: no repo upload required
|
|
104
|
+
- No AI API required for the core workflow
|
|
105
|
+
- Deterministic ranking by default
|
|
106
|
+
- Secret and generated-file filtering
|
|
107
|
+
- Task-aware file ranking for common engineering work
|
|
108
|
+
- Optional semantic search with local fallback
|
|
109
|
+
- Token savings report
|
|
110
|
+
- Agent plan report
|
|
111
|
+
- JSON output for automation
|
|
112
|
+
- Designed as a pre-step before Codex, Claude Code, and Cursor
|
|
113
|
+
|
|
114
|
+
## Who It's For
|
|
115
|
+
|
|
116
|
+
- Developers using AI agents on medium or large repositories
|
|
117
|
+
- Teams that want repeatable context selection instead of ad hoc file picking
|
|
118
|
+
- Maintainers who need to avoid sending secrets, caches, and generated output to AI tools
|
|
119
|
+
- Engineers building automation around AI-assisted code changes
|
|
120
|
+
- Anyone trying to keep agent prompts small enough to be useful
|
|
121
|
+
|
|
122
|
+
## Roadmap
|
|
123
|
+
|
|
124
|
+
- Better scoring for framework-specific project layouts
|
|
125
|
+
- Evaluation benchmark for ranking quality and token reduction
|
|
126
|
+
- Dedicated Codex, Claude Code, and Cursor output formats
|
|
127
|
+
|
|
128
|
+
## Contributing
|
|
129
|
+
|
|
130
|
+
Issues and pull requests are welcome. Keep changes focused, add tests for ranking or packet behavior, and include a real CLI example when changing user-facing output.
|
|
131
|
+
|
|
132
|
+
For local development:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
pip install -e .
|
|
136
|
+
pytest
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT
|
repotrim-0.1.0/agents/report.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Report writers for the agentic context builder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ..models import RankedFile
|
|
8
|
+
from ..utils import ensure_cache_dir, write_json
|
|
9
|
+
from .state import AgentState
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _why_lines(items: list[RankedFile]) -> list[str]:
    """Build one Markdown bullet per file explaining why it was selected.

    Only the first recorded reason of each item is shown; items without
    reasons get a generic explanation. An empty input yields a single
    placeholder bullet so the report section is never blank.
    """
    fallback_reason = "selected by task relevance"
    bullets = [
        f"- `{entry.file.relative_path}`: {entry.reasons[0] if entry.reasons else fallback_reason}"
        for entry in items
    ]
    if not bullets:
        return ["- No files were selected."]
    return bullets
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def write_agent_report(repo_path: Path, state: AgentState, primary: list[RankedFile], related: list[RankedFile]) -> Path:
    """Render the agent report as Markdown and write it to ``agent_report.md``.

    Args:
        repo_path: Repository root; the report goes into the directory
            returned by ``ensure_cache_dir(repo_path)``.
        state: Agent state supplying the task, interpretation, steps,
            warnings and quality score.
        primary: Primary selected files, listed first.
        related: Related / supporting files, listed second.

    Returns:
        Path of the written report file.
    """

    def bullets(values: list[str]) -> list[str]:
        # An empty section still gets a visible "- None" entry.
        return [f"- {value}" for value in values] or ["- None"]

    # Resolve the destination first, as the original did, so the cache
    # directory lookup happens before any report content is built.
    destination = ensure_cache_dir(repo_path) / "agent_report.md"

    report = [
        "# RepoTrim Agent Report",
        "",
        "## Task",
        state.task,
        "",
        "## Task Interpretation",
        # Fall back to the raw task type when no interpretation was recorded.
        state.interpretation or state.task_type,
        "",
        "## Agent Steps",
        *(f"{number}. {step}" for number, step in enumerate(state.steps_taken, start=1)),
        "",
        "## Selected Files",
        "### Primary",
        *bullets([entry.file.relative_path for entry in primary]),
        "",
        "### Related / Supporting",
        *bullets([entry.file.relative_path for entry in related]),
        "",
        "## Why These Files Matter",
        *_why_lines([*primary, *related]),
        "",
        "## Missing Context / Warnings",
        *bullets(state.warnings),
        "",
        "## Context Quality Score",
        f"{state.quality_score}/100",
        "",
        "## Suggested Prompt For Coding Agent",
        "Use the selected context to implement the task. Pay special attention to the warnings above. Keep the patch minimal and add/update tests if possible.",
        "",
    ]
    # newline="\n" keeps line endings identical on every platform.
    destination.write_text("\n".join(report), encoding="utf-8", newline="\n")
    return destination
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def write_agent_json(repo_path: Path, state: AgentState) -> Path:
    """Serialise the agent state to ``agent.json`` in the repo's cache directory.

    Returns:
        Path of the written JSON file.
    """
    destination = ensure_cache_dir(repo_path) / "agent.json"
    payload = state.to_dict()
    write_json(destination, payload)
    return destination
|
repotrim-0.1.0/agents/reviewer.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""Rule-based context review and quality scoring."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ..models import RankedFile
|
|
8
|
+
from ..reporter import INTERPRETATIONS
|
|
9
|
+
from ..scanner import read_text_safely
|
|
10
|
+
from ..task_analyzer import INTENT_LABELS
|
|
11
|
+
from .state import AgentState
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Path keywords that mark a selected file as covering a context category.
CATEGORY_TERMS = {
    "test": {"test", "tests", "spec", "fixture"},
    "ui": {
        "component", "page", "view", "screen", "ui",
        "tsx", "jsx", "vue", "svelte", "css", "scss",
    },
    "backend": {
        "api", "route", "routes", "controller",
        "service", "server", "middleware", "handler",
    },
    "auth": {
        "auth", "role", "roles", "permission", "permissions",
        "rbac", "guard", "admin", "policy", "user",
    },
    "route": {"route", "routes", "router", "routing", "page"},
    "model": {"model", "schema", "entity", "repository", "migration", "prisma"},
    "config": {"config", "settings", "env", "toml", "yaml", "json", "dockerfile"},
    "docs": {"readme", "docs", "md"},
}

# Categories (keys of CATEGORY_TERMS) a good context should cover, per task
# intent. Intents not listed here have no required categories.
REQUIRED_BY_INTENT = {
    "access_control": {"auth", "route", "test"},
    "authentication": {"auth", "route", "test"},
    "frontend_feature": {"ui", "route", "test"},
    "backend_feature": {"backend", "model", "test"},
    "database_feature": {"model", "config", "test"},
    "config_deployment": {"config", "test"},
    "testing_task": {"test"},
}

# Finer-grained categories checked by the RBAC-specific review; the context
# is only considered complete when all of them are detected.
RBAC_REQUIRED_CATEGORIES = {
    "rbac_rules",
    "auth_role_source",
    "middleware_routes",
    "navigation_visibility",
    "protected_pages",
    "backend_enforcement",
    "tests",
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _path_tokens(path: str) -> set[str]:
    """Split a path into lowercase tokens for keyword matching.

    Backslashes are treated as directory separators. The path is then broken
    on ``/``, ``.``, ``-`` and ``_``; the complete file name is included as
    an extra token.
    """
    normalized = path.replace("\\", "/").lower()
    file_name = Path(normalized).name
    # Turn every in-name delimiter into "/" so one split yields all tokens.
    exploded = normalized.translate(str.maketrans("._-", "///"))
    tokens = {piece for piece in exploded.split("/") if piece}
    tokens.add(file_name)
    return tokens
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def covered_categories(paths: list[str]) -> set[str]:
    """Return the CATEGORY_TERMS categories that the given paths touch.

    A path covers a category when one of its tokens equals a category term,
    or when a term appears anywhere in the lowercased path as a substring.
    """
    found: set[str] = set()
    for candidate in paths:
        lowered = candidate.lower()
        tokens = _path_tokens(candidate)
        found.update(
            category
            for category, terms in CATEGORY_TERMS.items()
            if not tokens.isdisjoint(terms) or any(term in lowered for term in terms)
        )
    return found
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _selected_text(state: AgentState, ranked_files: list[RankedFile]) -> dict[str, str]:
    """Load the text of ranked and selected files, keyed by relative path.

    Ranked files are read from their own ``file.path``. Selected files that
    were not already loaded are read relative to ``state.repo_path``. Files
    for which ``read_text_safely`` returns ``None`` are left out.
    """
    contents: dict[str, str] = {}

    for ranked in ranked_files:
        relative = ranked.file.relative_path
        body = read_text_safely(Path(ranked.file.path))
        if body is None:
            continue
        contents[relative] = body

    root = Path(state.repo_path)
    for relative in state.selected_files:
        if relative not in contents:
            body = read_text_safely(root / relative)
            if body is not None:
                contents[relative] = body

    return contents
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _is_rbac_review_task(state: AgentState) -> bool:
    """Tell whether the RBAC-specific review should run for this task.

    True when the task type is ``access_control`` or when the lowercased
    task text contains any access-control keyword (substring match).
    """
    lowered_task = state.task.lower()
    if state.task_type == "access_control":
        return True
    keywords = ("rbac", "admin", "access", "access-control", "permission", "permissions", "role", "roles", "protected")
    for keyword in keywords:
        if keyword in lowered_task:
            return True
    return False
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _has_app_page_path(path: str) -> bool:
    """Check whether ``path`` is a page/layout file located under ``app/app/``.

    The path must have at least three ``/``-separated segments, begin with
    ``app/app`` and end in a page or layout ``.tsx``/``.jsx`` file.
    """
    # NOTE(review): both of the first two segments must equal "app"; this
    # presumably targets an "/app" route inside an ``app`` directory --
    # confirm it is not meant to be a single leading "app" segment.
    segments = path.split("/")
    if len(segments) < 3:
        return False
    if segments[0] != "app" or segments[1] != "app":
        return False
    return segments[-1] in {"page.tsx", "page.jsx", "layout.tsx", "layout.jsx"}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _detect_rbac_categories(state: AgentState, ranked_files: list[RankedFile]) -> set[str]:
    """Detect which RBAC coverage categories the selected files satisfy.

    Every selected path (or, when nothing is selected, every ranked file's
    relative path) is matched against path and content heuristics. One file
    may contribute several categories.

    Directory probes such as ``/auth/``, ``/nav``, ``/menu`` and ``/admin``
    are evaluated against the path with a leading ``/`` prepended. That makes
    them match top-level directories too, consistent with the existing
    ``/tests/`` and ``/__tests__/`` probes.

    Args:
        state: Agent state; ``selected_files`` lists the paths to inspect.
        ranked_files: Ranked candidates, used to load file text and as the
            fallback list of paths.

    Returns:
        The detected category names, drawn from the names listed in
        ``RBAC_REQUIRED_CATEGORIES``.
    """
    page_names = {"page.tsx", "page.jsx", "layout.tsx", "layout.jsx"}
    protected_sections = ("/admin", "/dashboard", "/settings", "/tools")

    categories: set[str] = set()
    text_by_path = _selected_text(state, ranked_files)
    selected = state.selected_files or [item.file.relative_path for item in ranked_files]

    for relative in selected:
        path = relative.replace("\\", "/").lower()
        # Anchored form: "/dir" probes also hit directories at the repo root.
        rooted = f"/{path}"
        name = Path(path).name
        # Text is looked up by the original (non-normalised) relative path.
        text = text_by_path.get(relative, "")
        combined = f"{path}\n{text}".lower()

        if name.endswith((".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx", "_test.py")) or "/tests/" in rooted or "/__tests__/" in rooted:
            categories.add("tests")
        if path.endswith("lib/rbac.ts") or "rbac" in path or "permission" in path or "role" in path or "canaccess" in combined or "policy" in combined:
            categories.add("rbac_rules")
        if "lib/auth" in path or "/auth/" in rooted or "session" in combined or "user.role" in combined or "getserversession" in combined:
            categories.add("auth_role_source")
        if name == "middleware.ts" or "middleware" in path or "protected route" in combined or "canaccesspath" in combined:
            categories.add("middleware_routes")
        if "sidebar" in path or "navigation" in path or "/nav" in rooted or "/menu" in rooted or "menuitem" in combined:
            categories.add("navigation_visibility")
        if _has_app_page_path(path) or (name in page_names and any(section in rooted for section in protected_sections)):
            categories.add("protected_pages")

        is_server_surface = path.startswith("actions/") or "/actions/" in path or path.startswith("app/api/") or "/app/api/" in path
        calls_server_guard = "requiretoolaccess" in combined or "canaccesspath" in combined or "canaccesstool" in combined
        # The files that define the guards do not count as enforcing them.
        if is_server_surface or "use server" in combined or (calls_server_guard and name not in {"auth.ts", "rbac.ts"}):
            categories.add("backend_enforcement")

    return categories
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _apply_rbac_review(state: AgentState, warnings: list[str], missing: list[str], score: int, ranked_files: list[RankedFile]) -> tuple[int, list[str], list[str]]:
    """Adjust score, warnings and missing-context notes for RBAC coverage.

    ``warnings`` and ``missing`` are appended to in place and also returned.
    Each coverage gap caps the score; a context covering every required RBAC
    category instead earns a 15-point bonus.

    Returns:
        ``(score, warnings, missing)`` after the RBAC adjustments.
    """
    detected = _detect_rbac_categories(state, ranked_files)
    gaps = RBAC_REQUIRED_CATEGORIES - detected
    lacks_backend = "backend_enforcement" in gaps
    caps: list[int] = []

    if "protected_pages" in gaps:
        warnings.append("Actual protected page/screen files may be missing.")
        missing.append("actual protected page/screen file")
        caps.append(85)
    if lacks_backend:
        warnings.append("Backend/server action enforcement may be missing.")
        missing.append("backend/server action/API enforcement file")
        caps.append(85)
    if lacks_backend and "navigation_visibility" in detected:
        warnings.append("Sidebar/UI hiding exists, but server-side enforcement should also be checked.")
    if "tests" in gaps:
        warnings.append("No test file found for this access-control task.")
        missing.append("test or spec file")
        caps.append(90)

    has_ui = bool(detected & {"navigation_visibility", "protected_pages"})
    has_backend = bool(detected & {"rbac_rules", "auth_role_source", "middleware_routes", "backend_enforcement"})
    # A context covering only one side of the stack is capped harder.
    if has_ui and not has_backend:
        caps.append(75)
    if has_backend and not has_ui:
        caps.append(85)

    # Applying the caps one by one is the same as applying the smallest one.
    score = min([score, *caps])
    if gaps:
        score = min(score, 99)
    else:
        score += 15

    return score, warnings, missing
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def review_context(state: AgentState, ranked_files: list[RankedFile]) -> AgentState:
    """Review the selected context and record warnings, gaps and a quality score.

    Updates ``state.missing_context``, ``state.warnings`` (both de-duplicated
    in first-seen order) and ``state.quality_score`` (clamped to 0..100),
    then returns the same ``state`` object.
    """
    # Warning text and missing-context label for each uncovered category.
    notes_by_category = {
        "test": ("No test file found for this task.", "test or spec file"),
        "auth": ("Auth, role, or permission file may be missing.", "auth/role/permission file"),
        "backend": ("Backend route/controller/service context may be missing.", "backend route/controller/service file"),
        "route": ("Route or navigation context may be missing.", "route/navigation file"),
        "model": ("Model/schema/repository context may be missing.", "model/schema/repository file"),
        "config": ("Configuration context may be missing.", "config file"),
        "ui": ("Frontend component/page context may be missing.", "frontend component/page file"),
    }

    covered = covered_categories(state.selected_files)
    warnings: list[str] = []
    missing: list[str] = []

    if not ranked_files:
        warnings.append("No strong primary file was found for this task.")
        missing.append("primary task-relevant source file")

    required = REQUIRED_BY_INTENT.get(state.task_type, set())
    # Sorted so the order of emitted warnings is stable.
    for category in sorted(required - covered):
        note = notes_by_category.get(category)
        if note is None:
            continue
        warning_text, missing_label = note
        warnings.append(warning_text)
        missing.append(missing_label)

    security_sensitive = state.task_type in {"access_control", "authentication"}
    if security_sensitive and "backend" not in covered:
        if "ui" in covered:
            warnings.append("Only UI files found; security-sensitive tasks usually need backend checks too.")
        warnings.append("Backend enforcement file may be missing.")

    # Points are earned per satisfied criterion; each warning costs 10.
    criteria = (
        (30, bool(state.interpretation)),
        (25, bool(ranked_files)),
        (15, bool(state.added_related_files)),
        (10, "test" in covered),
        (10, state.estimated_tokens <= state.token_budget),
        # At most one required category may be uncovered.
        (10, bool(required) and len(required & covered) >= max(1, len(required) - 1)),
    )
    score = sum(points for points, earned in criteria if earned)
    score -= 10 * len(warnings)

    if _is_rbac_review_task(state):
        score, warnings, missing = _apply_rbac_review(state, warnings, missing, score, ranked_files)

    state.missing_context = list(dict.fromkeys(missing))
    state.warnings = list(dict.fromkeys(warnings))
    state.quality_score = max(0, min(100, score))
    return state
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def task_interpretation(intent: str) -> str:
    """Return a one-line interpretation of ``intent``: its label, then its detail.

    Unknown intents fall back to the general-code-task label and detail.
    """
    label = INTENT_LABELS.get(intent, "General code task")
    # Looked up unconditionally, exactly as the original default argument was.
    fallback_detail = INTERPRETATIONS["general_code_task"]
    detail = INTERPRETATIONS.get(intent, fallback_detail)
    return f"{label}. {detail}"
|
repotrim-0.1.0/agents/state.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""State tracked by the deterministic agent context builder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import asdict, dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class AgentState:
    """State carried through one run of the deterministic context builder."""

    # Absolute repository path (as a string when built via ``create``).
    repo_path: str
    # Task description the context is built for.
    task: str
    # Task intent key; defaults to the general-purpose intent.
    task_type: str = "general_code_task"
    # Human-readable interpretation of the task; empty until set.
    interpretation: str = ""
    # Relative paths chosen for the context.
    selected_files: list[str] = field(default_factory=list)
    # Related files added on top of the primary selection.
    added_related_files: list[str] = field(default_factory=list)
    # Descriptions of context that appears to be missing.
    missing_context: list[str] = field(default_factory=list)
    # Review warnings.
    warnings: list[str] = field(default_factory=list)
    # Context quality score.
    quality_score: int = 0
    # Token budget for the context and the estimated tokens actually used.
    token_budget: int = 30000
    estimated_tokens: int = 0
    # Ordered log of steps performed.
    steps_taken: list[str] = field(default_factory=list)

    @classmethod
    def create(cls, repo_path: Path, task: str, token_budget: int) -> "AgentState":
        """Build a fresh state for ``task`` with ``repo_path`` resolved to an absolute path."""
        absolute_root = str(repo_path.resolve())
        return cls(repo_path=absolute_root, task=task, token_budget=token_budget)

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dictionary."""
        return asdict(self)
|