repotrim 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
repotrim-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sudhakhar Katta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.4
2
+ Name: repotrim
3
+ Version: 0.1.0
4
+ Summary: Create minimal context packets for AI coding agents.
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: rich>=13
10
+ Requires-Dist: typer>=0.9
11
+ Provides-Extra: semantic
12
+ Requires-Dist: sentence-transformers>=2.7; extra == "semantic"
13
+ Dynamic: license-file
14
+
15
+ RepoTrim turns a messy codebase into a focused context packet for AI coding agents.
16
+
17
+ # RepoTrim
18
+
19
+ RepoTrim is a local CLI context engine for Codex, Claude Code, Cursor, and other AI coding tools. It scans a repository, filters out junk and risky files, understands the task, ranks the files most likely to matter, optionally applies semantic search, and writes an agent-ready context packet.
20
+
21
+ ## Why RepoTrim
22
+
23
+ Most AI coding sessions start with too much context or the wrong context.
24
+
25
+ | Before | After |
26
+ | --- | --- |
27
+ | Paste or attach a whole repo | Generate a focused packet |
28
+ | Generated files, caches, lock files, and duplicates add noise | Junk and generated paths are ignored |
29
+ | Secrets can slip into prompts | Secret-looking files are skipped |
30
+ | The agent guesses where to start | RepoTrim ranks task-relevant entry points |
31
+ | Context windows fill with unrelated files | Token count is usually reduced by 70-90% |
32
+ | Hard to audit what was sent | Packet, token savings, and agent reports are written to disk |
33
+
34
+ RepoTrim is a pre-step. Run it before opening Codex, Claude Code, or Cursor, then give the generated packet to the agent instead of the entire repository.
35
+
36
+ ## Install
37
+
38
+ ```bash
39
+ pip install repotrim
40
+ ```
41
+
42
+ Optional semantic search support:
43
+
44
+ ```bash
45
+ pip install "repotrim[semantic]"
46
+ ```
47
+
48
+ ## Quick Start
49
+
50
+ ```bash
51
+ repotrim task "restrict admin screens" --semantic
52
+ ```
53
+
54
+ Sample output:
55
+
56
+ ```text
57
+ RepoTrim Task Report
58
+
59
+ Task:
60
+ restrict admin screens
61
+
62
+ Task type:
63
+ Access control / RBAC
64
+
65
+ Primary files:
66
+ 1. lib/rbac.ts
67
+ Score: 142
68
+ Tokens: 420
69
+ Why:
70
+ - prioritized core RBAC policy file
71
+
72
+ 2. middleware.ts
73
+ Score: 118
74
+ Tokens: 310
75
+ Why:
76
+ - prioritized middleware/protected route file
77
+
78
+ Semantic search: 12 chunks from lexical-fallback (repotrim-lexical-fallback)
79
+ context_packet.md written - paste into Claude or Codex to one-shot this.
80
+
81
+ RepoTrim Token Savings
82
+ ----------------------
83
+ Files counted: 184
84
+ Full repo estimate: 154,963 tokens
85
+ Context packet estimate: 15,994 tokens
86
+ Tokens saved: 138,969 tokens
87
+ Reduction: 89.7%
88
+ Compression ratio: 9.7x smaller
89
+ ```
90
+
91
+ RepoTrim writes the main packet to:
92
+
93
+ ```text
94
+ .repotrim/context_packet.md
95
+ ```
96
+
97
+ ## How It Works
98
+
99
+ Pipeline:
100
+
101
+ ```text
102
+ task -> scan -> classify -> rank -> semantic boost -> token fit -> packet -> agent report
103
+ ```
104
+
105
+ Briefly:
106
+
107
+ 1. Parse the coding task and classify the intent.
108
+ 2. Scan the repo with shared ignore rules for caches, generated output, secrets, binaries, and oversized files.
109
+ 3. Extract lightweight file metadata: language, symbols, imports, comments, size, and token estimate.
110
+ 4. Rank files using deterministic task-aware scoring.
111
+ 5. Optionally chunk and semantically search source files, then blend those matches into the ranking.
112
+ 6. Fit selected files into a context budget.
113
+ 7. Write `context_packet.md`, token savings data, and agent planning artifacts.
114
+
115
+ ## Key Features
116
+
117
+ - Local-first: no repo upload required
118
+ - No AI API required for the core workflow
119
+ - Deterministic ranking by default
120
+ - Secret and generated-file filtering
121
+ - Task-aware file ranking for common engineering work
122
+ - Optional semantic search with local fallback
123
+ - Token savings report
124
+ - Agent plan report
125
+ - JSON output for automation
126
+ - Designed as a pre-step before Codex, Claude Code, and Cursor
127
+
128
+ ## Who It's For
129
+
130
+ - Developers using AI agents on medium or large repositories
131
+ - Teams that want repeatable context selection instead of ad hoc file picking
132
+ - Maintainers who need to avoid sending secrets, caches, and generated output to AI tools
133
+ - Engineers building automation around AI-assisted code changes
134
+ - Anyone trying to keep agent prompts small enough to be useful
135
+
136
+ ## Roadmap
137
+
138
+ - Better scoring for framework-specific project layouts
139
+ - Evaluation benchmark for ranking quality and token reduction
140
+ - Dedicated Codex, Claude Code, and Cursor output formats
141
+
142
+ ## Contributing
143
+
144
+ Issues and pull requests are welcome. Keep changes focused, add tests for ranking or packet behavior, and include a real CLI example when changing user-facing output.
145
+
146
+ For local development:
147
+
148
+ ```bash
149
+ pip install -e .
150
+ pytest
151
+ ```
152
+
153
+ ## License
154
+
155
+ MIT
@@ -0,0 +1,141 @@
1
+ RepoTrim turns a messy codebase into a focused context packet for AI coding agents.
2
+
3
+ # RepoTrim
4
+
5
+ RepoTrim is a local CLI context engine for Codex, Claude Code, Cursor, and other AI coding tools. It scans a repository, filters out junk and risky files, understands the task, ranks the files most likely to matter, optionally applies semantic search, and writes an agent-ready context packet.
6
+
7
+ ## Why RepoTrim
8
+
9
+ Most AI coding sessions start with too much context or the wrong context.
10
+
11
+ | Before | After |
12
+ | --- | --- |
13
+ | Paste or attach a whole repo | Generate a focused packet |
14
+ | Generated files, caches, lock files, and duplicates add noise | Junk and generated paths are ignored |
15
+ | Secrets can slip into prompts | Secret-looking files are skipped |
16
+ | The agent guesses where to start | RepoTrim ranks task-relevant entry points |
17
+ | Context windows fill with unrelated files | Token count is usually reduced by 70-90% |
18
+ | Hard to audit what was sent | Packet, token savings, and agent reports are written to disk |
19
+
20
+ RepoTrim is a pre-step. Run it before opening Codex, Claude Code, or Cursor, then give the generated packet to the agent instead of the entire repository.
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install repotrim
26
+ ```
27
+
28
+ Optional semantic search support:
29
+
30
+ ```bash
31
+ pip install "repotrim[semantic]"
32
+ ```
33
+
34
+ ## Quick Start
35
+
36
+ ```bash
37
+ repotrim task "restrict admin screens" --semantic
38
+ ```
39
+
40
+ Sample output:
41
+
42
+ ```text
43
+ RepoTrim Task Report
44
+
45
+ Task:
46
+ restrict admin screens
47
+
48
+ Task type:
49
+ Access control / RBAC
50
+
51
+ Primary files:
52
+ 1. lib/rbac.ts
53
+ Score: 142
54
+ Tokens: 420
55
+ Why:
56
+ - prioritized core RBAC policy file
57
+
58
+ 2. middleware.ts
59
+ Score: 118
60
+ Tokens: 310
61
+ Why:
62
+ - prioritized middleware/protected route file
63
+
64
+ Semantic search: 12 chunks from lexical-fallback (repotrim-lexical-fallback)
65
+ context_packet.md written - paste into Claude or Codex to one-shot this.
66
+
67
+ RepoTrim Token Savings
68
+ ----------------------
69
+ Files counted: 184
70
+ Full repo estimate: 154,963 tokens
71
+ Context packet estimate: 15,994 tokens
72
+ Tokens saved: 138,969 tokens
73
+ Reduction: 89.7%
74
+ Compression ratio: 9.7x smaller
75
+ ```
76
+
77
+ RepoTrim writes the main packet to:
78
+
79
+ ```text
80
+ .repotrim/context_packet.md
81
+ ```
82
+
83
+ ## How It Works
84
+
85
+ Pipeline:
86
+
87
+ ```text
88
+ task -> scan -> classify -> rank -> semantic boost -> token fit -> packet -> agent report
89
+ ```
90
+
91
+ Briefly:
92
+
93
+ 1. Parse the coding task and classify the intent.
94
+ 2. Scan the repo with shared ignore rules for caches, generated output, secrets, binaries, and oversized files.
95
+ 3. Extract lightweight file metadata: language, symbols, imports, comments, size, and token estimate.
96
+ 4. Rank files using deterministic task-aware scoring.
97
+ 5. Optionally chunk and semantically search source files, then blend those matches into the ranking.
98
+ 6. Fit selected files into a context budget.
99
+ 7. Write `context_packet.md`, token savings data, and agent planning artifacts.
100
+
101
+ ## Key Features
102
+
103
+ - Local-first: no repo upload required
104
+ - No AI API required for the core workflow
105
+ - Deterministic ranking by default
106
+ - Secret and generated-file filtering
107
+ - Task-aware file ranking for common engineering work
108
+ - Optional semantic search with local fallback
109
+ - Token savings report
110
+ - Agent plan report
111
+ - JSON output for automation
112
+ - Designed as a pre-step before Codex, Claude Code, and Cursor
113
+
114
+ ## Who It's For
115
+
116
+ - Developers using AI agents on medium or large repositories
117
+ - Teams that want repeatable context selection instead of ad hoc file picking
118
+ - Maintainers who need to avoid sending secrets, caches, and generated output to AI tools
119
+ - Engineers building automation around AI-assisted code changes
120
+ - Anyone trying to keep agent prompts small enough to be useful
121
+
122
+ ## Roadmap
123
+
124
+ - Better scoring for framework-specific project layouts
125
+ - Evaluation benchmark for ranking quality and token reduction
126
+ - Dedicated Codex, Claude Code, and Cursor output formats
127
+
128
+ ## Contributing
129
+
130
+ Issues and pull requests are welcome. Keep changes focused, add tests for ranking or packet behavior, and include a real CLI example when changing user-facing output.
131
+
132
+ For local development:
133
+
134
+ ```bash
135
+ pip install -e .
136
+ pytest
137
+ ```
138
+
139
+ ## License
140
+
141
+ MIT
@@ -0,0 +1,3 @@
1
+ """RepoTrim: minimal context packets for AI coding agents."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,6 @@
1
+ """Deterministic agentic planning helpers for RepoTrim."""
2
+
3
+ from .state import AgentState
4
+ from .workflow import run_agent_plan
5
+
6
+ __all__ = ["AgentState", "run_agent_plan"]
@@ -0,0 +1,64 @@
1
+ """Report writers for the agentic context builder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from ..models import RankedFile
8
+ from ..utils import ensure_cache_dir, write_json
9
+ from .state import AgentState
10
+
11
+
12
+ def _why_lines(items: list[RankedFile]) -> list[str]:
13
+ lines: list[str] = []
14
+ for item in items:
15
+ reason = item.reasons[0] if item.reasons else "selected by task relevance"
16
+ lines.append(f"- `{item.file.relative_path}`: {reason}")
17
+ return lines or ["- No files were selected."]
18
+
19
+
20
def write_agent_report(repo_path: Path, state: AgentState, primary: list[RankedFile], related: list[RankedFile]) -> Path:
    """Render the agent state and selected files as markdown and save it.

    The report is written to ``agent_report.md`` inside the directory returned
    by ``ensure_cache_dir(repo_path)``, UTF-8 encoded with LF line endings.
    Sections, in order: task, task interpretation (falling back to the raw
    task type when no interpretation is set), numbered agent steps, primary
    and related file lists, per-file reasons, warnings, quality score, and a
    fixed suggested prompt.

    Returns:
        The path of the written report.
    """

    def bulleted(values) -> list[str]:
        """Format values as markdown bullets, or one "- None" bullet if empty."""
        rendered = [f"- {value}" for value in values]
        return rendered or ["- None"]

    destination = ensure_cache_dir(repo_path) / "agent_report.md"

    sections: list[str] = [
        "# RepoTrim Agent Report",
        "",
        "## Task",
        state.task,
        "",
        "## Task Interpretation",
        state.interpretation or state.task_type,
        "",
        "## Agent Steps",
    ]
    for number, step in enumerate(state.steps_taken, start=1):
        sections.append(f"{number}. {step}")

    sections += ["", "## Selected Files", "### Primary"]
    sections += bulleted(entry.file.relative_path for entry in primary)
    sections += ["", "### Related / Supporting"]
    sections += bulleted(entry.file.relative_path for entry in related)
    sections += ["", "## Why These Files Matter"]
    sections += _why_lines([*primary, *related])
    sections += ["", "## Missing Context / Warnings"]
    sections += bulleted(state.warnings)
    sections += [
        "",
        "## Context Quality Score",
        f"{state.quality_score}/100",
        "",
        "## Suggested Prompt For Coding Agent",
        "Use the selected context to implement the task. Pay special attention to the warnings above. Keep the patch minimal and add/update tests if possible.",
        "",
    ]

    destination.write_text("\n".join(sections), encoding="utf-8", newline="\n")
    return destination
59
+
60
+
61
def write_agent_json(repo_path: Path, state: AgentState) -> Path:
    """Dump ``state.to_dict()`` to ``agent.json`` and return the file's path.

    The file lives in the directory returned by ``ensure_cache_dir(repo_path)``
    and is written through the shared ``write_json`` helper.
    """
    destination = ensure_cache_dir(repo_path).joinpath("agent.json")
    write_json(destination, state.to_dict())
    return destination
@@ -0,0 +1,227 @@
1
+ """Rule-based context review and quality scoring."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from ..models import RankedFile
8
+ from ..reporter import INTERPRETATIONS
9
+ from ..scanner import read_text_safely
10
+ from ..task_analyzer import INTENT_LABELS
11
+ from .state import AgentState
12
+
13
+
14
# Coarse file categories mapped to the lowercase path words that signal them.
# Key order is the order in which categories are checked when iterating.
CATEGORY_TERMS = {
    "test": {"test", "tests", "spec", "fixture"},
    "ui": {
        "component", "page", "view", "screen", "ui",
        "tsx", "jsx", "vue", "svelte", "css", "scss",
    },
    "backend": {
        "api", "route", "routes", "controller",
        "service", "server", "middleware", "handler",
    },
    "auth": {
        "auth", "role", "roles", "permission", "permissions",
        "rbac", "guard", "admin", "policy", "user",
    },
    "route": {"route", "routes", "router", "routing", "page"},
    "model": {"model", "schema", "entity", "repository", "migration", "prisma"},
    "config": {"config", "settings", "env", "toml", "yaml", "json", "dockerfile"},
    "docs": {"readme", "docs", "md"},
}
24
+
25
# For each task intent, the CATEGORY_TERMS categories a context selection is
# expected to cover. Intents not listed here have no required categories.
REQUIRED_BY_INTENT = {
    # security-sensitive intents
    "access_control": {"auth", "route", "test"},
    "authentication": {"auth", "route", "test"},
    # feature work
    "frontend_feature": {"ui", "route", "test"},
    "backend_feature": {"backend", "model", "test"},
    "database_feature": {"model", "config", "test"},
    # infrastructure and tests
    "config_deployment": {"config", "test"},
    "testing_task": {"test"},
}
34
+
35
# Every category the RBAC-specific review looks for; any category not
# detected among the selected files counts as missing.
RBAC_REQUIRED_CATEGORIES = {
    # policy and identity
    "rbac_rules",
    "auth_role_source",
    # enforcement points
    "middleware_routes",
    "backend_enforcement",
    # user-facing surface
    "navigation_visibility",
    "protected_pages",
    # verification
    "tests",
}
44
+
45
+
46
+ def _path_tokens(path: str) -> set[str]:
47
+ value = path.replace("\\", "/").lower()
48
+ name = Path(value).name
49
+ raw = value.replace(".", "/").replace("-", "/").replace("_", "/")
50
+ return {part for part in raw.split("/") if part} | {name}
51
+
52
+
53
def covered_categories(paths: list[str]) -> set[str]:
    """Return the CATEGORY_TERMS categories that at least one path hits.

    A path hits a category when one of its tokens (see ``_path_tokens``) is a
    category term, or when any category term occurs anywhere in the lowercased
    path as a plain substring.
    """
    found: set[str] = set()
    for candidate in paths:
        words = _path_tokens(candidate)
        lowered = candidate.lower()
        found.update(
            label
            for label, vocabulary in CATEGORY_TERMS.items()
            if not words.isdisjoint(vocabulary) or any(word in lowered for word in vocabulary)
        )
    return found
61
+
62
+
63
def _selected_text(state: AgentState, ranked_files: list[RankedFile]) -> dict[str, str]:
    """Collect readable file contents keyed by repository-relative path.

    Ranked files are read first from their own ``file.path``. Any path in
    ``state.selected_files`` not already collected is then read relative to
    ``state.repo_path``. Files for which ``read_text_safely`` returns ``None``
    are left out.
    """
    contents: dict[str, str] = {}

    for ranked in ranked_files:
        key = ranked.file.relative_path
        body = read_text_safely(Path(ranked.file.path))
        if body is not None:
            contents[key] = body

    root = Path(state.repo_path)
    for key in state.selected_files:
        if key not in contents:
            body = read_text_safely(root / key)
            if body is not None:
                contents[key] = body

    return contents
78
+
79
+
80
+ def _is_rbac_review_task(state: AgentState) -> bool:
81
+ task = state.task.lower()
82
+ task_terms = {"rbac", "admin", "access", "access-control", "permission", "permissions", "role", "roles", "protected"}
83
+ return state.task_type == "access_control" or any(term in task for term in task_terms)
84
+
85
+
86
+ def _has_app_page_path(path: str) -> bool:
87
+ parts = path.split("/")
88
+ return (
89
+ len(parts) >= 3
90
+ and parts[0] == "app"
91
+ and parts[1] == "app"
92
+ and parts[-1] in {"page.tsx", "page.jsx", "layout.tsx", "layout.jsx"}
93
+ )
94
+
95
+
96
def _detect_rbac_categories(state: AgentState, ranked_files: list[RankedFile]) -> set[str]:
    """Return the RBAC review categories evidenced by the selected files.

    The files inspected are ``state.selected_files`` or, when that list is
    empty, the relative paths of ``ranked_files``. Each file is checked by its
    normalized lowercase path and by the path plus file text (lowercased)
    against simple substring heuristics, one group per category.
    """
    page_names = {"page.tsx", "page.jsx", "layout.tsx", "layout.jsx"}
    test_suffixes = (".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx", "_test.py")
    protected_areas = ("/admin", "/dashboard", "/settings", "/tools")
    guard_calls = ("requiretoolaccess", "canaccesspath", "canaccesstool")

    found: set[str] = set()
    contents = _selected_text(state, ranked_files)
    candidates = state.selected_files or [ranked.file.relative_path for ranked in ranked_files]

    for relative in candidates:
        path = relative.replace("\\", "/").lower()
        name = Path(path).name
        # Path and file text searched together, case-insensitively.
        haystack = f"{path}\n{contents.get(relative, '')}".lower()
        # Leading slash lets a top-level "tests/" directory match "/tests/".
        rooted = f"/{path}"

        if name.endswith(test_suffixes) or "/tests/" in rooted or "/__tests__/" in rooted:
            found.add("tests")

        if (
            path.endswith("lib/rbac.ts")
            or any(marker in path for marker in ("rbac", "permission", "role"))
            or any(marker in haystack for marker in ("canaccess", "policy"))
        ):
            found.add("rbac_rules")

        if (
            any(marker in path for marker in ("lib/auth", "/auth/"))
            or any(marker in haystack for marker in ("session", "user.role", "getserversession"))
        ):
            found.add("auth_role_source")

        if (
            name == "middleware.ts"
            or "middleware" in path
            or any(marker in haystack for marker in ("protected route", "canaccesspath"))
        ):
            found.add("middleware_routes")

        if (
            any(marker in path for marker in ("sidebar", "navigation", "/nav", "/menu"))
            or "menuitem" in haystack
        ):
            found.add("navigation_visibility")

        in_protected_area = name in page_names and any(area in path for area in protected_areas)
        if _has_app_page_path(path) or in_protected_area:
            found.add("protected_pages")

        server_surface = (
            path.startswith(("actions/", "app/api/"))
            or "/actions/" in path
            or "/app/api/" in path
        )
        guard_used = any(call in haystack for call in guard_calls)
        # A guard call only counts outside the files that define the guards.
        guarded_caller = guard_used and name not in {"auth.ts", "rbac.ts"}
        if server_surface or "use server" in haystack or guarded_caller:
            found.add("backend_enforcement")

    return found
125
+
126
+
127
def _apply_rbac_review(state: AgentState, warnings: list[str], missing: list[str], score: int, ranked_files: list[RankedFile]) -> tuple[int, list[str], list[str]]:
    """Adjust score, warnings and missing-context notes for RBAC tasks.

    Categories from ``RBAC_REQUIRED_CATEGORIES`` that were not detected add
    warnings and cap the score. The score is capped further when only UI-side
    or only backend-side categories were found. If nothing is missing the
    score gains 15 points, otherwise it is capped at 99.

    ``warnings`` and ``missing`` are appended to in place and also returned.

    Returns:
        ``(score, warnings, missing)``.
    """
    present = _detect_rbac_categories(state, ranked_files)
    absent = RBAC_REQUIRED_CATEGORIES - present
    lacks_backend = "backend_enforcement" in absent

    if "protected_pages" in absent:
        warnings.append("Actual protected page/screen files may be missing.")
        missing.append("actual protected page/screen file")
        score = min(score, 85)

    if lacks_backend:
        warnings.append("Backend/server action enforcement may be missing.")
        missing.append("backend/server action/API enforcement file")
        score = min(score, 85)
        if "navigation_visibility" in present:
            warnings.append("Sidebar/UI hiding exists, but server-side enforcement should also be checked.")

    if "tests" in absent:
        warnings.append("No test file found for this access-control task.")
        missing.append("test or spec file")
        score = min(score, 90)

    has_ui = not present.isdisjoint({"navigation_visibility", "protected_pages"})
    has_backend = not present.isdisjoint(
        {"rbac_rules", "auth_role_source", "middleware_routes", "backend_enforcement"}
    )
    # One-sided context: UI-only is capped harder than backend-only.
    if has_ui != has_backend:
        score = min(score, 75 if has_ui else 85)

    score = min(score, 99) if absent else score + 15
    return score, warnings, missing
158
+
159
+
160
def review_context(state: AgentState, ranked_files: list[RankedFile]) -> AgentState:
    """Score the selected context and record warnings on ``state``.

    Categories required for the task type but not covered by
    ``state.selected_files`` produce warnings and missing-context notes.
    Points are awarded for an interpretation, ranked files, related files,
    test coverage, staying within the token budget and covering nearly all
    required categories; each warning then costs 10 points. RBAC-like tasks
    receive an extra review pass. The final score is clamped to 0-100.

    Sets ``missing_context``, ``warnings`` (both de-duplicated, order kept)
    and ``quality_score`` on ``state`` and returns the same object.
    """
    # category -> (warning text, missing-context note)
    gap_messages = {
        "test": ("No test file found for this task.", "test or spec file"),
        "auth": ("Auth, role, or permission file may be missing.", "auth/role/permission file"),
        "backend": ("Backend route/controller/service context may be missing.", "backend route/controller/service file"),
        "route": ("Route or navigation context may be missing.", "route/navigation file"),
        "model": ("Model/schema/repository context may be missing.", "model/schema/repository file"),
        "config": ("Configuration context may be missing.", "config file"),
        "ui": ("Frontend component/page context may be missing.", "frontend component/page file"),
    }

    covered = covered_categories(state.selected_files)
    warnings: list[str] = []
    missing: list[str] = []

    if not ranked_files:
        warnings.append("No strong primary file was found for this task.")
        missing.append("primary task-relevant source file")

    required = REQUIRED_BY_INTENT.get(state.task_type, set())
    for category in sorted(required - covered):
        entry = gap_messages.get(category)
        if entry is not None:
            warning, gap = entry
            warnings.append(warning)
            missing.append(gap)

    security_task = state.task_type in {"access_control", "authentication"}
    if security_task and "backend" not in covered:
        if "ui" in covered:
            warnings.append("Only UI files found; security-sensitive tasks usually need backend checks too.")
        warnings.append("Backend enforcement file may be missing.")

    nearly_all_required = bool(required) and len(required & covered) >= max(1, len(required) - 1)
    awards = (
        (bool(state.interpretation), 30),
        (bool(ranked_files), 25),
        (bool(state.added_related_files), 15),
        ("test" in covered, 10),
        (state.estimated_tokens <= state.token_budget, 10),
        (nearly_all_required, 10),
    )
    score = sum(points for earned, points in awards if earned)
    score -= 10 * len(warnings)

    if _is_rbac_review_task(state):
        score, warnings, missing = _apply_rbac_review(state, warnings, missing, score, ranked_files)

    # dict.fromkeys removes duplicates while keeping first-seen order.
    state.missing_context = list(dict.fromkeys(missing))
    state.warnings = list(dict.fromkeys(warnings))
    state.quality_score = max(0, min(100, score))
    return state
222
+
223
+
224
def task_interpretation(intent: str) -> str:
    """Return ``"<label>. <detail>"`` describing a task intent.

    The label comes from ``INTENT_LABELS`` (default "General code task") and
    the detail from ``INTERPRETATIONS``, falling back to the entry for
    ``"general_code_task"`` when the intent is unknown.
    """
    heading = INTENT_LABELS.get(intent, "General code task")
    fallback_detail = INTERPRETATIONS["general_code_task"]
    explanation = INTERPRETATIONS.get(intent, fallback_detail)
    return f"{heading}. {explanation}"
@@ -0,0 +1,30 @@
1
+ """State tracked by the deterministic agent context builder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import asdict, dataclass, field
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+
10
@dataclass
class AgentState:
    """Mutable record of one agent context-building run.

    Only ``repo_path`` and ``task`` are required; every other field starts
    from a default and each list field gets its own fresh list per instance.
    """

    # --- inputs ---
    repo_path: str  # absolute path string when built through create()
    task: str

    # --- task understanding ---
    task_type: str = "general_code_task"
    interpretation: str = ""

    # --- file selection ---
    selected_files: list[str] = field(default_factory=list)
    added_related_files: list[str] = field(default_factory=list)

    # --- review results ---
    missing_context: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    quality_score: int = 0

    # --- token accounting ---
    token_budget: int = 30000
    estimated_tokens: int = 0

    # --- audit trail ---
    steps_taken: list[str] = field(default_factory=list)

    @classmethod
    def create(cls, repo_path: Path, task: str, token_budget: int) -> "AgentState":
        """Build a state for ``task`` with ``repo_path`` resolved and stored as a string."""
        resolved_root = repo_path.resolve()
        return cls(
            repo_path=str(resolved_root),
            task=task,
            token_budget=token_budget,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict via ``dataclasses.asdict``."""
        snapshot = asdict(self)
        return snapshot