contextl 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,271 @@
1
+ """
2
+ Repository Intelligence Engine
3
+ Step 2: Import Parser
4
+
5
+ Reads each scanned file and extracts import relationships.
6
+ Handles:
7
+ - Relative imports: import X from "./foo" or "../../lib/bar"
8
+ - Alias imports: import X from "@/components/Button"
9
+ - Type imports: import type { X } from "@/types"
10
+ - Named imports: import { X, Y } from "./utils"
11
+
12
+ Returns only imports that resolve to other files in the repository.
13
+ External packages (react, next, etc.) are ignored.
14
+ """
15
+
16
+ import re
17
+ from pathlib import Path
18
+ from dataclasses import dataclass, field
19
+
20
+ from scanner import ScannedFile, ScanResult
21
+
22
+
23
# Matches any static ES module import statement and captures the module path.
# The import clause is optional so bare side-effect imports are caught too.
# Recognised shapes (each may be preceded by the TypeScript `type` modifier):
#   import "./side-effect"
#   import Foo from "./foo"
#   import { Foo, Bar } from "./foo"
#   import * as ns from "./foo"
#   import Foo, { Bar } from "./foo"
#   import Foo, * as ns from "./foo"
IMPORT_PATTERN = re.compile(
    r"""import\s+(?:type\s+)?            # import or import type
    (?:
        (?:[\w$]+\s*,\s*)?               # optional default binding: Foo,
        (?:
            \{[^}]*\}                    # named imports: { Foo, Bar }
          | \*\s*as\s+[\w$]+             # namespace import: * as ns
        )
      | [\w$*]+                          # default binding on its own: Foo
    )?
    \s*(?:from\s+)?                      # optional "from"
    ['"](.*?)['"]                        # the module path in quotes
    """,
    re.VERBOSE,
)

# Also catch: import("./foo") — dynamic imports with a literal string argument
DYNAMIC_IMPORT_PATTERN = re.compile(r"""import\s*\(\s*['"](.*?)['"]\s*\)""")
39
+
40
+
41
@dataclass
class ImportRelationship:
    """One resolved import edge between two files of the repository.

    Attributes:
        source: Relative path of the file doing the importing.
        target: Relative path of the file being imported.
        raw_import: The import string as written in the source
            (e.g. "@/components/Button").
    """

    source: str
    target: str
    raw_import: str
47
+
48
+
49
@dataclass
class ParseResult:
    """Aggregate outcome of parsing imports across the repository.

    Attributes:
        relationships: Imports that resolved to another file in the repository.
        unresolved: ``(source_file, raw_import)`` pairs for imports that are
            external packages or could not be found.
    """

    relationships: list[ImportRelationship] = field(default_factory=list)
    unresolved: list[tuple[str, str]] = field(default_factory=list)

    def summary(self) -> str:
        """Return a text report: both counters, then every resolved import."""
        header = [
            f"Import relationships found: {len(self.relationships)}",
            f"Unresolved (external/missing): {len(self.unresolved)}",
            "",
            "Resolved imports:",
        ]
        # Each relationship contributes two lines: the importer, then its target.
        details = [
            text
            for edge in self.relationships
            for text in (
                f" {edge.source}",
                f" → {edge.target} (from '{edge.raw_import}')",
            )
        ]
        return "\n".join(header + details)
67
+
68
+
69
def _extract_raw_imports(source_code: str) -> list[str]:
    """Collect every module path imported by a TypeScript/JSX source text.

    Paths from static import statements come first, in order of appearance,
    followed by the paths of dynamic ``import("...")`` calls.
    """
    static_paths = [hit.group(1) for hit in IMPORT_PATTERN.finditer(source_code)]
    dynamic_paths = [hit.group(1) for hit in DYNAMIC_IMPORT_PATTERN.finditer(source_code)]
    return static_paths + dynamic_paths
77
+
78
+
79
def _is_external(import_path: str) -> bool:
    """Return True if this is an npm package rather than a local file.

    Local specifiers are:
      - relative paths: "./foo", "../../lib/bar", and the bare directory
        forms "." and ".." (which import a directory's index file);
      - the "@/" project alias.

    Everything else — including scoped npm packages such as "@scope/pkg" —
    is treated as external.
    """
    # "." and ".." have no trailing slash, so the prefix test below misses them.
    if import_path in (".", ".."):
        return False
    return not import_path.startswith(("./", "../", "@/"))
88
+
89
+
90
+ def _resolve_alias(import_path: str, alias_map: dict[str, str]) -> str | None:
91
+ """
92
+ Convert an aliased import path to a repo-relative path.
93
+ Example: "@/components/Button" → "frontend/components/Button"
94
+ """
95
+ for alias, real_prefix in alias_map.items():
96
+ if import_path.startswith(alias):
97
+ suffix = import_path[len(alias):].lstrip("/")
98
+ prefix = real_prefix.rstrip("/")
99
+ if prefix:
100
+ return prefix + "/" + suffix
101
+ else:
102
+ # alias maps directly to the repo root — no leading slash
103
+ return suffix
104
+ return None
105
+
106
+
107
def _resolve_relative(import_path: str, source_file: str) -> str:
    """
    Resolve a relative import path against the source file's directory.
    Example: source="frontend/app/page.tsx", import="../lib/api" → "frontend/lib/api"

    The result is computed lexically, so it stays relative whenever
    ``source_file`` is relative. It does not depend on the current working
    directory and does not touch the filesystem. A path that climbs above
    the starting directory keeps its leading ".." components.

    Args:
        import_path: Relative module specifier ("./x", "../y", ".", "..").
        source_file: Path of the importing file.

    Returns:
        The normalised path of the import target, without any extension added.
    """
    import os.path  # local import: this module does not import os at file level

    source_dir = os.path.dirname(source_file)
    return os.path.normpath(os.path.join(source_dir, import_path))
116
+
117
+
118
+ def _find_file_in_repo(
119
+ candidate: str,
120
+ file_index: dict[str, str],
121
+ root: str,
122
+ ) -> str | None:
123
+ """
124
+ Given a candidate path (without extension), find the matching file
125
+ in the repository. Tries adding common extensions if missing.
126
+
127
+ file_index maps absolute_path → relative_path.
128
+ """
129
+ root_path = Path(root)
130
+ candidate_path = Path(candidate)
131
+
132
+ # If candidate is absolute, make it relative to root
133
+ if candidate_path.is_absolute():
134
+ try:
135
+ candidate_path = candidate_path.relative_to(root_path)
136
+ except ValueError:
137
+ return None
138
+
139
+ candidate_str = str(candidate_path)
140
+
141
+ # Try exact match first (already has extension)
142
+ if candidate_str in file_index:
143
+ return candidate_str
144
+
145
+ # Try adding each supported extension
146
+ for ext in [".tsx", ".ts", ".jsx", ".js"]:
147
+ with_ext = candidate_str + ext
148
+ if with_ext in file_index:
149
+ return with_ext
150
+
151
+ # Try as a directory index file (e.g. components/Button/index.tsx)
152
+ for ext in [".tsx", ".ts", ".jsx", ".js"]:
153
+ index_path = candidate_str + "/index" + ext
154
+ if index_path in file_index:
155
+ return index_path
156
+
157
+ return None
158
+
159
+
160
def _detect_alias_map(scan_result: ScanResult) -> dict[str, str]:
    """
    Auto-detect import aliases from the first usable tsconfig.json under the
    repository root, falling back to a directory-layout heuristic.

    tsconfig handling:
      - dependency and VCS directories (node_modules, .git) are not searched,
        and shallower tsconfig.json files are tried before deeper ones;
      - each ``compilerOptions.paths`` entry is resolved against
        ``compilerOptions.baseUrl`` (default: the tsconfig's own directory);
      - only the first target of each alias is used;
      - targets that resolve outside the repository root are ignored;
      - files that cannot be read or are not strict JSON (for example because
        they contain comments) are skipped.

    Fallback: if every scanned file lives under one common top-level
    directory, "@/" maps to that directory; otherwise "@/" maps to the root.

    Returns a map like {"@/": "frontend/"} or {"@/": ""}
    """
    import json
    import os

    root = Path(scan_result.root)

    # Walk manually so node_modules can be pruned instead of fully traversed.
    tsconfigs = []
    for dirpath, dirnames, filenames in os.walk(root):
        dirnames[:] = [d for d in dirnames if d not in ("node_modules", ".git")]
        if "tsconfig.json" in filenames:
            tsconfigs.append(Path(dirpath) / "tsconfig.json")
    # Shallowest first; the string tie-breaker keeps the order deterministic.
    tsconfigs.sort(key=lambda p: (len(p.parts), str(p)))

    for tsconfig in tsconfigs:
        try:
            data = json.loads(tsconfig.read_text(encoding="utf-8-sig"))
        except (OSError, ValueError):
            # Unreadable, undecodable, or not strict JSON — try the next one.
            continue

        options = data.get("compilerOptions") if isinstance(data, dict) else None
        if not isinstance(options, dict):
            continue
        paths = options.get("paths")
        if not isinstance(paths, dict):
            continue
        base_url = options.get("baseUrl", ".")
        if not isinstance(base_url, str):
            base_url = "."

        tsconfig_dir = str(tsconfig.parent.relative_to(root))
        base_dir = os.path.join(tsconfig_dir, base_url)

        alias_map = {}
        for alias, targets in paths.items():
            if not isinstance(targets, list) or not targets:
                continue
            if not isinstance(targets[0], str):
                continue
            # "@/*": ["./src/*"] → strip the trailing "/*" glob from both sides
            clean_alias = alias.rstrip("/*")
            clean_target = targets[0].rstrip("/*")

            # normpath collapses "./" and "../" segments as path components,
            # unlike lstrip("./") which would also eat leading dots of names.
            prefix = os.path.normpath(os.path.join(base_dir, clean_target))
            prefix = prefix.replace(os.sep, "/")
            if prefix == ".":
                prefix = ""
            if prefix == ".." or prefix.startswith("../"):
                continue  # target lies outside the repository

            alias_map[clean_alias + "/"] = prefix + "/" if prefix else ""

        if alias_map:
            return alias_map

    # Heuristic fallback: if all files share a common top-level dir, use that.
    # A file sitting directly in the root has no top-level dir ("" marker).
    top_dirs = set()
    for scanned in scan_result.files:
        parts = Path(scanned.path).parts
        if parts:
            top_dirs.add(parts[0] if len(parts) > 1 else "")
    if len(top_dirs) == 1 and "" not in top_dirs:
        return {"@/": next(iter(top_dirs)) + "/"}

    return {"@/": ""}
201
+
202
+
203
def parse_imports(scan_result: ScanResult) -> ParseResult:
    """
    Parse all import statements from scanned files and resolve them
    to concrete file paths within the repository.

    An import is treated as local when it starts with "@/", with any alias
    prefix returned by _detect_alias_map, or when _is_external reports it as
    a relative path. Everything that is external or cannot be matched to a
    scanned file is recorded in ``ParseResult.unresolved``.

    Files that cannot be read or decoded as UTF-8 are skipped.

    Args:
        scan_result: Output from scan_repo().

    Returns:
        ParseResult containing all resolved ImportRelationships.
    """
    result = ParseResult()

    # Build lookup: relative_path → ScannedFile
    file_index: dict[str, ScannedFile] = {f.path: f for f in scan_result.files}

    # Auto-detect alias map (e.g. @/ → frontend/)
    alias_map = _detect_alias_map(scan_result)
    # str.startswith accepts a tuple; an empty tuple simply never matches.
    alias_prefixes = tuple(alias_map)

    for scanned_file in scan_result.files:
        try:
            source_code = Path(scanned_file.absolute_path).read_text(encoding="utf-8")
        except (OSError, ValueError):
            # Unreadable or not valid UTF-8 — skip this file, keep going.
            continue

        for raw in _extract_raw_imports(source_code):
            # Check aliases first so detected aliases other than "@/" are not
            # misclassified as npm packages.
            if raw.startswith("@/") or raw.startswith(alias_prefixes):
                candidate = _resolve_alias(raw, alias_map)
            elif _is_external(raw):
                candidate = None
            else:
                candidate = _resolve_relative(raw, scanned_file.path)

            matched = None
            if candidate is not None:
                # Find the actual file in the repo
                matched = _find_file_in_repo(candidate, file_index, scan_result.root)

            if matched:
                result.relationships.append(
                    ImportRelationship(
                        source=scanned_file.path,
                        target=matched,
                        raw_import=raw,
                    )
                )
            else:
                result.unresolved.append((scanned_file.path, raw))

    return result
261
+
262
+
263
if __name__ == "__main__":
    # Ad-hoc entry point: scan the directory given as the first command-line
    # argument (default: the current directory) and print the import summary.
    import sys
    from scanner import scan_repo

    cli_args = sys.argv[1:]
    repo_dir = cli_args[0] if cli_args else "."
    print(parse_imports(scan_repo(repo_dir)).summary())
package/python/main.py ADDED
@@ -0,0 +1,237 @@
1
+ """
2
+ Repository Intelligence Engine
3
+ CLI Entry Point
4
+
5
+ Usage:
6
+ python main.py <repo_path> "<query>" [--top N] [--json]
7
+
8
+ Examples:
9
+ python main.py ./my-next-app "change the download button"
10
+ python main.py ./my-next-app "fix file upload" --top 10
11
+ python main.py ./my-next-app "update footer text" --json
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import sys
17
+ import time
18
+ from pathlib import Path
19
+
20
+ from scanner import scan_repo
21
+ from import_parser import parse_imports
22
+ from graph_builder import build_graph
23
+ from query_engine import query, RankedFile
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Confidence thresholds
28
+ # ---------------------------------------------------------------------------
29
def _confidence(score: float) -> str:
    """Map a total relevance score to a confidence label.

    Scores strictly above 0.7 are "high", scores from 0.4 up to and
    including 0.7 are "medium", and anything else is "low".
    """
    if score > 0.7:
        return "high"
    return "medium" if score >= 0.4 else "low"
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Reasoning generator
41
+ # ---------------------------------------------------------------------------
42
def _reasoning(ranked: RankedFile, repo_graph) -> str:
    """
    Produce a one-line human-readable explanation of why this file ranked here.
    Uses the score breakdown from RankedFile plus graph context.

    Args:
        ranked: The ranked file; its keyword_score, content_score,
            neighbor_bonus, centrality, matched_terms and path are read.
        repo_graph: Object queried through ``get_dependents(path)`` to name
            up to two files that import this one.

    Returns:
        The applicable fragments joined by "; ", with only the first
        character upper-cased and a trailing period, or a fixed fallback
        sentence when no signal applies.
    """
    parts = []

    # Filename / path signal
    if ranked.keyword_score >= 0.8:
        parts.append("Filename strongly matches query terms")
    elif ranked.keyword_score >= 0.4:
        parts.append("Filename partially matches query terms")

    # Content signal
    if ranked.content_score >= 0.8:
        parts.append("file contents heavily reference query terms")
    elif ranked.content_score >= 0.4:
        parts.append("file contents mention query terms")

    # Graph context
    if ranked.neighbor_bonus > 0.05:
        try:
            dependents = repo_graph.get_dependents(ranked.path)
            if dependents:
                short_names = [Path(d).name for d in dependents[:2]]
                parts.append(f"imported by {', '.join(short_names)}")
        except Exception:
            # Deliberately best-effort: the explanation is cosmetic, so a
            # graph lookup failure only drops this fragment.
            pass

    # Centrality signal
    if ranked.centrality > 0.05:
        parts.append("highly connected file in the dependency graph")

    # Terms matched
    if ranked.matched_terms:
        term_str = ", ".join(f'"{t}"' for t in sorted(ranked.matched_terms)[:4])
        parts.append(f"matched terms: {term_str}")

    if not parts:
        return "Low-signal match based on graph proximity"

    sentence = "; ".join(parts)
    # Upper-case only the first character: str.capitalize() would also
    # lower-case everything after it, mangling file names and matched terms.
    return sentence[0].upper() + sentence[1:] + "."
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Output formatters
88
+ # ---------------------------------------------------------------------------
89
def _format_json(query_str: str, repo_path: str, results: list[RankedFile], repo_graph) -> str:
    """Serialise the query, the resolved repo path and the ranked results as indented JSON."""
    resolved_repo = str(Path(repo_path).resolve())

    entries = []
    for position, item in enumerate(results, start=1):
        entries.append(
            {
                "rank": position,
                "path": item.path,
                "score": round(item.total_score, 4),
                "confidence": _confidence(item.total_score),
                "matched_terms": sorted(item.matched_terms),
                "reasoning": _reasoning(item, repo_graph),
            }
        )

    payload = {
        "query": query_str,
        "repo": resolved_repo,
        "results": entries,
    }
    return json.dumps(payload, indent=2)
107
+
108
+
109
def _format_human(query_str: str, repo_path: str, results: list[RankedFile], repo_graph, elapsed: float) -> str:
    """Build the ANSI-coloured console report: a header, then one entry per result."""
    # ANSI escape sequences used for styling
    BOLD, DIM, RESET = "\033[1m", "\033[2m", "\033[0m"
    GREEN, YELLOW, RED, CYAN = "\033[32m", "\033[33m", "\033[31m", "\033[36m"

    badge_colors = {"high": GREEN, "medium": YELLOW, "low": RED}
    rule = f"{DIM}{'─' * 60}{RESET}"

    out = [
        "",
        f"{BOLD}Repository Intelligence Engine{RESET}",
        rule,
        f" Query : {BOLD}{query_str}{RESET}",
        f" Repo : {repo_path}",
        f" Found : {len(results)} result(s) {DIM}({elapsed:.2f}s){RESET}",
        rule,
        "",
    ]

    for rank, item in enumerate(results, 1):
        level = _confidence(item.total_score)
        badge = f"{badge_colors.get(level, RESET)}[{level.upper()}]{RESET}"

        if item.matched_terms:
            terms = ", ".join(sorted(item.matched_terms))
        else:
            terms = "—"
        reason = _reasoning(item, repo_graph)

        # Per-signal score breakdown shown after the total
        breakdown = (
            f"(kw={item.keyword_score:.3f} "
            f"content={item.content_score:.3f} "
            f"neighbor={item.neighbor_bonus:.3f} "
            f"pr={item.centrality:.4f})"
        )

        out.extend(
            [
                f" {BOLD}#{rank}{RESET} {CYAN}{item.path}{RESET} {badge}",
                f" Score : {item.total_score:.4f} {breakdown}",
                f" Terms : {terms}",
                f" Reason : {DIM}{reason}{RESET}",
                "",
            ]
        )

    return "\n".join(out)
153
+
154
+
155
+ # ---------------------------------------------------------------------------
156
+ # Main pipeline
157
+ # ---------------------------------------------------------------------------
158
def run_engine(repo_path: str, query_str: str, top_n: int):
    """
    Run scan → import parsing → graph build → query, timing the whole run.

    Returns:
        A ``(results, repo_graph, elapsed_seconds)`` tuple.

    Exits the process with status 1 (after printing the message to stderr)
    when scan_repo raises ValueError. An empty scan only prints a warning
    to stderr; the pipeline still runs.
    """
    started = time.perf_counter()

    try:
        scan = scan_repo(repo_path)
    except ValueError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    if scan.total_files == 0:
        print("Warning: No source files found in the repository.", file=sys.stderr)

    repo_graph = build_graph(scan, parse_imports(scan))
    results = query(query_str, repo_graph, top_n=top_n)

    return results, repo_graph, time.perf_counter() - started
180
+
181
+
182
+ # ---------------------------------------------------------------------------
183
+ # CLI
184
+ # ---------------------------------------------------------------------------
185
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    Positional arguments: ``repo_path`` and ``query``.
    Options: ``--top``/``-n`` (positive integer, default 5) and ``--json``.
    The module docstring is shown as the help epilog.
    """

    def positive_int(text: str) -> int:
        """Parse a result count; zero and negative counts are rejected."""
        value = int(text)  # a ValueError here is reported by argparse itself
        if value < 1:
            raise argparse.ArgumentTypeError(
                f"must be a positive integer, got {value}"
            )
        return value

    parser = argparse.ArgumentParser(
        prog="prune",
        description=(
            "Repository Intelligence Engine — find the most relevant files "
            "for a given change request without LLMs or embeddings."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument("repo_path", help="Path to the repository root")
    parser.add_argument("query", help="Natural-language query (e.g. 'fix the upload error handler')")
    parser.add_argument(
        "--top", "-n",
        type=positive_int,
        default=5,
        metavar="N",
        help="Number of results to return (default: 5)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        default=False,
        help="Output clean JSON instead of human-readable text",
    )
    return parser
211
+
212
+
213
def main():
    """CLI entry point: parse arguments, run the engine and print the outcome.

    With results, prints them as JSON or as the human-readable report and
    returns. Without results, prints an empty JSON result set or a
    "no relevant files" message and exits with status 0.
    """
    args = build_parser().parse_args()
    results, repo_graph, elapsed = run_engine(args.repo_path, args.query, args.top)

    if results:
        if args.json:
            rendered = _format_json(args.query, args.repo_path, results, repo_graph)
        else:
            rendered = _format_human(args.query, args.repo_path, results, repo_graph, elapsed)
        print(rendered)
        return

    # Nothing matched: still emit well-formed output in the requested format.
    if args.json:
        empty_payload = {
            "query": args.query,
            "repo": str(Path(args.repo_path).resolve()),
            "results": [],
        }
        print(json.dumps(empty_payload, indent=2))
    else:
        print(f"\nNo relevant files found for: '{args.query}'")
    sys.exit(0)


if __name__ == "__main__":
    main()