faultlines 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. faultline/__init__.py +1 -0
  2. faultline/analyzer/__init__.py +0 -0
  3. faultline/analyzer/ast_extractor.py +354 -0
  4. faultline/analyzer/cochange_detector.py +198 -0
  5. faultline/analyzer/coverage.py +66 -0
  6. faultline/analyzer/evolve.py +664 -0
  7. faultline/analyzer/features.py +1330 -0
  8. faultline/analyzer/git.py +251 -0
  9. faultline/analyzer/import_graph.py +711 -0
  10. faultline/analyzer/incremental.py +201 -0
  11. faultline/analyzer/repo_classifier.py +355 -0
  12. faultline/analyzer/shared_files.py +121 -0
  13. faultline/analyzer/validation.py +232 -0
  14. faultline/analyzer/workspace.py +372 -0
  15. faultline/cli.py +1518 -0
  16. faultline/digest/__init__.py +2 -0
  17. faultline/digest/__main__.py +4 -0
  18. faultline/digest/cli.py +89 -0
  19. faultline/digest/git_reader.py +160 -0
  20. faultline/digest/summarizer.py +250 -0
  21. faultline/integrations/__init__.py +13 -0
  22. faultline/integrations/base.py +213 -0
  23. faultline/integrations/posthog_provider.py +191 -0
  24. faultline/integrations/sentry_provider.py +171 -0
  25. faultline/llm/__init__.py +0 -0
  26. faultline/llm/cost.py +280 -0
  27. faultline/llm/deepseek_client.py +148 -0
  28. faultline/llm/detector.py +4222 -0
  29. faultline/llm/flow_detector.py +933 -0
  30. faultline/llm/pipeline.py +201 -0
  31. faultline/llm/sonnet_scanner.py +1557 -0
  32. faultline/models/__init__.py +0 -0
  33. faultline/models/types.py +98 -0
  34. faultline/output/__init__.py +0 -0
  35. faultline/output/reporter.py +245 -0
  36. faultline/output/writer.py +38 -0
  37. faultlines-0.1.0.dist-info/METADATA +287 -0
  38. faultlines-0.1.0.dist-info/RECORD +41 -0
  39. faultlines-0.1.0.dist-info/WHEEL +4 -0
  40. faultlines-0.1.0.dist-info/entry_points.txt +2 -0
  41. faultlines-0.1.0.dist-info/licenses/LICENSE +183 -0
faultline/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,354 @@
1
+ """
2
+ Regex-based signature extractor for TypeScript and JavaScript files.
3
+
4
+ Extracts exports, route definitions, and imports from each file
5
+ without any external AST dependencies. This "skeleton" is then
6
+ fed to an LLM to identify user-facing flows within each feature.
7
+
8
+ Supported patterns:
9
+ - Named exports: export function Foo / export const Foo / export class Foo
10
+ - Default exports: export default function Foo / export default class Foo
11
+ - Re-exports: export { Foo, Bar }
12
+ - Next.js routes: export async function GET/POST/PUT/DELETE/PATCH (App Router)
13
+ - Next.js pages: getServerSideProps, getStaticProps (Pages Router)
14
+ - Express routes: router.get('/path', ...) / app.post('/path', ...)
15
+ - ES imports: import X from 'Y'
16
+ """
17
+ import re
18
+ from dataclasses import dataclass, field
19
+ from pathlib import Path
20
+
21
+ from faultline.models.types import SymbolRange
22
+
23
+
24
+ _TS_JS_EXTENSIONS = {".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"}
25
+ _PYTHON_EXTENSIONS = {".py"}
26
+
27
+ # Named function/class/const exports
28
+ _RE_NAMED_EXPORT = re.compile(
29
+ r"export\s+(?:async\s+)?(?:function\s*\*?\s*|class\s+|const\s+|let\s+|var\s+)(\w+)"
30
+ )
31
+ # Default function/class exports with a name
32
+ _RE_DEFAULT_EXPORT = re.compile(
33
+ r"export\s+default\s+(?:async\s+)?(?:function|class)\s+(\w+)"
34
+ )
35
+ # Re-export block: export { Foo, Bar as Baz }
36
+ _RE_REEXPORT = re.compile(r"export\s*\{([^}]+)\}")
37
+
38
+ # Next.js App Router HTTP method handlers
39
+ _RE_NEXTJS_ROUTE = re.compile(
40
+ r"export\s+(?:async\s+)?function\s+(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\b"
41
+ )
42
+ # Next.js Pages Router data fetchers
43
+ _RE_NEXTJS_PAGE = re.compile(
44
+ r"export\s+(?:async\s+)?function\s+(getServerSideProps|getStaticProps|getStaticPaths)\b"
45
+ )
46
+ # Express/Fastify route definitions: router.get('/path', ...) or app.post('/path')
47
+ _RE_EXPRESS_ROUTE = re.compile(
48
+ r"\b(?:router|app|server)\s*\.\s*(get|post|put|delete|patch|head)\s*\(\s*['\"]([^'\"]+)['\"]"
49
+ )
50
+ # ES6 import paths
51
+ _RE_IMPORT = re.compile(r"import\s+.*?from\s+['\"]([^'\"]+)['\"]")
52
+
53
+ # Python patterns
54
+ _RE_PYTHON_CLASS = re.compile(r"^class\s+(\w+)", re.MULTILINE)
55
+ _RE_PYTHON_FUNC = re.compile(r"^(?:async\s+)?def\s+([a-zA-Z]\w*)", re.MULTILINE)
56
+ _RE_PYTHON_ROUTE = re.compile(
57
+ r"@\w*(?:router|app|blueprint|bp|api)\s*\.\s*(get|post|put|delete|patch)\s*\(\s*['\"]([^'\"]+)['\"]",
58
+ re.IGNORECASE,
59
+ )
60
+
61
+
62
+ # Named import destructuring: import { FOO, BAR as Baz } from './path'
63
+ _RE_NAMED_IMPORT = re.compile(
64
+ r"import\s*\{([^}]+)\}\s*from\s*['\"]([^'\"]+)['\"]"
65
+ )
66
+ # Namespace import: import * as X from './path'
67
+ _RE_NAMESPACE_IMPORT = re.compile(
68
+ r"import\s*\*\s*as\s+\w+\s+from\s*['\"]([^'\"]+)['\"]"
69
+ )
70
+
71
+ # TS type/interface/enum exports
72
+ _RE_TYPE_EXPORT = re.compile(
73
+ r"export\s+(?:declare\s+)?(?:type|interface|enum)\s+(\w+)"
74
+ )
75
+
76
+
77
@dataclass
class FileSignature:
    """Extracted signature of one source file: exports, routes, and imports."""

    path: str
    exports: list[str] = field(default_factory=list)
    routes: list[str] = field(default_factory=list)
    imports: list[str] = field(default_factory=list)
    symbol_ranges: list[SymbolRange] = field(default_factory=list)
    # Full file text; excluded from repr to keep debug output readable.
    source: str = field(default="", repr=False)

    def is_empty(self) -> bool:
        """True when the file produced no signal at all."""
        return not (self.exports or self.routes or self.imports)

    def to_prompt_line(self) -> str:
        """Formats the signature as a single line for LLM prompts."""
        segments: list[str] = []
        if self.exports:
            # Cap list length so one file cannot dominate the prompt.
            segments.append("exports: " + ", ".join(self.exports[:8]))
        if self.routes:
            segments.append("routes: " + ", ".join(self.routes[:5]))
        if not segments:
            return ""
        return f" {self.path} → {' | '.join(segments)}"
99
+
100
+
101
def extract_signatures(
    files: list[str],
    repo_path: str,
) -> dict[str, FileSignature]:
    """
    Extracts function/route/import signatures from TypeScript and JavaScript files.

    Args:
        files: List of relative file paths (relative to repo_path).
        repo_path: Absolute path to the repository root.

    Returns:
        Dict mapping relative file path → FileSignature. Files with an
        unsupported extension, unreadable files, and files yielding no
        signal are omitted.
    """
    root = Path(repo_path)
    signatures: dict[str, FileSignature] = {}

    for rel_path in files:
        ext = Path(rel_path).suffix.lower()
        is_python = ext in _PYTHON_EXTENSIONS
        if not is_python and ext not in _TS_JS_EXTENSIONS:
            continue

        try:
            text = (root / rel_path).read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Unreadable file (permissions, broken symlink, ...) — skip it.
            continue

        if is_python:
            sig = _parse_python_file(rel_path, text)
        else:
            sig = _parse_file(rel_path, text)
            sig.symbol_ranges = extract_symbol_ranges(text)
        sig.source = text

        if not sig.is_empty():
            signatures[rel_path] = sig

    return signatures
140
+
141
+
142
def _parse_file(rel_path: str, source: str) -> FileSignature:
    """Builds a FileSignature for a TS/JS file via regex scanning."""
    sig = FileSignature(path=rel_path)
    recorded: set[str] = set()

    def add_export(name: str) -> None:
        # Deduplicate while preserving first-seen order.
        if name and name not in recorded:
            recorded.add(name)
            sig.exports.append(name)

    for m in _RE_NAMED_EXPORT.finditer(source):
        add_export(m.group(1))

    for m in _RE_DEFAULT_EXPORT.finditer(source):
        add_export(m.group(1))

    for m in _RE_REEXPORT.finditer(source):
        for token in m.group(1).split(","):
            # "Foo as Bar" exports under the alias "Bar".
            add_export(token.strip().split(" as ")[-1].strip())

    # Route definitions.
    for m in _RE_NEXTJS_ROUTE.finditer(source):
        # App Router: the file location itself determines the route path.
        sig.routes.append(f"{m.group(1)} {_infer_nextjs_route_path(rel_path)}")

    for m in _RE_NEXTJS_PAGE.finditer(source):
        sig.routes.append(m.group(1))

    for m in _RE_EXPRESS_ROUTE.finditer(source):
        sig.routes.append(f"{m.group(1).upper()} {m.group(2)}")

    # Imports: keep only internal/relative specifiers, skip node_modules.
    for m in _RE_IMPORT.finditer(source):
        target = m.group(1)
        if target.startswith((".", "@/", "~/")):
            sig.imports.append(target)

    return sig
191
+
192
+
193
def _parse_python_file(rel_path: str, source: str) -> FileSignature:
    """Builds a FileSignature for a Python file via regex scanning."""
    sig = FileSignature(path=rel_path)
    recorded: set[str] = set()

    # Classes first, then functions — first occurrence of a name wins.
    for pattern in (_RE_PYTHON_CLASS, _RE_PYTHON_FUNC):
        for m in pattern.finditer(source):
            name = m.group(1)
            if name not in recorded:
                recorded.add(name)
                sig.exports.append(name)

    # Decorator-style routes (Flask/FastAPI-like: @app.get("/path"), ...).
    for m in _RE_PYTHON_ROUTE.finditer(source):
        sig.routes.append(f"{m.group(1).upper()} {m.group(2)}")

    return sig
215
+
216
+
217
+ def _infer_nextjs_route_path(rel_path: str) -> str:
218
+ """
219
+ Infers the Next.js API route path from the file's relative path.
220
+
221
+ Examples:
222
+ app/api/auth/login/route.ts → /api/auth/login
223
+ pages/api/auth.ts → /api/auth
224
+ src/app/api/users/route.ts → /api/users
225
+ """
226
+ p = Path(rel_path)
227
+ parts = p.parts
228
+
229
+ # Drop leading src/, app/ wrappers
230
+ skip = {"src", "app"}
231
+ start = 0
232
+ for i, part in enumerate(parts):
233
+ if part not in skip:
234
+ start = i
235
+ break
236
+
237
+ trimmed = parts[start:]
238
+
239
+ # Drop trailing "route.ts" filename
240
+ if trimmed and Path(trimmed[-1]).stem == "route":
241
+ trimmed = trimmed[:-1]
242
+ else:
243
+ # Drop the filename extension for pages/api style
244
+ trimmed = trimmed[:-1] + (Path(trimmed[-1]).stem,) if trimmed else trimmed
245
+
246
+ return "/" + "/".join(trimmed) if trimmed else "/"
247
+
248
+
249
def extract_symbol_ranges(source: str) -> list[SymbolRange]:
    """Extracts line ranges for each exported symbol in TS/JS source.

    MVP heuristic: each export's end_line = next export's start_line - 1,
    or EOF for the last export. This avoids complex brace-balancing but
    gives reasonable line attribution for most files.
    """
    total_lines = source.count("\n") + 1

    def line_of(pos: int) -> int:
        # 1-based line number of a character offset.
        return source[:pos].count("\n") + 1

    # (start_line, name, kind) for every export-like construct found.
    exports: list[tuple[int, str, str]] = []

    for match in _RE_NAMED_EXPORT.finditer(source):
        text = match.group(0)
        # Use word-boundary matches for the declaration keyword: a plain
        # substring test misclassifies names such as "functionTable" or
        # "classNames" (e.g. `export const classNames` would become "class").
        if re.search(r"\bfunction\b", text):
            kind = "function"
        elif re.search(r"\bclass\b", text):
            kind = "class"
        else:
            kind = "const"
        exports.append((line_of(match.start()), match.group(1), kind))

    for match in _RE_DEFAULT_EXPORT.finditer(source):
        # Same word-boundary care: `export default function classifyX`
        # must not be tagged "class".
        kind = "class" if re.search(r"\bclass\b", match.group(0)) else "function"
        exports.append((line_of(match.start()), match.group(1), kind))

    for match in _RE_TYPE_EXPORT.finditer(source):
        # Both `interface` and `type` map to kind "type"; only enums differ.
        kind = "enum" if re.search(r"\benum\b", match.group(0)) else "type"
        exports.append((line_of(match.start()), match.group(1), kind))

    for match in _RE_REEXPORT.finditer(source):
        line = line_of(match.start())
        for token in match.group(1).split(","):
            # "Foo as Bar" re-exports under the alias "Bar".
            name = token.strip().split(" as ")[-1].strip()
            if name:
                exports.append((line, name, "reexport"))

    if not exports:
        return []

    # Sort by start_line, deduplicate by name (keep first occurrence).
    exports.sort(key=lambda item: item[0])
    seen: set[str] = set()
    unique: list[tuple[int, str, str]] = []
    for entry in exports:
        if entry[1] not in seen:
            seen.add(entry[1])
            unique.append(entry)

    # Assign end_line: next export's start_line - 1, or EOF for the last.
    ranges: list[SymbolRange] = []
    for i, (start, name, kind) in enumerate(unique):
        end = unique[i + 1][0] - 1 if i + 1 < len(unique) else total_lines
        ranges.append(SymbolRange(
            name=name, start_line=start, end_line=max(start, end), kind=kind,
        ))

    return ranges
324
+
325
+
326
def extract_named_imports(source: str) -> dict[str, set[str]]:
    """Extracts named imports from TS/JS source.

    Returns:
        Dict mapping module path → set of imported symbol names.
        For namespace imports (import * as X), returns {"*"} as the symbol set.
    """
    def is_internal(module: str) -> bool:
        # Only project-relative specifiers count; bare package names are skipped.
        return module.startswith((".", "@/", "~/"))

    by_module: dict[str, set[str]] = {}

    for m in _RE_NAMED_IMPORT.finditer(source):
        module = m.group(2)
        if not is_internal(module):
            continue
        # "Foo as Bar" binds Bar locally, but the exported symbol is Foo.
        symbols = {
            token.strip().split(" as ")[0].strip()
            for token in m.group(1).split(",")
        }
        symbols.discard("")
        if symbols:
            by_module.setdefault(module, set()).update(symbols)

    for m in _RE_NAMESPACE_IMPORT.finditer(source):
        module = m.group(1)
        if is_internal(module):
            by_module.setdefault(module, set()).add("*")

    return by_module
@@ -0,0 +1,198 @@
1
+ """Deterministic feature detection via co-change community detection.
2
+
3
+ Files that frequently change together in git history are grouped into the same
4
+ feature using Union-Find. This is the primary detection algorithm.
5
+
6
+ Same git history → same groups every time (100% deterministic).
7
+
8
+ When --llm is enabled, the LLM *names* the groups (but does not determine them).
9
+ Results are cached, so repeated runs return identical names.
10
+ """
11
+
12
+ from collections import defaultdict
13
+ from itertools import combinations
14
+ from pathlib import Path
15
+
16
+ from faultline.models.types import Commit
17
+
18
+ # Minimum commits in history to trust co-change signal.
19
+ # Below this threshold the caller falls back to directory heuristics.
20
+ _MIN_COMMITS_FOR_COCHANGE = 50
21
+
22
+ # Jaccard coupling threshold for merging two files into the same feature.
23
+ # Jaccard = commits_touching_both / commits_touching_either.
24
+ # 0.20 means "these files change together in ≥20% of commits that touch either one".
25
+ _COCHANGE_THRESHOLD = 0.20
26
+
27
+ # Commits that touch more files than this are excluded (bulk ops / large refactors).
28
+ _MAX_FILES_PER_COMMIT = 30
29
+
30
+ # A file must appear in at least this many commits to participate in coupling.
31
+ # Files edited only once produce noisy pairs.
32
+ _MIN_FILE_COMMITS = 2
33
+
34
+ # Directory names that are generic structural wrappers, not business feature names.
35
+ _SKIP_DIRS = {
36
+ "src", "app", "lib", "pkg", "internal", "core",
37
+ "views", "pages", "screens", "routes", "containers",
38
+ "components", "layouts", "features",
39
+ }
40
+
41
+
42
+ class _UnionFind:
43
+ """Path-compressed, union-by-rank disjoint set data structure."""
44
+
45
+ def __init__(self, nodes: list[str]) -> None:
46
+ self._parent: dict[str, str] = {n: n for n in nodes}
47
+ self._rank: dict[str, int] = defaultdict(int)
48
+
49
+ def find(self, x: str) -> str:
50
+ if self._parent[x] != x:
51
+ self._parent[x] = self.find(self._parent[x]) # path compression
52
+ return self._parent[x]
53
+
54
+ def union(self, x: str, y: str) -> None:
55
+ rx, ry = self.find(x), self.find(y)
56
+ if rx == ry:
57
+ return
58
+ if self._rank[rx] < self._rank[ry]:
59
+ rx, ry = ry, rx
60
+ self._parent[ry] = rx
61
+ if self._rank[rx] == self._rank[ry]:
62
+ self._rank[rx] += 1
63
+
64
+ def groups(self) -> dict[str, list[str]]:
65
+ clusters: dict[str, list[str]] = defaultdict(list)
66
+ for node in self._parent:
67
+ clusters[self.find(node)].append(node)
68
+ return dict(clusters)
69
+
70
+
71
def detect_features_from_cochange(
    files: list[str],
    commits: list[Commit],
) -> dict[str, list[str]] | None:
    """Groups files into features based on co-change patterns.

    Two files are merged (via Union-Find) when their Jaccard coupling —
    commits touching both / commits touching either — reaches
    _COCHANGE_THRESHOLD.

    Returns None when there are fewer than _MIN_COMMITS_FOR_COCHANGE commits,
    or when no co-change signal survives filtering — the caller should fall
    back to directory-based heuristics in either case.

    The returned dict maps feature_name → list of file paths. Names are
    directory-derived; pass the result to name_clusters_llm() or
    name_clusters_ollama() to replace them with semantic business domain names.

    Args:
        files: Tracked file paths (relative, with path prefix already stripped).
        commits: Commit history for the analysis window.
    """
    if len(commits) < _MIN_COMMITS_FOR_COCHANGE:
        return None

    tracked = set(files)

    # file → SHAs of the non-bulk commits touching it.
    touches: dict[str, set[str]] = defaultdict(set)
    for commit in commits:
        if len(commit.files_changed) > _MAX_FILES_PER_COMMIT:
            continue  # bulk op / sweeping refactor — not a feature signal
        for path in commit.files_changed:
            if path in tracked:
                touches[path].add(commit.sha)

    # Inverted index (SHA → files), restricted to files with enough history.
    by_commit: dict[str, list[str]] = defaultdict(list)
    for path, shas in touches.items():
        if len(shas) >= _MIN_FILE_COMMITS:
            for sha in shas:
                by_commit[sha].append(path)

    # Co-occurrence count per (sorted) file pair.
    together: dict[tuple[str, str], int] = defaultdict(int)
    for group in by_commit.values():
        if len(group) >= 2:
            for pair in combinations(sorted(group), 2):
                together[pair] += 1

    # Merge pairs whose Jaccard coupling clears the threshold.
    uf = _UnionFind(files)
    for (left, right), both in together.items():
        either = len(touches.get(left, set())) + len(touches.get(right, set())) - both
        if either > 0 and both / either >= _COCHANGE_THRESHOLD:
            uf.union(left, right)

    clusters = _finalize_clusters(uf.groups())
    return clusters or None
130
+
131
+
132
def _finalize_clusters(
    raw_groups: dict[str, list[str]],
) -> dict[str, list[str]]:
    """Converts raw Union-Find groups to named feature clusters.

    Multi-file clusters receive a directory-derived name.
    Singleton clusters are merged into a same-directory cluster if one exists,
    or grouped together under a shared directory name.
    """
    clusters: dict[str, list[str]] = {}
    leftovers: list[str] = []

    for group in raw_groups.values():
        ordered = sorted(group)
        if len(ordered) < 2:
            leftovers.extend(ordered)
            continue
        label = _unique_name(_cluster_name(ordered), clusters)
        clusters[label] = ordered

    # Map each directory hosting a cluster member to that cluster,
    # so singleton files can be absorbed by their neighbours.
    owner_by_dir = {
        str(Path(member).parent): label
        for label, members in clusters.items()
        for member in members
    }

    # Absorb singletons into a same-directory cluster when possible;
    # otherwise bucket them by their derived feature name.
    orphans: dict[str, list[str]] = defaultdict(list)
    for path in leftovers:
        parent = str(Path(path).parent)
        if parent in owner_by_dir:
            clusters[owner_by_dir[parent]].append(path)
        else:
            orphans[_feature_name_from_path(path)].append(path)

    for label, paths in orphans.items():
        clusters[_unique_name(label, clusters)] = sorted(paths)

    return clusters
173
+
174
+
175
def _cluster_name(files: list[str]) -> str:
    """Derives a cluster name from the most common meaningful directory component."""
    tally: dict[str, int] = defaultdict(int)
    for path in files:
        tally[_feature_name_from_path(path)] += 1
    # max() returns the first maximal key in iteration (insertion) order.
    return max(tally, key=tally.__getitem__)
181
+
182
+
183
def _feature_name_from_path(path: str) -> str:
    """Extracts the first non-generic directory component as a feature name."""
    # parts[:-1] drops the filename — only directories can name a feature.
    directories = (part.lower() for part in Path(path).parts[:-1])
    return next((d for d in directories if d not in _SKIP_DIRS), "root")
189
+
190
+
191
+ def _unique_name(name: str, existing: dict) -> str:
192
+ """Returns a unique name by appending a numeric suffix if needed."""
193
+ if name not in existing:
194
+ return name
195
+ suffix = 2
196
+ while f"{name}-{suffix}" in existing:
197
+ suffix += 1
198
+ return f"{name}-{suffix}"
@@ -0,0 +1,66 @@
1
+ """Reads test coverage data from standard coverage file formats."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+
7
+ def read_coverage(repo_path: str, coverage_path: str | None = None) -> dict[str, float]:
8
+ """
9
+ Returns file_path → line coverage % (0–100).
10
+
11
+ If coverage_path is provided, reads that file directly (lcov or jest format).
12
+ Otherwise tries coverage/coverage-summary.json (Jest/NYC) then coverage/lcov.info.
13
+ Returns empty dict if no coverage data found.
14
+ """
15
+ if coverage_path:
16
+ p = Path(coverage_path)
17
+ if not p.exists():
18
+ return {}
19
+ if p.name.endswith(".json"):
20
+ return _read_jest(p)
21
+ return _read_lcov(p)
22
+
23
+ root = Path(repo_path)
24
+ # Auto-detect: check common locations
25
+ candidates = [
26
+ root / "coverage" / "coverage-summary.json",
27
+ root / "coverage" / "lcov.info",
28
+ root / "lcov.info",
29
+ root / "coverage.lcov",
30
+ ]
31
+ for candidate in candidates:
32
+ if candidate.exists():
33
+ if candidate.name.endswith(".json"):
34
+ return _read_jest(candidate)
35
+ return _read_lcov(candidate)
36
+ return {}
37
+
38
+
39
+ def _read_jest(path: Path) -> dict[str, float]:
40
+ data = json.loads(path.read_text())
41
+ result: dict[str, float] = {}
42
+ for file_path, stats in data.items():
43
+ if file_path == "total":
44
+ continue
45
+ pct = (stats.get("lines") or {}).get("pct")
46
+ if pct is not None:
47
+ result[str(file_path)] = float(pct)
48
+ return result
49
+
50
+
51
+ def _read_lcov(path: Path) -> dict[str, float]:
52
+ result: dict[str, float] = {}
53
+ current: str | None = None
54
+ lf = lh = 0
55
+ for line in path.read_text().splitlines():
56
+ if line.startswith("SF:"):
57
+ current = line[3:]
58
+ lf = lh = 0
59
+ elif line.startswith("LF:"):
60
+ lf = int(line[3:])
61
+ elif line.startswith("LH:"):
62
+ lh = int(line[3:])
63
+ elif line == "end_of_record" and current:
64
+ result[current] = round(lh / lf * 100, 1) if lf > 0 else 0.0
65
+ current = None
66
+ return result