devarch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import ast
5
+ from pathlib import Path
6
+
7
+ from ..models import Artifact
8
+ from ..utils.fs import path_kind, read_text
9
+
10
+
11
def complexity_from_text(content: str) -> int:
    """Estimate the branching complexity of *content* from keyword counts.

    The score starts at 1 and grows by one for every occurrence of a
    branching token.  Matching is plain substring counting, so tokens that
    contain one another are counted independently (``"elif "`` also
    contains ``"if "``).
    """
    branch_tokens = (
        "if ", "elif ", "for ", "while ", " and ", " or ",
        "case ", "except ", "?", "match ",
    )
    return 1 + sum(content.count(token) for token in branch_tokens)
16
+
17
+
18
def dependency_count(content: str) -> int:
    """Count the lines of *content* that look like dependency declarations.

    A line (after stripping surrounding whitespace) counts once when it
    starts with ``import ``, ``from `` or ``require(``, or when it contains
    ``import `` anywhere (e.g. an ES-module import with leading tokens).

    Each line contributes at most one to the total.  The previous
    implementation used two independent checks, so every Python
    ``import x`` / ``from x import y`` line matched both and was counted
    twice, doubling the reported dependency count.
    """
    count = 0
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if line.startswith(("import ", "from ", "require(")) or "import " in line:
            count += 1
    return count
27
+
28
+
29
def find_monsters(
    files: list[Path],
    max_lines: int = 800,
    complexity_threshold: int = 35,
    dependency_threshold: int = 25,
) -> list[Artifact]:
    """Flag text files that are oversized, overly complex or dependency-heavy.

    For every path whose ``path_kind`` is ``"text"`` the file is read and
    three metrics are computed: line count (newline count + 1), the
    ``complexity_from_text`` score and the ``dependency_count``.  A file is
    reported when any metric reaches its threshold; the risk is ``"Severe"``
    when any metric reaches twice its threshold and ``"High"`` otherwise.

    Returns one ``monster_file`` artifact per offending file, in input order.
    """
    monsters: list[Artifact] = []
    for candidate in files:
        if path_kind(candidate) != "text":
            continue
        text = read_text(candidate)
        metrics = {
            "lines": text.count("\n") + 1,
            "complexity": complexity_from_text(text),
            "dependencies": dependency_count(text),
        }
        limits = {
            "lines": max_lines,
            "complexity": complexity_threshold,
            "dependencies": dependency_threshold,
        }
        exceeded = [name for name in metrics if metrics[name] >= limits[name]]
        if not exceeded:
            continue
        is_severe = any(metrics[name] >= limits[name] * 2 for name in metrics)
        monsters.append(
            Artifact(
                path=candidate,
                kind="monster_file",
                risk="Severe" if is_severe else "High",
                detail=", ".join(f"{name}={metrics[name]}" for name in exceeded),
                confidence=0.9,
                metadata=dict(metrics),
            )
        )
    return monsters
@@ -0,0 +1,338 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter, defaultdict
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ import re
7
+
8
+ from ..models import Artifact
9
+ from ..scanner.discovery import build_reference_map, build_text_index
10
+ from ..utils.fs import RepoView, collect_repository, path_kind
11
+
12
+
13
+ @dataclass(slots=True)
14
+ class CleanupPriority:
15
+ level: int
16
+ items: list[str] = field(default_factory=list)
17
+
18
+
19
+ @dataclass(slots=True)
20
+ class DeletionAnalysis:
21
+ path: Path
22
+ safe_confidence: float
23
+ affected_files: int
24
+ recommendation: str
25
+ references: list[Path] = field(default_factory=list)
26
+ dependencies: list[Path] = field(default_factory=list)
27
+
28
+
29
+ @dataclass(slots=True)
30
+ class RefactorCandidate:
31
+ name: str
32
+ locations: list[Path]
33
+ recommendation: str
34
+ confidence: float
35
+
36
+
37
+ @dataclass(slots=True)
38
+ class RouteFinding:
39
+ kind: str
40
+ path: Path
41
+ detail: str
42
+ confidence: float
43
+
44
+
45
+ @dataclass(slots=True)
46
+ class ConfigFinding:
47
+ kind: str
48
+ name: str
49
+ confidence: float
50
+ locations: list[Path] = field(default_factory=list)
51
+
52
+
53
+ @dataclass(slots=True)
54
+ class MigrationFinding:
55
+ path: Path
56
+ kind: str
57
+ status: str
58
+ confidence: float
59
+
60
+
61
+ @dataclass(slots=True)
62
+ class DependencyWarning:
63
+ name: str
64
+ only_used_for: int
65
+ recommendation: str
66
+ confidence: float
67
+
68
+
69
+ @dataclass(slots=True)
70
+ class DriftReport:
71
+ original: str
72
+ current: str
73
+ severity: str
74
+ cause: str
75
+
76
+
77
+ @dataclass(slots=True)
78
+ class PRReport:
79
+ removed: list[str] = field(default_factory=list)
80
+ reduced: list[str] = field(default_factory=list)
81
+ improved: list[str] = field(default_factory=list)
82
+
83
+
84
+ @dataclass(slots=True)
85
+ class StatusSummary:
86
+ debt: int
87
+ complexity: int
88
+ dead_code: int
89
+ route_count: int
90
+ dependency_count: int
91
+ cleanup_opportunities: int
92
+ recommendations: list[str] = field(default_factory=list)
93
+
94
+
95
def _text_files(view: RepoView) -> list[Path]:
    """Return the files of *view* that ``path_kind`` classifies as ``"text"``."""
    selected: list[Path] = []
    for candidate in view.files:
        if path_kind(candidate) == "text":
            selected.append(candidate)
    return selected
97
+
98
+
99
def _get_summary(analysis_or_summary):
    """Return the ``summary`` attribute of the argument, or the argument itself.

    Lets callers pass either an object that carries a ``summary`` or the
    summary object directly.
    """
    try:
        return analysis_or_summary.summary
    except AttributeError:
        return analysis_or_summary
101
+
102
+
103
def _get_intelligence(analysis_or_intelligence):
    """Return the ``intelligence`` attribute of the argument, or the argument itself.

    Lets callers pass either an object that carries an ``intelligence``
    attribute or the intelligence object directly.
    """
    try:
        return analysis_or_intelligence.intelligence
    except AttributeError:
        return analysis_or_intelligence
105
+
106
+
107
def build_cleanup_plan(analysis_or_summary, priorities: int = 3) -> list[CleanupPriority]:
    """Build a tiered cleanup plan from a summary.

    Three tiers (levels 1-3) of two items each are assembled.  Each item is
    chosen between a data-driven wording and a generic fallback, depending
    on whether the relevant summary counter or ``extra`` entry is truthy.
    The result is truncated to the first *priorities* tiers.
    """
    summary = _get_summary(analysis_or_summary)
    extra = getattr(summary, "extra", {})

    # Tier 1: removals.
    if summary.todo_count:
        todo_item = f"Remove {summary.todo_count} TODO-heavy hotspots"
    else:
        todo_item = "Remove dead code candidates"
    if extra.get("civilizations"):
        archive_item = f"Archive {len(extra.get('civilizations', []))} abandoned subsystem clusters"
    else:
        archive_item = "Delete obsolete routes"

    # Tier 2: structural refactors.
    if extra.get("weaknesses"):
        structure_item = "Refactor structural weaknesses"
    else:
        structure_item = "Refactor oversized modules"
    if extra.get("dependency_hubs"):
        hub_item = "Stabilize dependency hubs"
    else:
        hub_item = "Consolidate duplicated utilities"

    # Tier 3: consolidation and leftovers.
    if summary.duplicate_count:
        duplicate_item = "Consolidate duplicate logic"
    else:
        duplicate_item = "Trim unused configuration"
    if extra.get("debt_heatmap"):
        debt_item = "Reduce technical debt hotspots"
    else:
        debt_item = "Review migration leftovers"

    tiers = (
        (1, [todo_item, archive_item]),
        (2, [structure_item, hub_item]),
        (3, [duplicate_item, debt_item]),
    )
    plan = [CleanupPriority(level=level, items=items) for level, items in tiers]
    return plan[:priorities]
125
+
126
+
127
def analyze_deletion(path: Path, root: Path) -> DeletionAnalysis:
    """Estimate how safe it is to delete *path* from the repository at *root*.

    The repository is collected and indexed, then two sets of files are
    gathered: files that the reference map lists for the resolved target,
    and text files whose content mentions the target's file name or its
    stem (compared with underscores removed on both sides).  The number of
    distinct files in the union drives both the confidence score
    (100 minus 18 per affected file, clamped to 0..99) and the
    recommendation text.
    """
    view = collect_repository(root)
    text_cache = build_text_index(view)
    references = build_reference_map(view, text_cache)
    target = path.resolve()
    ref_files = sorted(references.get(target, set()))
    needle = target.stem.replace("_", "")

    def mentions_target(content: str) -> bool:
        # Either the exact file name appears, or the underscore-free stem does.
        if target.name in content:
            return True
        return bool(needle) and needle in content.replace("_", "")

    dependencies: list[Path] = []
    for source in _text_files(view):
        if source == target:
            continue
        content = text_cache.get(source, "")
        if content and mentions_target(content):
            dependencies.append(source)

    affected = len({*ref_files, *dependencies})
    safe = max(0.0, 100.0 - (affected * 18.0))

    if affected > 2:
        recommendation = "Keep or refactor first"
    elif affected > 0:
        recommendation = "Review Before Delete"
    else:
        recommendation = "Archive or Remove"

    return DeletionAnalysis(
        path=target,
        safe_confidence=min(99.0, safe),
        affected_files=affected,
        recommendation=recommendation,
        references=ref_files,
        dependencies=dependencies,
    )
159
+
160
+
161
def find_refactor_candidates(analysis_or_intelligence) -> list[RefactorCandidate]:
    """Collect refactoring suggestions from duplicate artifacts and file content.

    Two sources feed the result:

    * ``duplicate_block`` artifacts on the argument's ``summary`` (if any)
      are grouped by their detail text (or the file stem when the detail is
      empty); each group becomes an "Extract shared utility" candidate
      listing the distinct paths involved.
    * Every text file in ``intelligence.view.files`` whose content contains
      at least two ``def validate`` occurrences or at least five ``class ``
      occurrences becomes a "Split responsibilities" candidate.

    File content comes from ``intelligence.text_cache`` when available and
    is otherwise read from disk as UTF-8.  Files that cannot be read or
    decoded are treated as empty rather than aborting the whole scan.
    """
    intelligence = _get_intelligence(analysis_or_intelligence)
    summary = getattr(analysis_or_intelligence, "summary", None)
    text_cache = getattr(intelligence, "text_cache", {})
    candidates: list[RefactorCandidate] = []

    # Group duplicate-block artifacts by a shared key.
    duplicates = defaultdict(list)
    for artifact in getattr(summary, "artifacts", []):
        if artifact.kind != "duplicate_block":
            continue
        key = artifact.detail or artifact.path.stem
        duplicates[key].append(artifact.path)
        match_path = artifact.metadata.get("match_path")
        if match_path:
            duplicates[key].append(Path(match_path))
    for name, paths in duplicates.items():
        candidates.append(
            RefactorCandidate(
                name=name or "Duplicate logic",
                locations=sorted(set(paths)),
                recommendation="Extract shared utility",
                confidence=0.88,
            )
        )

    # Oversized classes and repeated validators.
    for path in intelligence.view.files:
        if path_kind(path) != "text":
            continue
        content = text_cache.get(path, "")
        if not content:
            try:
                content = path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # UnicodeDecodeError is a ValueError, not an OSError, so it
                # must be named explicitly to keep this a best-effort read.
                content = ""
        if content.count("def validate") >= 2 or content.count("class ") >= 5:
            candidates.append(
                RefactorCandidate(
                    name=path.name,
                    locations=[path],
                    recommendation="Split responsibilities and extract helpers",
                    confidence=0.74,
                )
            )
    return candidates
204
+
205
+
206
# Framework name -> compiled regex that recognises a route handler
# declaration in that framework's style.
ROUTE_PATTERNS = {
    framework: re.compile(expression)
    for framework, expression in (
        ("FastAPI", r"@(?:app|router)\.(get|post|put|patch|delete|options|head)\("),
        ("Flask", r"@(?:app|blueprint)\.(route|get|post|put|delete)\("),
        ("Express", r"\.(get|post|put|patch|delete)\("),
        (
            "Next.js",
            r"export\s+(?:default\s+)?function\s+\w+|export\s+async\s+function\s+(GET|POST|PUT|PATCH|DELETE)",
        ),
    )
}
212
+
213
+
214
def audit_routes(view: RepoView, text_cache: dict[Path, str], references: dict[Path, set[Path]]) -> list[RouteFinding]:
    """Classify route-looking text files by handler presence and references.

    A text file is considered when its name contains ``route`` or its path
    contains ``/api/`` or ``app/`` (case-insensitive).  The path is compared
    in POSIX form so the directory checks also work where the native
    separator is a backslash.

    For each considered file:

    * a handler pattern from ``ROUTE_PATTERNS`` matches and nothing
      references the file -> ``"unused endpoint"``;
    * a handler pattern matches and references exist -> ``"documented route"``;
    * no handler pattern matches -> ``"unreachable route"``.

    Files missing from *text_cache* are treated as having empty content.
    """
    findings: list[RouteFinding] = []
    for path in _text_files(view):
        # as_posix() normalises separators; str(path) would use "\\" on
        # Windows and the "/api/" / "app/" checks could never match there.
        location = path.as_posix().lower()
        looks_like_route = (
            "route" in path.name.lower()
            or "/api/" in location
            or "app/" in location
        )
        if not looks_like_route:
            continue

        content = text_cache.get(path, "")
        has_handler = any(pattern.search(content) for pattern in ROUTE_PATTERNS.values())
        if not has_handler:
            findings.append(
                RouteFinding(
                    kind="unreachable route",
                    path=path,
                    detail="Path resembles route code but no handlers were found",
                    confidence=0.7,
                )
            )
        elif not references.get(path, set()):
            findings.append(
                RouteFinding(
                    kind="unused endpoint",
                    path=path,
                    detail="No obvious callers detected",
                    confidence=0.92,
                )
            )
        else:
            findings.append(
                RouteFinding(
                    kind="documented route",
                    path=path,
                    detail="Route patterns detected and reference links exist",
                    confidence=0.92,
                )
            )
    return findings
250
+
251
+
252
def audit_configs(view: RepoView, text_cache: dict[Path, str]) -> list[ConfigFinding]:
    """Report secret-like upper-case identifiers that occur at most once.

    Every upper-case identifier of three or more characters is tallied
    across all cached file contents.  Identifiers seen no more than once
    whose name contains ``KEY``, ``URL``, ``ENDPOINT`` or ``SECRET`` are
    reported as unused environment variables, in first-seen order.

    *view* is accepted for signature parity with the other audits and is
    not consulted.
    """
    identifier = re.compile(r"\b[A-Z][A-Z0-9_]{2,}\b")
    sensitive_tokens = ("KEY", "URL", "ENDPOINT", "SECRET")

    occurrences = Counter()
    for content in text_cache.values():
        occurrences.update(identifier.findall(content))

    return [
        ConfigFinding(kind="unused environment variable", name=name, confidence=0.8)
        for name, count in occurrences.items()
        if count <= 1 and any(token in name for token in sensitive_tokens)
    ]
262
+
263
+
264
def audit_migrations(view: RepoView, text_cache: dict[Path, str]) -> list[MigrationFinding]:
    """Flag migration-related files that look incomplete or lack a rollback.

    A file is considered when ``migration`` appears anywhere in its path,
    or ``migrate`` / ``schema`` appears in its file name (case-insensitive).
    Content is taken from *text_cache* for text files and is empty for
    everything else.  Content containing ``TODO`` or ``XXX`` yields an
    "incomplete migration"; otherwise, content with none of ``down()``,
    ``rollback`` or ``revert`` yields an "orphaned migration".
    """
    rollback_markers = ("down()", "rollback", "revert")
    findings: list[MigrationFinding] = []
    for path in view.files:
        name = path.name.lower()
        is_migration_file = (
            "migration" in str(path).lower()
            or "migrate" in name
            or "schema" in name
        )
        if not is_migration_file:
            continue

        if path_kind(path) == "text":
            content = text_cache.get(path, "")
        else:
            content = ""

        if "TODO" in content or "XXX" in content:
            findings.append(
                MigrationFinding(path=path, kind="incomplete migration", status="Needs Review", confidence=0.84)
            )
        elif not any(marker in content for marker in rollback_markers):
            findings.append(
                MigrationFinding(path=path, kind="orphaned migration", status="Orphaned", confidence=0.78)
            )
    return findings
276
+
277
+
278
def rationalize_dependencies(analysis_or_intelligence) -> list[DependencyWarning]:
    """Warn about rarely used utility packages that native code could replace.

    Usage counts from ``intelligence.external_packages`` are merged
    case-insensitively.  A warning is produced for each of ``lodash``,
    ``underscore``, ``moment`` and ``left-pad`` whose merged count is at
    most one.
    """
    intelligence = _get_intelligence(analysis_or_intelligence)
    replaceable = {"lodash", "underscore", "moment", "left-pad"}

    usage = Counter()
    for package, count in intelligence.external_packages.items():
        usage[package.lower()] += count

    return [
        DependencyWarning(
            name=name,
            only_used_for=count,
            recommendation="Replace with native code",
            confidence=0.9,
        )
        for name, count in usage.items()
        if count <= 1 and name in replaceable
    ]
295
+
296
+
297
def detect_drift(analysis_or_intelligence) -> DriftReport:
    """Describe how far the current architecture has moved from its origin.

    The original style comes from the first entry of
    ``intelligence.dna.signature`` and the current one from
    ``intelligence.architecture.primary`` (``"Unknown"`` when either is
    missing or empty).  Severity is ``"High"`` when the 12-month projected
    health is more than 10 points below the current health, otherwise
    ``"Moderate"``.  Health values fall back to the summary's
    ``health_score`` when no forecast data is available.
    """
    intelligence = _get_intelligence(analysis_or_intelligence)
    summary = _get_summary(analysis_or_intelligence)

    signature = intelligence.dna.signature
    original = signature[0] if signature else "Unknown"
    architecture = intelligence.architecture
    current = architecture.primary if architecture else "Unknown"

    forecast = getattr(intelligence, "forecast", None)
    current_health = getattr(forecast, "current_health", getattr(summary, "health_score", 0))
    projected_12 = getattr(forecast, "projected_12_months", current_health)
    if projected_12 < current_health - 10:
        severity = "High"
    else:
        severity = "Moderate"

    # A missing score is treated as healthy (100) for cause and relabelling.
    health_score = getattr(summary, "health_score", 100)
    cause = "Feature accumulation" if health_score < 80 else "Structural drift"
    if current == "Prototype" and health_score < 70:
        # An unhealthy "Prototype" is reported as a monolith instead.
        current = "Monolithic Application"

    return DriftReport(
        original=f"Simple {original.title()} Service",
        current=current,
        severity=severity,
        cause=cause,
    )
311
+
312
+
313
def build_pr_report(analysis_or_summary) -> PRReport:
    """Render summary counters as the bullet lines of a cleanup PR report.

    The report lists the ancient-file and dead-code counts as removals,
    six percent per duplicate as the duplicate-code reduction, and a health
    score improvement that ends at the current score and starts seven
    points lower (never below zero).
    """
    summary = _get_summary(analysis_or_summary)
    removed = [
        f"{summary.ancient_count} ancient files",
        f"{summary.dead_code_count} dead code candidates",
    ]
    reduced = [f"duplicate code by {summary.duplicate_count * 6}%"]
    improved = [
        f"repository health score from {max(0, summary.health_score - 7)} to {summary.health_score}"
    ]
    return PRReport(removed=removed, reduced=reduced, improved=improved)
321
+
322
+
323
def build_status_summary(analysis_or_summary) -> StatusSummary:
    """Condense summary and intelligence data into headline status numbers.

    Complexity is five points per dependency hub plus ten per weakness,
    capped at 100.  The dependency figure adds the summary's duplicate
    count to the graph edge count, and cleanup opportunities add the
    artifact count to the number of weaknesses.  The recommendations are a
    fixed list.
    """
    summary = _get_summary(analysis_or_summary)
    intelligence = _get_intelligence(analysis_or_summary)

    debt = int(summary.technical_debt_estimate)
    raw_complexity = len(intelligence.dependency_hubs) * 5 + len(intelligence.weaknesses) * 10
    dead_code = summary.dead_code_count
    route_count = len(intelligence.knowledge_map.route_graph)
    dependency_total = summary.duplicate_count + intelligence.graph_edge_count
    opportunities = summary.artifact_count + len(intelligence.weaknesses)

    return StatusSummary(
        debt=debt,
        complexity=min(100, raw_complexity),
        dead_code=dead_code,
        route_count=route_count,
        dependency_count=dependency_total,
        cleanup_opportunities=opportunities,
        recommendations=[
            "Prioritize high-impact deletions",
            "Refactor structural bottlenecks",
            "Audit routes and config drift",
        ],
    )
@@ -0,0 +1,45 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import re
5
+
6
+ from ..models import Artifact
7
+ from ..utils.fs import ASSET_EXTENSIONS, path_kind, read_text
8
+
9
+
10
def find_empty_directories(directories: list[Path], files: list[Path]) -> list[Artifact]:
    """Report the directories in *directories* that contain no entries.

    A path is reported only when it is an existing directory and listing it
    yields nothing.  Paths that do not exist, are not directories, or cannot
    be listed are skipped.

    The previous check treated a directory as empty whenever all of its
    children were members of *files*, which flagged ordinary directories
    full of scanned files as "Empty directory" with full confidence.

    *files* is kept for interface compatibility and is no longer consulted.
    """
    artifacts: list[Artifact] = []
    for directory in directories:
        # is_dir() is False for missing paths and for regular files, both of
        # which would otherwise make iterdir() raise.
        if not directory.is_dir():
            continue
        try:
            is_empty = next(directory.iterdir(), None) is None
        except OSError:
            # Unreadable directory: emptiness cannot be established.
            continue
        if is_empty:
            artifacts.append(
                Artifact(
                    path=directory,
                    kind="empty_directory",
                    risk="Low",
                    detail="Empty directory",
                    confidence=1.0,
                )
            )
    return artifacts
25
+
26
+
27
def find_unused_assets(files: list[Path], text_cache: dict[Path, str]) -> list[Artifact]:
    """Report asset files whose name and stem appear in no cached text.

    Assets are the entries of *files* whose lower-cased suffix is in
    ``ASSET_EXTENSIONS``.  All cached contents are joined and lower-cased
    once; an asset is reported when neither its lower-cased file name nor
    its lower-cased stem occurs in that combined text.
    """
    assets = [candidate for candidate in files if candidate.suffix.lower() in ASSET_EXTENSIONS]
    if not assets:
        return []

    haystack = "\n".join(text_cache.values()).lower()

    def is_referenced(asset: Path) -> bool:
        return asset.name.lower() in haystack or asset.stem.lower() in haystack

    return [
        Artifact(
            path=asset,
            kind="unused_asset",
            risk="Medium",
            detail="No obvious textual references",
            confidence=0.72,
        )
        for asset in assets
        if not is_referenced(asset)
    ]
45
+
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from ..models import Artifact
6
+
7
+
8
# Lower-case name fragments that suggest a file is a leftover or stale copy.
SUSPICIOUS_MARKERS = (
    "old", "backup", "copy",
    "final", "final2", "new",
    "temp", "legacy", "archive",
)
19
+
20
+
21
def find_suspicious(files: list[Path]) -> list[Artifact]:
    """Flag files whose name contains one or more suspicious markers.

    Matching is a case-insensitive substring test of each entry in
    ``SUSPICIOUS_MARKERS`` against the file name.  Confidence starts at 0.6
    and grows by 0.1 per matched marker, capped at 0.99.  Risk is
    ``"Medium"`` for a single match and ``"High"`` for several.
    """
    flagged: list[Artifact] = []
    for candidate in files:
        filename = candidate.name.lower()
        matched = [marker for marker in SUSPICIOUS_MARKERS if marker in filename]
        if matched:
            risk = "Medium" if len(matched) == 1 else "High"
            flagged.append(
                Artifact(
                    path=candidate,
                    kind="suspicious",
                    risk=risk,
                    detail=f"Matched markers: {', '.join(matched)}",
                    confidence=min(0.6 + 0.1 * len(matched), 0.99),
                )
            )
    return flagged
39
+
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from ..models import Artifact
8
+ from ..utils.fs import path_kind, read_text
9
+
10
+
11
# Severity label -> case-insensitive whole-word pattern for its marker words.
# Insertion order matters to consumers that stop at the first match.
TODO_PATTERNS = {
    severity: re.compile(expression, re.IGNORECASE)
    for severity, expression in (
        ("CRITICAL", r"\b(?:FIXME|BUG)\b"),
        ("HIGH", r"\b(?:HACK|XXX)\b"),
        ("MEDIUM", r"\b(?:TODO)\b"),
        ("LOW", r"\b(?:TEMP)\b"),
    )
}
17
+
18
+
19
+ @dataclass(slots=True)
20
+ class TodoFinding:
21
+ file: Path
22
+ line: int
23
+ severity: str
24
+ comment: str
25
+
26
+
27
def find_todos(files: list[Path]) -> list[TodoFinding]:
    """Scan text files for marker comments and return one finding per line.

    Only paths whose ``path_kind`` is ``"text"`` are read.  Each line is
    tested against ``TODO_PATTERNS`` in the dictionary's order and the first
    severity that matches wins, so a line produces at most one finding.
    Line numbers are 1-based and the comment is the stripped line.
    """
    findings: list[TodoFinding] = []
    for path in files:
        if path_kind(path) != "text":
            continue
        for line_no, line in enumerate(read_text(path).splitlines(), start=1):
            severity = next(
                (label for label, pattern in TODO_PATTERNS.items() if pattern.search(line)),
                None,
            )
            if severity is None:
                continue
            findings.append(
                TodoFinding(
                    file=path,
                    line=line_no,
                    severity=severity,
                    comment=line.strip(),
                )
            )
    return findings
46
+
47
+
48
def todos_to_artifacts(findings: list[TodoFinding]) -> list[Artifact]:
    """Convert marker findings into ``todo`` artifacts, preserving order.

    Each artifact copies the finding's file, line number and comment, uses
    the finding's severity as its risk, and carries a confidence of 1.0.
    """
    artifacts: list[Artifact] = []
    for finding in findings:
        artifacts.append(
            Artifact(
                path=finding.file,
                kind="todo",
                risk=finding.severity,
                line_number=finding.line,
                detail=finding.comment,
                confidence=1.0,
            )
        )
    return artifacts
60
+
@@ -0,0 +1,2 @@
1
+ """CLI entrypoints for Dev Archaeologist."""
2
+