devarch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,902 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict, dataclass, field
4
+ from datetime import datetime, timezone
5
+ import json
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ try:
11
+ import tomllib
12
+ except ModuleNotFoundError: # pragma: no cover - Python < 3.11 fallback
13
+ tomllib = None # type: ignore[assignment]
14
+
15
+ from ..scanner.intelligence import RepositoryAnalysis
16
+
17
+
18
# Name of the directory, directly under the repository root, that holds all state files.
STATE_DIR_NAME = ".devarch"
# File inside the state directory that stores the saved baseline snapshot as JSON.
BASELINE_FILE = "baseline.json"
# Append-only log inside the state directory; one JSON object per line.
HISTORY_FILE = "history.jsonl"
# Budget file names probed inside the state directory, in this order; the first that exists wins.
BUDGET_FILE_CANDIDATES = ("budget.json", "budget.toml", "budget.yml", "budget.yaml")
22
+
23
+
24
@dataclass(slots=True)
class MaintenanceSnapshot:
    """Point-in-time repository metrics, used as the baseline and in the history log."""

    captured_at: str  # ISO-8601 timestamp string
    root: str  # repository root, stored as a string
    file_count: int
    dependency_count: int  # filled from the dependency graph's edge count
    complexity_score: int  # capped at 100 when built from an analysis
    dead_code_count: int
    duplicate_code_count: int
    route_count: int
    todo_count: int
    health_score: int
    technical_debt: float
37
+
38
+
39
@dataclass(slots=True)
class RegressionReport:
    """Differences between a current snapshot and a stored baseline."""

    baseline: MaintenanceSnapshot
    current: MaintenanceSnapshot
    complexity_delta: float  # percentage change relative to the baseline, rounded to 1 decimal
    dead_code_delta: int  # current minus baseline
    duplicate_delta: int  # current minus baseline
    health_delta: int  # current minus baseline; negative means health dropped
    status: str  # "Improved" or "Regressed"
48
+
49
+
50
@dataclass(slots=True)
class BudgetLimits:
    """Thresholds a snapshot is checked against by ``evaluate_budget``.

    The ``max_*`` fields are ceilings (a value above the limit is a violation);
    ``min_health_score`` is a floor.
    """

    max_dead_files: int = 10
    max_complexity: int = 80
    max_duplicate_blocks: int = 25
    max_todos: int = 50
    max_routes: int = 100
    max_dependencies: int = 150
    min_health_score: int = 70
59
+
60
+
61
@dataclass(slots=True)
class BudgetCheck:
    """Outcome of comparing a snapshot against a set of budget limits."""

    limits: BudgetLimits
    # One (label, measured value, limit) tuple per ceiling that was exceeded.
    exceeded: list[tuple[str, int, int]] = field(default_factory=list)
    status: str = "Passing"  # "Passing" or "Failing"
66
+
67
+
68
@dataclass(slots=True)
class ReleaseCheck:
    """Release-readiness verdict produced by ``release_check``."""

    score: int  # health score after penalties, clamped to 0..100
    status: str  # "Ready" or "Needs Work"
    warnings: list[str] = field(default_factory=list)
    blockers: list[str] = field(default_factory=list)
    budget: BudgetCheck | None = None
    regression: RegressionReport | None = None  # None when no baseline was supplied
76
+
77
+
78
@dataclass(slots=True)
class OwnershipFinding:
    """A module area that has no known owner or has gone stale."""

    module: str
    last_significant_activity_days: int
    primary_maintainer: str  # "Unknown" when no owner is recorded
    status: str  # "Unowned" or "Stale"
84
+
85
+
86
@dataclass(slots=True)
class DependencyAlert:
    """A dependency flagged by ``dependency_health_report``."""

    package: str
    status: str  # "Unused" (declared, not imported) or "Untracked" (imported, not declared)
    recommendation: str
    confidence: float
    used_by: int  # usage count taken from the analysis' external package map
93
+
94
+
95
@dataclass(slots=True)
class StandardsReport:
    """Heuristic repository-standards scores, each an integer percentage."""

    naming: int
    documentation: int
    consistency: int
    test_coverage: int  # share of files whose name contains "test", not measured coverage
    notes: list[str] = field(default_factory=list)  # human-readable remarks for low scores
102
+
103
+
104
@dataclass(slots=True)
class HistoryPoint:
    """Subset of snapshot metrics read back from the history log."""

    captured_at: str
    health_score: int
    complexity_score: int
    dead_code_count: int
    duplicate_code_count: int
    label: str  # the entry's "kind" (defaults to "snapshot"), or "current" for a live point
112
+
113
+
114
@dataclass(slots=True)
class RecommendationItem:
    """A single improvement suggestion; all fields are display text."""

    title: str
    current: str
    target: str
    potential_reduction: str
120
+
121
+
122
@dataclass(slots=True)
class RemediationFinding:
    """A detected problem together with its remediation guidance."""

    problem: str
    evidence: list[str]
    impact: str
    confidence: float
    recommended_fix: str
    estimated_effort: str  # free-text range such as "2-10 minutes" or "1-4 hours"
    risk_level: str  # "High", "Medium" or "Low"
    root_cause: str
    likely_consequences: str
    alternative_solution: str
    implementation_difficulty: str
    location: str  # a path, a package name, or the repository root
136
+
137
+
138
@dataclass(slots=True)
class PrescriptionPlan:
    """Prioritised findings plus the short list of actions to take first."""

    findings: list[RemediationFinding] = field(default_factory=list)
    immediate_actions: list[str] = field(default_factory=list)
    estimated_time_minutes: int = 0
    expected_health_increase: int = 0  # capped at 20 by prescribe_repository
144
+
145
+
146
@dataclass(slots=True)
class RepairWeek:
    """One week of the staged repair plan."""

    week: int  # 1-based week number
    focus: str
    actions: list[str] = field(default_factory=list)
    expected_health: str = ""  # display text of the form "<current> -> <target>"
152
+
153
+
154
def _state_dir(root: Path) -> Path:
    """Return the state directory located directly under *root*."""
    return root.joinpath(STATE_DIR_NAME)
156
+
157
+
158
def _baseline_path(root: Path) -> Path:
    """Return the path of the baseline snapshot file for the repository at *root*."""
    return _state_dir(root).joinpath(BASELINE_FILE)
160
+
161
+
162
def _history_path(root: Path) -> Path:
    """Return the path of the JSON-lines history log for the repository at *root*."""
    return _state_dir(root).joinpath(HISTORY_FILE)
164
+
165
+
166
def _default_budget_path(root: Path) -> Path | None:
    """Return the first existing budget file in the state directory, or ``None``.

    Candidates are probed in the order given by ``BUDGET_FILE_CANDIDATES``.
    """
    base = _state_dir(root)
    probes = (base / filename for filename in BUDGET_FILE_CANDIDATES)
    return next((probe for probe in probes if probe.exists()), None)
173
+
174
+
175
def _ensure_state_dir(root: Path) -> Path:
    """Create the state directory (and any missing parents) and return its path."""
    target = _state_dir(root)
    # exist_ok keeps repeated calls harmless.
    target.mkdir(parents=True, exist_ok=True)
    return target
179
+
180
+
181
+ def _serialise(value: Any) -> Any:
182
+ if isinstance(value, Path):
183
+ return str(value)
184
+ if isinstance(value, datetime):
185
+ return value.isoformat()
186
+ if isinstance(value, dict):
187
+ return {str(key): _serialise(item) for key, item in value.items()}
188
+ if isinstance(value, list):
189
+ return [_serialise(item) for item in value]
190
+ if hasattr(value, "__dict__"):
191
+ return _serialise(value.__dict__)
192
+ return value
193
+
194
+
195
def _snapshot_from_analysis(analysis: RepositoryAnalysis) -> MaintenanceSnapshot:
    """Condense *analysis* into a ``MaintenanceSnapshot`` stamped with the current UTC time."""
    summary = analysis.summary
    intel = analysis.intelligence
    captured = datetime.now(timezone.utc)
    # Each dependency hub adds 5 points and each weakness 10; the total is capped at 100.
    hub_points = len(intel.dependency_hubs) * 5
    weakness_points = len(intel.weaknesses) * 10
    return MaintenanceSnapshot(
        captured_at=captured.isoformat(),
        root=str(summary.root),
        file_count=summary.total_files,
        dependency_count=intel.graph_edge_count,
        complexity_score=min(100, hub_points + weakness_points),
        dead_code_count=summary.dead_code_count,
        duplicate_code_count=summary.duplicate_count,
        route_count=len(intel.knowledge_map.route_graph),
        todo_count=summary.todo_count,
        health_score=summary.health_score,
        technical_debt=summary.technical_debt_estimate,
    )
211
+
212
+
213
def build_snapshot(analysis: RepositoryAnalysis) -> MaintenanceSnapshot:
    """Public entry point: build a snapshot of *analysis* without persisting anything."""
    snapshot = _snapshot_from_analysis(analysis)
    return snapshot
215
+
216
+
217
def save_baseline(analysis: RepositoryAnalysis) -> MaintenanceSnapshot:
    """Snapshot *analysis*, store it as the baseline and log it to the history.

    The baseline file is overwritten; the history file receives one appended
    JSON line tagged ``"kind": "baseline"``.

    Returns:
        The snapshot that was written.
    """
    root = analysis.summary.root
    snapshot = _snapshot_from_analysis(analysis)
    payload = _serialise(asdict(snapshot))
    _ensure_state_dir(root)
    _baseline_path(root).write_text(json.dumps(payload, indent=2), encoding="utf-8")
    # Use a context manager so the handle is flushed and closed deterministically.
    with _history_path(root).open("a", encoding="utf-8") as handle:
        handle.write(json.dumps({"kind": "baseline", **payload}) + "\n")
    return snapshot
226
+
227
+
228
def load_baseline(root: Path) -> MaintenanceSnapshot | None:
    """Load the stored baseline snapshot for *root*, or ``None`` if none was saved."""
    baseline_file = _baseline_path(root)
    if baseline_file.exists():
        payload = json.loads(baseline_file.read_text(encoding="utf-8"))
        return MaintenanceSnapshot(**payload)
    return None
234
+
235
+
236
def load_history(root: Path) -> list[HistoryPoint]:
    """Read every entry of the history log for *root*, oldest first.

    Blank lines are skipped. A missing log yields an empty list.
    """
    log_file = _history_path(root)
    if not log_file.exists():
        return []
    rows = log_file.read_text(encoding="utf-8").splitlines()
    records = (json.loads(row) for row in rows if row.strip())
    return [
        HistoryPoint(
            captured_at=record["captured_at"],
            health_score=record["health_score"],
            complexity_score=record["complexity_score"],
            dead_code_count=record["dead_code_count"],
            duplicate_code_count=record["duplicate_code_count"],
            # Entries written without a kind are treated as plain snapshots.
            label=record.get("kind", "snapshot"),
        )
        for record in records
    ]
256
+
257
+
258
def append_history(analysis: RepositoryAnalysis, label: str = "snapshot") -> MaintenanceSnapshot:
    """Snapshot *analysis* and append it to the history log tagged with *label*.

    Returns:
        The snapshot that was appended.
    """
    root = analysis.summary.root
    snapshot = _snapshot_from_analysis(analysis)
    _ensure_state_dir(root)
    with _history_path(root).open("a", encoding="utf-8") as log:
        record = {"kind": label, **_serialise(asdict(snapshot))}
        log.write(f"{json.dumps(record)}\n")
    return snapshot
264
+
265
+
266
def compare_to_baseline(current: MaintenanceSnapshot, baseline: MaintenanceSnapshot) -> RegressionReport:
    """Compare *current* against *baseline* and classify the change.

    ``complexity_delta`` is the percentage change relative to the baseline
    complexity, rounded to one decimal; it is reported as ``0.0`` when the
    baseline complexity is zero because no percentage can be formed.

    The status is ``"Regressed"`` when health dropped or when dead code,
    duplicates or complexity grew; otherwise it is ``"Improved"``.
    """
    complexity_change = current.complexity_score - baseline.complexity_score
    if baseline.complexity_score:
        complexity_delta = complexity_change / baseline.complexity_score * 100
    else:
        complexity_delta = 0.0
    dead_code_delta = current.dead_code_count - baseline.dead_code_count
    duplicate_delta = current.duplicate_code_count - baseline.duplicate_code_count
    health_delta = current.health_score - baseline.health_score
    # Judge complexity on the raw change: the percentage is pinned to 0.0 for a
    # zero baseline and would otherwise hide a rise from 0.
    regressed = (
        health_delta < 0
        or dead_code_delta > 0
        or duplicate_delta > 0
        or complexity_change > 0
    )
    return RegressionReport(
        baseline=baseline,
        current=current,
        complexity_delta=round(complexity_delta, 1),
        dead_code_delta=dead_code_delta,
        duplicate_delta=duplicate_delta,
        health_delta=health_delta,
        status="Regressed" if regressed else "Improved",
    )
283
+
284
+
285
def read_budget_limits(root: Path, config: Path | None = None) -> BudgetLimits:
    """Load budget limits from *config* or from the default budget file under *root*.

    Supported formats are JSON, TOML (only when ``tomllib`` is available) and a
    flat ``key: value`` subset of YAML. Unknown keys and non-numeric values are
    ignored, so a partially valid file still overrides the defaults it can.

    Args:
        root: Repository root used to locate the default budget file.
        config: Explicit budget file; takes precedence over the default lookup.

    Returns:
        ``BudgetLimits`` with file values applied over the defaults, or plain
        defaults when no usable file is found.

    Raises:
        json.JSONDecodeError / tomllib.TOMLDecodeError: when the file is malformed.
    """
    candidate = config or _default_budget_path(root)
    if not candidate or not candidate.exists():
        return BudgetLimits()
    suffix = candidate.suffix.lower()
    data: Any
    if suffix == ".json":
        data = json.loads(candidate.read_text(encoding="utf-8"))
    elif suffix == ".toml" and tomllib is not None:
        data = tomllib.loads(candidate.read_text(encoding="utf-8"))
    elif suffix in {".yml", ".yaml"}:
        data = {}
        for raw_line in candidate.read_text(encoding="utf-8").splitlines():
            # Drop full-line and inline comments ("#" at line start or after whitespace).
            line = re.split(r"(?:^|\s)#", raw_line, maxsplit=1)[0].strip()
            if not line or ":" not in line:
                continue
            key, value = line.split(":", 1)
            key = key.strip()
            value = value.strip().strip("'\"")
            if re.fullmatch(r"-?\d+", value):
                data[key] = int(value)
            elif re.fullmatch(r"-?\d+\.\d+", value):
                data[key] = float(value)
            else:
                data[key] = value
    else:
        return BudgetLimits()
    if not isinstance(data, dict):
        # A top-level list or scalar carries no named limits.
        return BudgetLimits()
    allowed = set(BudgetLimits.__dataclass_fields__)
    # Keep numeric values only: the limits are compared with ``>`` / ``>=``
    # later, where a stray string would raise TypeError. bool is excluded
    # because it is an int subclass.
    overrides = {
        key: value
        for key, value in data.items()
        if key in allowed and isinstance(value, (int, float)) and not isinstance(value, bool)
    }
    return BudgetLimits(**overrides)
314
+
315
+
316
def evaluate_budget(current: MaintenanceSnapshot, limits: BudgetLimits) -> BudgetCheck:
    """Check *current* against *limits*.

    Every ceiling that is exceeded is reported as ``(label, value, limit)``.
    The check passes only when nothing is exceeded and the health score meets
    the minimum.
    """
    measurements = (
        ("Dead Files", current.dead_code_count, limits.max_dead_files),
        ("Complexity", current.complexity_score, limits.max_complexity),
        ("Duplicate Blocks", current.duplicate_code_count, limits.max_duplicate_blocks),
        ("TODOs", current.todo_count, limits.max_todos),
        ("Routes", current.route_count, limits.max_routes),
        ("Dependencies", current.dependency_count, limits.max_dependencies),
    )
    exceeded: list[tuple[str, int, int]] = [
        (label, value, limit) for label, value, limit in measurements if value > limit
    ]
    within_budget = not exceeded and current.health_score >= limits.min_health_score
    return BudgetCheck(
        limits=limits,
        exceeded=exceeded,
        status="Passing" if within_budget else "Failing",
    )
331
+
332
+
333
def ownership_report(analysis: RepositoryAnalysis) -> list[OwnershipFinding]:
    """List contributor areas that are unowned or inactive for a year or more.

    An area's inactivity is the largest ``file_last_active_days`` value among
    files whose first path component (relative to the root) equals the area.
    """
    intel = analysis.intelligence
    report: list[OwnershipFinding] = []
    for contributor in intel.contributors:
        idle_days = []
        for file_path in intel.view.files:
            parts = file_path.relative_to(intel.root).parts
            if parts and parts[0] == contributor.area:
                idle_days.append(intel.file_last_active_days.get(file_path, 0))
        stalest = max(idle_days, default=0)
        unowned = contributor.owner == "unknown"
        if not unowned and stalest < 365:
            continue
        report.append(
            OwnershipFinding(
                module=contributor.area,
                last_significant_activity_days=stalest,
                primary_maintainer="Unknown" if unowned else contributor.owner,
                status="Unowned" if unowned else "Stale",
            )
        )
    return report
353
+
354
+
355
+ def _declared_dependencies(root: Path) -> set[str]:
356
+ declared: set[str] = set()
357
+ pyproject = root / "pyproject.toml"
358
+ if pyproject.exists() and tomllib is not None:
359
+ try:
360
+ data = tomllib.loads(pyproject.read_text(encoding="utf-8"))
361
+ except Exception:
362
+ data = {}
363
+ project = data.get("project", {})
364
+ for item in project.get("dependencies", []):
365
+ name = re.split(r"[<>=~!\[]", str(item), 1)[0].strip()
366
+ if name:
367
+ declared.add(name.lower())
368
+ for candidate in root.rglob("requirements*.txt"):
369
+ for line in candidate.read_text(encoding="utf-8", errors="ignore").splitlines():
370
+ line = line.strip()
371
+ if not line or line.startswith("#") or line.startswith("-r"):
372
+ continue
373
+ name = re.split(r"[<>=~!\[]", line, 1)[0].strip()
374
+ if name:
375
+ declared.add(name.lower())
376
+ return declared
377
+
378
+
379
def dependency_health_report(analysis: RepositoryAnalysis) -> list[DependencyAlert]:
    """Report declared-but-unused and imported-but-undeclared dependencies.

    Declared names and imported names are matched case-insensitively, with
    ``-`` and ``_`` treated as equivalent, so ``PIL`` matches ``pil`` and
    ``typing-extensions`` matches ``typing_extensions``.

    Returns:
        "Unused" alerts for declared packages with no recorded usage, followed
        by "Untracked" alerts for undeclared, non-local packages used at most
        once.
    """

    def canonical(name: str) -> str:
        return name.lower().replace("-", "_")

    root = analysis.summary.root
    intelligence = analysis.intelligence
    declared = _declared_dependencies(root)
    declared_keys = {canonical(name) for name in declared}

    # Usage counts keyed by canonical name; the original keys may be mixed case.
    usage: dict[str, int] = {}
    for package, count in intelligence.external_packages.items():
        key = canonical(package)
        usage[key] = usage.get(key, 0) + count

    local_modules = {
        path.stem.lower()
        for path in intelligence.view.files
        if path.suffix.lower() in {".py", ".pyi", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"}
    }
    local_modules.update(
        part.lower()
        for path in intelligence.view.files
        for part in path.parts[:1]
        if part and part.lower() not in {root.name.lower()}
    )
    # Common internal package names that must never be reported as third-party.
    local_modules.update({"version", "cli", "reports", "scanner", "intelligence", "discovery", "maintenance", "recovery", "models", "utils", "analyzers", "plugins"})

    alerts: list[DependencyAlert] = []
    for package in sorted(declared):
        used_by = usage.get(canonical(package), 0)
        if used_by == 0:
            alerts.append(
                DependencyAlert(
                    package=package,
                    status="Unused",
                    recommendation="Remove or replace",
                    confidence=0.9,
                    used_by=used_by,
                )
            )
    for package, count in intelligence.external_packages.items():
        normalized = package.lower()
        if normalized in local_modules or canonical(package) in declared_keys:
            continue
        if count <= 1:
            alerts.append(
                DependencyAlert(
                    package=normalized,
                    status="Untracked",
                    recommendation="Document or pin the dependency",
                    confidence=0.72,
                    used_by=count,
                )
            )
    return alerts
424
+
425
+
426
def standards_report(analysis: RepositoryAnalysis) -> StandardsReport:
    """Score naming, documentation, folder consistency and test footprint.

    Every score is an integer percentage derived from simple file-name and
    content heuristics; a note is added for each score below its threshold.
    """
    intel = analysis.intelligence
    view = intel.view
    sources = intel.text_cache
    file_total = max(1, len(view.files))
    name_pattern = re.compile(r"[a-z0-9_./-]+")

    conforming = doc_files = documented_modules = test_files = 0
    for path in view.files:
        lowered = path.name.lower()
        extension = path.suffix.lower()
        if name_pattern.fullmatch(lowered) and "__" not in path.name:
            conforming += 1
        if extension in {".md", ".rst"} or lowered.startswith("readme"):
            doc_files += 1
        if extension == ".py":
            body = sources.get(path, "")
            # Any triple-quoted string counts as a docstring for this heuristic.
            if '"""' in body or "'''" in body:
                documented_modules += 1
        if "test" in lowered:
            test_files += 1

    naming = int(round((conforming / file_total) * 100))
    documentation = int(round(min(1.0, (doc_files + documented_modules) / file_total) * 100))
    if view.directories:
        lowercase_dirs = len([folder for folder in view.directories if folder.name == folder.name.lower()])
        consistency = int(round((lowercase_dirs / max(1, len(view.directories))) * 100))
    else:
        consistency = 100
    test_coverage = int(round((test_files / file_total) * 100))

    thresholds = (
        (documentation, 70, "Documentation coverage could be improved"),
        (naming, 80, "File naming is inconsistent"),
        (consistency, 80, "Folder naming is inconsistent"),
        (test_coverage, 15, "Test footprint is light"),
    )
    notes: list[str] = [message for score, minimum, message in thresholds if score < minimum]
    return StandardsReport(
        naming=naming,
        documentation=documentation,
        consistency=consistency,
        test_coverage=test_coverage,
        notes=notes,
    )
458
+
459
+
460
def history_points(root: Path, current: MaintenanceSnapshot | None = None) -> list[HistoryPoint]:
    """Return the stored history for *root*, optionally followed by a live "current" point."""
    points = load_history(root)
    if current is None:
        return points
    live = HistoryPoint(
        captured_at=current.captured_at,
        health_score=current.health_score,
        complexity_score=current.complexity_score,
        dead_code_count=current.dead_code_count,
        duplicate_code_count=current.duplicate_code_count,
        label="current",
    )
    return points + [live]
472
+
473
+
474
def cleanup_candidates(analysis: RepositoryAnalysis) -> list[str]:
    """Flatten the recovery cleanup plan into a single ordered list of item lines."""
    # Imported at call time rather than at module import.
    from . import recovery

    return [
        entry
        for step in recovery.build_cleanup_plan(analysis)
        for entry in step.items
    ]
483
+
484
+
485
def recommendation_items(analysis: RepositoryAnalysis) -> list[RecommendationItem]:
    """Build up to five headline recommendations from the current snapshot.

    Suggestions cover dependency alerts, dead code and a complexity score of
    70 or more, in that order; each is only included when it applies.
    """
    snapshot = _snapshot_from_analysis(analysis)
    alerts = dependency_health_report(analysis)
    suggestions: list[RecommendationItem] = []

    if alerts:
        unused_total = len([alert for alert in alerts if alert.status == "Unused"])
        suggestions.append(
            RecommendationItem(
                title="Reduce dependency count",
                current=f"{len(alerts)} risky packages",
                target=f"Remove {unused_total} unused dependencies",
                potential_reduction=f"{min(100, unused_total * 5)}% smaller dependency surface",
            )
        )

    if snapshot.dead_code_count:
        suggestions.append(
            RecommendationItem(
                title="Lower dead code count",
                current=f"{snapshot.dead_code_count} dead files",
                target="Move toward zero",
                potential_reduction="Fewer stale execution paths",
            )
        )

    if snapshot.complexity_score >= 70:
        suggestions.append(
            RecommendationItem(
                title="Reduce complexity hotspots",
                current=f"Complexity score {snapshot.complexity_score}/100",
                target="Drop below 60",
                potential_reduction="Lower maintenance burden",
            )
        )

    return suggestions[:5]
518
+
519
+
520
+ def _risk_from_artifact(kind: str, impact: str) -> str:
521
+ if impact in {"Critical", "Severe"}:
522
+ return "High"
523
+ if kind in {"monster_file", "dead_code_candidate", "unreachable_code"}:
524
+ return "Medium"
525
+ return "Low"
526
+
527
+
528
+ def _effort_from_artifact(kind: str, size: int | None = None) -> str:
529
+ if kind in {"dead_code_candidate", "unused_asset", "empty_directory", "suspicious"}:
530
+ return "2-10 minutes"
531
+ if kind in {"todo", "ancient_file"}:
532
+ return "15-30 minutes"
533
+ if kind in {"duplicate_block"}:
534
+ return "30-90 minutes"
535
+ if kind in {"monster_file"}:
536
+ return "2-4 hours"
537
+ if size and size > 2000:
538
+ return "4+ hours"
539
+ return "30 minutes"
540
+
541
+
542
def _make_finding(
    *,
    problem: str,
    evidence: list[str],
    impact: str,
    confidence: float,
    recommended_fix: str,
    estimated_effort: str,
    risk_level: str,
    root_cause: str,
    likely_consequences: str,
    alternative_solution: str,
    implementation_difficulty: str,
    location: str,
) -> RemediationFinding:
    """Keyword-only constructor for ``RemediationFinding``; each argument maps to the field of the same name."""
    payload = {
        "problem": problem,
        "evidence": evidence,
        "impact": impact,
        "confidence": confidence,
        "recommended_fix": recommended_fix,
        "estimated_effort": estimated_effort,
        "risk_level": risk_level,
        "root_cause": root_cause,
        "likely_consequences": likely_consequences,
        "alternative_solution": alternative_solution,
        "implementation_difficulty": implementation_difficulty,
        "location": location,
    }
    return RemediationFinding(**payload)
571
+
572
+
573
# Remediation text per artifact kind.
#   "impact":          fixed impact; when absent, the artifact's own risk is used,
#                      falling back to "impact_fallback" if that is empty.
#   "risk_level":      fixed risk; when absent, derived via _risk_from_artifact.
#   "detail_fallback": when present, evidence always has two entries and this
#                      text stands in for a missing artifact detail.
_ARTIFACT_GUIDANCE = {
    "dead_code_candidate": {
        "problem": "Dead code detected in {name}",
        "impact_fallback": "Low",
        "confidence": 0.85,
        "recommended_fix": "Remove the file or unreachable block",
        "root_cause": "Unused logic accumulated over time",
        "likely_consequences": "Continued maintenance overhead and false dependency signals",
        "alternative_solution": "Archive the file first if you need a rollback window",
        "implementation_difficulty": "Easy",
    },
    "ancient_file": {
        "problem": "Ancient file appears abandoned: {name}",
        "impact_fallback": "Moderate",
        "confidence": 0.82,
        "recommended_fix": "Archive or remove after confirming no runtime references",
        "root_cause": "File has drifted out of active development",
        "likely_consequences": "Stale behavior, confusing ownership, unnecessary cognitive load",
        "alternative_solution": "Move to an archive folder with a deprecation note",
        "implementation_difficulty": "Easy",
    },
    "duplicate_block": {
        "problem": "Duplicated logic detected in {name}",
        "impact": "Medium",
        "confidence": 0.84,
        "recommended_fix": "Extract shared utility or shared module",
        "risk_level": "Medium",
        "root_cause": "Copy-paste reuse instead of shared abstraction",
        "likely_consequences": "Bug fixes will need to be repeated in multiple places",
        "alternative_solution": "Document the duplication if extraction would be too invasive",
        "implementation_difficulty": "Moderate",
    },
    "monster_file": {
        "problem": "Oversized or complex file detected: {name}",
        "detail_fallback": "High complexity surface",
        "impact": "High",
        "confidence": 0.9,
        "recommended_fix": "Split responsibilities into smaller modules",
        "risk_level": "High",
        "root_cause": "Feature accumulation without module boundaries",
        "likely_consequences": "Change risk and review burden will stay high",
        "alternative_solution": "Add internal helper modules before a full split",
        "implementation_difficulty": "Hard",
    },
    # All developer-note kinds share one entry.
    **dict.fromkeys(
        ("todo", "fixme", "hack", "bug", "temp", "xxx"),
        {
            "problem": "Outstanding developer note in {name}",
            "impact": "Low",
            "confidence": 0.75,
            "recommended_fix": "Resolve the note or convert it into a tracked issue",
            "risk_level": "Low",
            "root_cause": "Work was paused before the change was completed",
            "likely_consequences": "Deferred cleanup will compound over time",
            "alternative_solution": "Keep as a documented follow-up with owner and deadline",
            "implementation_difficulty": "Easy",
        },
    ),
    **dict.fromkeys(
        ("unused_asset", "empty_directory"),
        {
            "problem": "Unused structure or asset: {name}",
            "impact": "Low",
            "confidence": 0.8,
            "recommended_fix": "Delete if truly unused or document a future use",
            "risk_level": "Low",
            "root_cause": "Repository drift and old build outputs",
            "likely_consequences": "Clutter and confusion for future maintainers",
            "alternative_solution": "Move to an archive folder temporarily",
            "implementation_difficulty": "Easy",
        },
    ),
    "suspicious": {
        "problem": "Suspicious backup-style file detected: {name}",
        "impact": "Low",
        "confidence": 0.86,
        "recommended_fix": "Rename, archive, or remove after verifying intent",
        "risk_level": "Low",
        "root_cause": "Temporary file naming conventions were never cleaned up",
        "likely_consequences": "Potential confusion and accidental reuse of stale code",
        "alternative_solution": "Label the file clearly if it must remain",
        "implementation_difficulty": "Easy",
    },
}

# Guidance for any artifact kind without a dedicated entry above.
_DEFAULT_ARTIFACT_GUIDANCE = {
    "problem": "Review artifact: {name} ({kind})",
    "impact_fallback": "Moderate",
    "confidence": 0.7,
    "recommended_fix": "Review and either keep, archive, or remove based on ownership",
    "root_cause": "Artifact was discovered during repository excavation",
    "likely_consequences": "Unreviewed files may continue to accumulate technical debt",
    "alternative_solution": "Document why the artifact is intentionally kept",
    "implementation_difficulty": "Easy",
}


def _artifact_finding(artifact: Any) -> RemediationFinding:
    """Turn one scanned artifact into a finding using the per-kind guidance table."""
    kind = artifact.kind
    guide = _ARTIFACT_GUIDANCE.get(kind, _DEFAULT_ARTIFACT_GUIDANCE)
    location = str(artifact.path)

    detail_fallback = guide.get("detail_fallback")
    if detail_fallback is not None:
        evidence = [location, artifact.detail or detail_fallback]
    else:
        evidence = [location]
        if artifact.detail:
            evidence.append(artifact.detail)

    impact = guide.get("impact")
    if impact is None:
        impact = artifact.risk or guide["impact_fallback"]
    risk_level = guide.get("risk_level")
    if risk_level is None:
        risk_level = _risk_from_artifact(kind, artifact.risk)

    return _make_finding(
        problem=guide["problem"].format(name=artifact.path.name, kind=kind),
        evidence=evidence,
        impact=impact,
        # A missing (or zero) artifact confidence falls back to the kind's default.
        confidence=artifact.confidence or guide["confidence"],
        recommended_fix=guide["recommended_fix"],
        estimated_effort=_effort_from_artifact(kind, artifact.size_bytes),
        risk_level=risk_level,
        root_cause=guide["root_cause"],
        likely_consequences=guide["likely_consequences"],
        alternative_solution=guide["alternative_solution"],
        implementation_difficulty=guide["implementation_difficulty"],
        location=location,
    )


def remediation_findings(analysis: RepositoryAnalysis) -> list[RemediationFinding]:
    """Collect remediation findings for everything the analysis flagged.

    Findings are emitted in this order: scanned artifacts, structural
    weaknesses, dependency alerts, standards notes, and finally an
    architecture finding when the repository still reads as a prototype with
    a health score below 85.
    """
    summary = analysis.summary
    intelligence = analysis.intelligence
    findings: list[RemediationFinding] = [
        _artifact_finding(artifact) for artifact in summary.artifacts
    ]

    for weakness in intelligence.weaknesses:
        findings.append(
            _make_finding(
                problem=f"Structural weakness in {weakness.path.name}",
                evidence=[str(weakness.path), f"Referenced by {weakness.referenced_by} files"],
                impact=weakness.failure_impact,
                confidence=weakness.confidence,
                recommended_fix="Decouple the module and reduce fan-in",
                estimated_effort="1-4 hours",
                risk_level="High" if weakness.failure_impact in {"Critical", "Severe"} else "Medium",
                root_cause="Too many modules depend on a single implementation",
                likely_consequences="Change propagation and fragile deployments",
                alternative_solution="Wrap the API before a deeper refactor",
                implementation_difficulty="Hard",
                location=str(weakness.path),
            )
        )

    for alert in dependency_health_report(analysis):
        findings.append(
            _make_finding(
                problem=f"Dependency issue: {alert.package}",
                evidence=[alert.status, f"Used by {alert.used_by} files"],
                impact="Moderate" if alert.used_by else "Low",
                confidence=alert.confidence,
                recommended_fix=alert.recommendation,
                estimated_effort="10-30 minutes",
                risk_level="Medium" if alert.status == "Untracked" else "Low",
                root_cause="Dependency lifecycle has drifted from the codebase",
                likely_consequences="Unnecessary package surface and upgrade burden",
                alternative_solution="Pin and document the package if removal is risky",
                implementation_difficulty="Easy",
                location=alert.package,
            )
        )

    for note in standards_report(analysis).notes:
        findings.append(
            _make_finding(
                problem=note,
                evidence=[summary.root.name],
                impact="Moderate",
                confidence=0.7,
                recommended_fix="Improve naming, docs, or tests in the affected area",
                estimated_effort="15-60 minutes",
                risk_level="Low",
                root_cause="Repository standards have drifted over time",
                likely_consequences="Lower maintainability and onboarding friction",
                alternative_solution="Add a short style guide note for the team",
                implementation_difficulty="Easy",
                location=str(summary.root),
            )
        )

    architecture = intelligence.architecture
    if architecture and architecture.primary == "Prototype" and summary.health_score < 85:
        findings.append(
            _make_finding(
                problem="Architecture is still reading as a prototype",
                evidence=[architecture.primary, architecture.secondary],
                impact="Moderate",
                confidence=architecture.confidence,
                recommended_fix="Introduce explicit layers or module boundaries",
                estimated_effort="2-8 hours",
                risk_level="Medium",
                root_cause="Growth outpaced structure",
                likely_consequences="New changes will keep spreading across the codebase",
                alternative_solution="Document the current architecture before refactoring",
                implementation_difficulty="Hard",
                location=str(summary.root),
            )
        )

    return findings
802
+
803
+
804
def prescribe_repository(analysis: RepositoryAnalysis) -> PrescriptionPlan:
    """Rank all remediation findings and derive an immediate action plan.

    Findings are ordered by risk level, then confidence, both descending. The
    top five become immediate actions and feed the time and health estimates;
    the top ten are returned as the plan's findings. The expected health
    increase is capped at 20.
    """
    risk_rank = {"High": 3, "Medium": 2, "Low": 1}
    # Representative minutes per effort range, matched by prefix. The
    # "2-4 hours" and "4+ hours" ranges must be listed here, otherwise the
    # largest tasks fall through to the 30-minute default.
    effort_minutes = (
        ("2-10", 10),
        ("15-30", 25),
        ("30-90", 60),
        ("1-4", 180),
        ("2-4", 180),
        ("2-8", 300),
        ("4+", 240),
    )
    health_gain = {"Low": 2, "Medium": 3}

    ranked = sorted(
        remediation_findings(analysis),
        key=lambda item: (risk_rank.get(item.risk_level, 1), item.confidence),
        reverse=True,
    )

    immediate_actions: list[str] = []
    estimated_time = 0
    expected_health = 0
    for finding in ranked[:5]:
        immediate_actions.append(f"{finding.recommended_fix} - {finding.location}")
        estimated_time += next(
            (minutes for prefix, minutes in effort_minutes if finding.estimated_effort.startswith(prefix)),
            30,
        )
        # Anything that is neither Low nor Medium counts as High (4 points).
        expected_health += health_gain.get(finding.risk_level, 4)

    return PrescriptionPlan(
        findings=ranked[:10],
        immediate_actions=immediate_actions,
        estimated_time_minutes=estimated_time,
        expected_health_increase=min(20, expected_health),
    )
838
+
839
+
840
def repair_plan(analysis: RepositoryAnalysis) -> list[RepairWeek]:
    """Spread the prescribed findings over a four-week repair schedule.

    Each week selects findings by keywords in their problem text and lists up
    to three of their fixes. A week with no matching findings gets generic
    follow-up actions. The health target grows by 4 points per week from the
    current score, capped at 99.
    """
    findings = prescribe_repository(analysis).findings
    base_health = analysis.summary.health_score

    def mentions(*keywords: str):
        """Build a matcher that is true when the problem text contains any keyword."""
        return lambda item: any(keyword in item.problem.lower() for keyword in keywords)

    stages = [
        (
            "Remove dead code and unused dependencies",
            lambda item: item.problem.lower().startswith(("dead code", "unused", "dependency")),
        ),
        (
            "Consolidate duplicate logic and resolve TODOs",
            # Note findings are titled "Outstanding developer note ...", so
            # "todo" alone would never select them.
            mentions("duplicate", "todo", "developer note"),
        ),
        (
            "Split oversized modules and reduce structural risk",
            # Monster-file findings are titled "Oversized or complex file ...".
            mentions("structural", "architecture", "monster", "oversized"),
        ),
        (
            "Recover documentation, naming, and ownership",
            mentions("documentation", "naming", "ownership"),
        ),
    ]

    plan: list[RepairWeek] = []
    for week, (focus, matcher) in enumerate(stages, start=1):
        matched = [item for item in findings if matcher(item)]
        actions = [f"{item.recommended_fix} ({item.location})" for item in matched[:3]]
        if not actions:
            actions = [
                "Re-scan after the previous week of cleanup",
                "Review the highest-risk modules",
            ]
        health_target = min(99, base_health + week * 4)
        plan.append(
            RepairWeek(
                week=week,
                focus=focus,
                actions=actions,
                expected_health=f"{base_health} -> {health_target}",
            )
        )
    return plan
883
+
884
+
885
def release_check(analysis: RepositoryAnalysis, baseline: MaintenanceSnapshot | None = None, limits: BudgetLimits | None = None) -> ReleaseCheck:
    """Decide whether the repository is ready for release.

    Blockers are budget violations and a health drop against *baseline*.
    The score starts from the health score, loses 5 per blocker and 2 per
    warning beyond the third, and is clamped to 0..100. The status is "Ready"
    only with a score of at least 85 and no blockers.
    """
    current = _snapshot_from_analysis(analysis)
    budget = evaluate_budget(current, limits or BudgetLimits())
    regression = compare_to_baseline(current, baseline) if baseline else None

    warnings = list(analysis.summary.warnings)
    blockers: list[str] = [
        f"{label}: {value} / {limit}" for label, value, limit in budget.exceeded
    ]
    if regression and regression.health_delta < 0:
        blockers.append(f"Health dropped {abs(regression.health_delta)} points")
    if current.health_score < 85:
        warnings.append("Health score below release threshold")

    penalty = len(blockers) * 5 + max(0, len(warnings) - 3) * 2
    score = max(0, min(100, current.health_score - penalty))
    status = "Needs Work" if blockers or score < 85 else "Ready"
    return ReleaseCheck(
        score=score,
        status=status,
        warnings=warnings,
        blockers=blockers,
        budget=budget,
        regression=regression,
    )