devarch 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devarch/__init__.py +4 -0
- devarch/__main__.py +4 -0
- devarch/analyzers/__init__.py +2 -0
- devarch/analyzers/ancient.py +48 -0
- devarch/analyzers/dead_code.py +92 -0
- devarch/analyzers/duplicates.py +101 -0
- devarch/analyzers/health.py +60 -0
- devarch/analyzers/maintenance.py +902 -0
- devarch/analyzers/monsters.py +62 -0
- devarch/analyzers/recovery.py +338 -0
- devarch/analyzers/ruins.py +45 -0
- devarch/analyzers/suspicious.py +39 -0
- devarch/analyzers/todos.py +60 -0
- devarch/cli/__init__.py +2 -0
- devarch/cli/main.py +1708 -0
- devarch/models.py +43 -0
- devarch/plugins.py +29 -0
- devarch/reports/__init__.py +2 -0
- devarch/reports/exporters.py +274 -0
- devarch/scanner/__init__.py +2 -0
- devarch/scanner/core.py +15 -0
- devarch/scanner/discovery.py +84 -0
- devarch/scanner/intelligence.py +1559 -0
- devarch/utils/__init__.py +2 -0
- devarch/utils/fs.py +165 -0
- devarch/utils/git_info.py +64 -0
- devarch/utils/rich_ui.py +107 -0
- devarch/version.py +3 -0
- devarch-0.2.0.dist-info/METADATA +317 -0
- devarch-0.2.0.dist-info/RECORD +33 -0
- devarch-0.2.0.dist-info/WHEEL +4 -0
- devarch-0.2.0.dist-info/entry_points.txt +3 -0
- devarch-0.2.0.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,1559 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import re
|
|
5
|
+
import sys
|
|
6
|
+
from collections import Counter, defaultdict
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import subprocess
|
|
11
|
+
from typing import Iterable
|
|
12
|
+
|
|
13
|
+
from ..analyzers.ancient import find_ancient_files
|
|
14
|
+
from ..analyzers.dead_code import find_dead_code
|
|
15
|
+
from ..analyzers.duplicates import find_duplicates, similarity_report
|
|
16
|
+
from ..analyzers.health import calculate_health
|
|
17
|
+
from ..analyzers.monsters import find_monsters
|
|
18
|
+
from ..analyzers.ruins import find_empty_directories, find_unused_assets
|
|
19
|
+
from ..analyzers.suspicious import find_suspicious
|
|
20
|
+
from ..analyzers.todos import find_todos, todos_to_artifacts
|
|
21
|
+
from ..models import Artifact, ScanSummary
|
|
22
|
+
from ..utils.fs import RepoView, collect_repository, path_kind
|
|
23
|
+
from ..utils.git_info import GitSummary, collect_git_summary
|
|
24
|
+
from .discovery import build_reference_map, build_text_index
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Line-anchored matcher for Python imports: group 1 is the module of a
# ``from X import``/bare ``from X`` line, group 2 the module of ``import X``.
PY_IMPORT_RE = re.compile(r"^\s*from\s+([\w.]+)(?:\s+import|\s*$)|^\s*import\s+([\w.]+)", re.MULTILINE)
# JavaScript specifiers: group 1 comes from ``import ... from '<spec>'`` at the
# start of a line, group 2 from ``require('<spec>')`` anywhere in the text.
JS_IMPORT_RE = re.compile(r"""(?m)^\s*import\s+.*?\s+from\s+['"]([^'"]+)['"]|require\(['"]([^'"]+)['"]\)""")
# Splits a dotted relative target into its leading dots and the remainder.
RELATIVE_PREFIX_RE = re.compile(r"^(\.+)(.*)$")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(slots=True)
|
|
33
|
+
class DependencyHub:
|
|
34
|
+
path: Path
|
|
35
|
+
referenced_by: int
|
|
36
|
+
depends_on: int
|
|
37
|
+
external_packages: list[str] = field(default_factory=list)
|
|
38
|
+
dependency_risk: str = "Low"
|
|
39
|
+
failure_impact: str = "Moderate"
|
|
40
|
+
confidence: float = 0.0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(slots=True)
|
|
44
|
+
class FamilyTree:
|
|
45
|
+
name: str
|
|
46
|
+
root: Path
|
|
47
|
+
children: list[Path] = field(default_factory=list)
|
|
48
|
+
parent_modules: list[Path] = field(default_factory=list)
|
|
49
|
+
inherited_classes: list[str] = field(default_factory=list)
|
|
50
|
+
major_chains: list[list[Path]] = field(default_factory=list)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(slots=True)
|
|
54
|
+
class CivilizationCluster:
|
|
55
|
+
name: str
|
|
56
|
+
files: list[Path]
|
|
57
|
+
referenced: int
|
|
58
|
+
last_active_days: int
|
|
59
|
+
status: str
|
|
60
|
+
confidence: float
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass(slots=True)
|
|
64
|
+
class HeatmapBucket:
|
|
65
|
+
bucket: str
|
|
66
|
+
score: float
|
|
67
|
+
label: str
|
|
68
|
+
files: int
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass(slots=True)
|
|
72
|
+
class PersonalityProfile:
|
|
73
|
+
type: str
|
|
74
|
+
traits: list[str]
|
|
75
|
+
risk: str
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(slots=True)
|
|
79
|
+
class ForecastProfile:
|
|
80
|
+
current_health: int
|
|
81
|
+
projected_6_months: int
|
|
82
|
+
projected_12_months: int
|
|
83
|
+
reason: str
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass(slots=True)
|
|
87
|
+
class DNAProfile:
|
|
88
|
+
signature: list[str]
|
|
89
|
+
confidence: float
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass(slots=True)
|
|
93
|
+
class TimelineEra:
|
|
94
|
+
year: int
|
|
95
|
+
title: str
|
|
96
|
+
activity: int
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass(slots=True)
|
|
100
|
+
class InvestigationIncident:
|
|
101
|
+
incident: str
|
|
102
|
+
date: str
|
|
103
|
+
impact: str
|
|
104
|
+
outcome: str
|
|
105
|
+
risk: str
|
|
106
|
+
evidence: list[str] = field(default_factory=list)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(slots=True)
|
|
110
|
+
class StructuralWeakness:
|
|
111
|
+
path: Path
|
|
112
|
+
referenced_by: int
|
|
113
|
+
failure_impact: str
|
|
114
|
+
recovery_difficulty: str
|
|
115
|
+
confidence: float
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@dataclass(slots=True)
|
|
119
|
+
class EarthquakeSimulation:
|
|
120
|
+
target: Path
|
|
121
|
+
projected_damage: int
|
|
122
|
+
subsystems_lost: int
|
|
123
|
+
severity: str
|
|
124
|
+
affected_files: list[Path] = field(default_factory=list)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass(slots=True)
|
|
128
|
+
class ArchitectureClassification:
|
|
129
|
+
primary: str
|
|
130
|
+
secondary: str
|
|
131
|
+
confidence: float
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass(slots=True)
|
|
135
|
+
class ContributorOwnership:
|
|
136
|
+
area: str
|
|
137
|
+
owner: str
|
|
138
|
+
maintenance_owner: str
|
|
139
|
+
abandoned_owner: str
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass(slots=True)
|
|
143
|
+
class MutationEvent:
|
|
144
|
+
project_type: str
|
|
145
|
+
became: str
|
|
146
|
+
date: str
|
|
147
|
+
impact: str
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@dataclass(slots=True)
|
|
151
|
+
class KnowledgeMap:
|
|
152
|
+
core: list[str] = field(default_factory=list)
|
|
153
|
+
dependency_graph: list[str] = field(default_factory=list)
|
|
154
|
+
route_graph: list[str] = field(default_factory=list)
|
|
155
|
+
service_graph: list[str] = field(default_factory=list)
|
|
156
|
+
architecture_graph: list[str] = field(default_factory=list)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@dataclass(slots=True)
|
|
160
|
+
class ContainmentZone:
|
|
161
|
+
location: str
|
|
162
|
+
complexity: int
|
|
163
|
+
spread_rate: str
|
|
164
|
+
recommendation: str
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@dataclass(slots=True)
|
|
168
|
+
class SurvivalProfile:
|
|
169
|
+
score: int
|
|
170
|
+
risk: str
|
|
171
|
+
single_point_failure: str
|
|
172
|
+
maintainability: int
|
|
173
|
+
recoverability: int
|
|
174
|
+
onboarding_difficulty: int
|
|
175
|
+
bus_factor: int
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@dataclass(slots=True)
|
|
179
|
+
class ForensicObservation:
|
|
180
|
+
observation: str
|
|
181
|
+
evidence: list[str] = field(default_factory=list)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@dataclass(slots=True)
class RepositoryIntelligence:
    """Aggregate of everything computed about one repository.

    Fields up to ``graph_edge_count`` are required. The remaining fields are
    optional: lists start empty, single records default to ``None`` and
    ``knowledge_map`` defaults to an empty ``KnowledgeMap``.
    """

    # --- required ---------------------------------------------------------
    root: Path
    view: RepoView
    text_cache: dict[Path, str]
    references: dict[Path, set[Path]]
    dependencies: dict[Path, set[Path]]
    reverse_dependencies: dict[Path, set[Path]]
    external_packages: Counter[str]
    dependency_hubs: list[DependencyHub]
    dependency_cycles: list[list[Path]]
    dependency_chains: list[list[Path]]
    genealogy: list[FamilyTree]
    civilizations: list[CivilizationCluster]
    debt_heatmap: list[HeatmapBucket]
    personality: PersonalityProfile
    forecast: ForecastProfile
    dna: DNAProfile
    timeline_eras: list[TimelineEra]
    ownership: dict[Path, str]
    file_last_active_days: dict[Path, int]
    artifact_confidence: dict[str, float]
    graph_node_count: int
    graph_edge_count: int
    # --- optional ---------------------------------------------------------
    incidents: list[InvestigationIncident] = field(default_factory=list)
    weaknesses: list[StructuralWeakness] = field(default_factory=list)
    quake_simulation: EarthquakeSimulation | None = None
    architecture: ArchitectureClassification | None = None
    contributors: list[ContributorOwnership] = field(default_factory=list)
    mutations: list[MutationEvent] = field(default_factory=list)
    knowledge_map: KnowledgeMap = field(default_factory=KnowledgeMap)
    containment_zones: list[ContainmentZone] = field(default_factory=list)
    survival: SurvivalProfile | None = None
    observations: list[ForensicObservation] = field(default_factory=list)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@dataclass(slots=True)
|
|
221
|
+
class RepositoryAnalysis:
|
|
222
|
+
summary: ScanSummary
|
|
223
|
+
intelligence: RepositoryIntelligence
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _git_run(root: Path, *args: str) -> str | None:
|
|
227
|
+
try:
|
|
228
|
+
result = subprocess.run(["git", "-C", str(root), *args], capture_output=True, text=True, check=True)
|
|
229
|
+
except (OSError, subprocess.CalledProcessError):
|
|
230
|
+
return None
|
|
231
|
+
return result.stdout.strip() or None
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _python_module_key(root: Path, path: Path) -> str:
|
|
235
|
+
return ".".join(path.relative_to(root).with_suffix("").parts)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _package_keys(root: Path, path: Path) -> set[str]:
    """Return every name under which ``path`` may be referred to.

    The set holds the dotted module key, its slash form, the file name, the
    file stem, and the dotted and slash forms of each proper ancestor prefix
    of the key. Empty strings are never included.
    """
    dotted = _python_module_key(root, path)
    segments = dotted.split(".")
    candidates = {dotted, dotted.replace(".", "/"), path.name, path.stem}
    for end in range(1, len(segments)):
        ancestor = ".".join(segments[:end])
        candidates.update((ancestor, ancestor.replace(".", "/")))
    candidates.discard("")
    return candidates
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _resolve_relative_module(source: Path, target: str, root: Path) -> str | None:
    """Turn a dot-prefixed ``target`` into a dotted name relative to ``root``.

    One leading dot means the directory containing ``source``; each further
    dot moves one directory up, stopping silently at ``root``. The text after
    the dots is split on ``.`` and appended. Returns ``None`` when ``target``
    has no leading dot; the result can be an empty string.
    """
    match = RELATIVE_PREFIX_RE.match(target)
    if match is None:
        return None
    dots, remainder = match.group(1), match.group(2)
    package_parts = list(source.relative_to(root).parts[:-1])
    levels_up = max(len(dots) - 1, 0)
    if levels_up:
        # Drop the last `levels_up` directories, never more than are present.
        del package_parts[max(len(package_parts) - levels_up, 0):]
    package_parts.extend(piece for piece in remainder.split(".") if piece)
    return ".".join(package_parts)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _extract_python_dependencies(path: Path, content: str, root: Path) -> tuple[set[str], set[str], dict[str, list[str]]]:
|
|
264
|
+
internal: set[str] = set()
|
|
265
|
+
external: set[str] = set()
|
|
266
|
+
class_bases: dict[str, list[str]] = defaultdict(list)
|
|
267
|
+
try:
|
|
268
|
+
tree = ast.parse(content)
|
|
269
|
+
except SyntaxError:
|
|
270
|
+
return internal, external, class_bases
|
|
271
|
+
|
|
272
|
+
for node in ast.walk(tree):
|
|
273
|
+
if isinstance(node, ast.Import):
|
|
274
|
+
for alias in node.names:
|
|
275
|
+
name = alias.name
|
|
276
|
+
if name:
|
|
277
|
+
internal.add(name)
|
|
278
|
+
elif isinstance(node, ast.ImportFrom):
|
|
279
|
+
if node.module:
|
|
280
|
+
module_name = "." * node.level + node.module if node.level else node.module
|
|
281
|
+
else:
|
|
282
|
+
module_name = "." * node.level
|
|
283
|
+
if module_name.startswith("."):
|
|
284
|
+
resolved = _resolve_relative_module(path, module_name, root)
|
|
285
|
+
if resolved:
|
|
286
|
+
internal.add(resolved)
|
|
287
|
+
elif module_name:
|
|
288
|
+
internal.add(module_name)
|
|
289
|
+
elif isinstance(node, ast.ClassDef):
|
|
290
|
+
base_names: list[str] = []
|
|
291
|
+
for base in node.bases:
|
|
292
|
+
if isinstance(base, ast.Name):
|
|
293
|
+
base_names.append(base.id)
|
|
294
|
+
elif isinstance(base, ast.Attribute):
|
|
295
|
+
parts = []
|
|
296
|
+
current = base
|
|
297
|
+
while isinstance(current, ast.Attribute):
|
|
298
|
+
parts.append(current.attr)
|
|
299
|
+
current = current.value
|
|
300
|
+
if isinstance(current, ast.Name):
|
|
301
|
+
parts.append(current.id)
|
|
302
|
+
base_names.append(".".join(reversed(parts)))
|
|
303
|
+
if base_names:
|
|
304
|
+
class_bases[node.name].extend(base_names)
|
|
305
|
+
|
|
306
|
+
imported_modules = set()
|
|
307
|
+
for item in internal:
|
|
308
|
+
imported_modules.add(item.split(".")[0])
|
|
309
|
+
for node in ast.walk(tree):
|
|
310
|
+
if isinstance(node, ast.Import):
|
|
311
|
+
for alias in node.names:
|
|
312
|
+
name = alias.name.split(".")[0]
|
|
313
|
+
if name and name not in sys.stdlib_module_names:
|
|
314
|
+
external.add(name)
|
|
315
|
+
elif isinstance(node, ast.ImportFrom):
|
|
316
|
+
if node.module:
|
|
317
|
+
top = node.module.split(".")[0]
|
|
318
|
+
if top and top not in sys.stdlib_module_names and top not in imported_modules:
|
|
319
|
+
external.add(top)
|
|
320
|
+
return internal, external, class_bases
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _extract_js_dependencies(content: str) -> tuple[set[str], set[str]]:
    """Split JavaScript import specifiers into local paths and package names.

    Specifiers are found with ``JS_IMPORT_RE``. A specifier starting with
    ``.`` or ``/`` is kept verbatim in the first set. Any other specifier is
    reduced to its package name and put in the second set: the first path
    segment, or the first two segments for a scoped package such as
    ``@scope/name/sub``.
    """
    internal: set[str] = set()
    external: set[str] = set()
    for match in JS_IMPORT_RE.finditer(content):
        target = match.group(1) or match.group(2)
        if not target:
            continue
        if target.startswith((".", "/")):
            internal.add(target)
            continue
        segments = target.split("/")
        if target.startswith("@") and len(segments) > 1:
            # A scoped package is identified by "@scope/name"; keeping only
            # "@scope" would merge every package published under that scope.
            external.add("/".join(segments[:2]))
        else:
            external.add(segments[0])
    return internal, external
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _build_module_index(view: RepoView) -> dict[str, Path]:
    """Map lookup names to files for every ``view.files`` entry of kind "text".

    Each file is registered under its dotted relative name, its slash-form
    relative name and its bare stem. An ``__init__.py`` is additionally
    registered under the dotted and slash forms of its directory. When two
    files share a key, the one later in ``view.files`` wins.
    """
    index: dict[str, Path] = {}
    for path in view.files:
        if path_kind(path) != "text":
            continue
        rel = path.relative_to(view.root).with_suffix("")
        keys = [".".join(rel.parts), rel.as_posix(), path.stem]
        if path.name == "__init__.py":
            package = rel.parent
            keys.append(".".join(package.parts))
            keys.append(package.as_posix())
        for key in keys:
            index[key] = path
    return index
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _resolve_target(source: Path, target: str, root: Path, module_index: dict[str, Path]) -> Path | None:
|
|
356
|
+
raw = target.strip()
|
|
357
|
+
if not raw:
|
|
358
|
+
return None
|
|
359
|
+
if raw.startswith("."):
|
|
360
|
+
relative = _resolve_relative_module(source, raw, root)
|
|
361
|
+
if relative:
|
|
362
|
+
candidate = module_index.get(relative) or module_index.get(relative.replace(".", "/"))
|
|
363
|
+
if candidate:
|
|
364
|
+
return candidate
|
|
365
|
+
for suffix in ("", ".py", ".js", ".ts", ".tsx", ".jsx", ".md"):
|
|
366
|
+
possible = root / relative.replace(".", "/")
|
|
367
|
+
if suffix and not str(possible).endswith(suffix):
|
|
368
|
+
possible = possible.with_suffix(suffix)
|
|
369
|
+
if possible.exists():
|
|
370
|
+
return possible.resolve()
|
|
371
|
+
return None
|
|
372
|
+
cleaned = raw.split(" as ", 1)[0].strip()
|
|
373
|
+
cleaned = cleaned.replace("/", ".")
|
|
374
|
+
if cleaned in module_index:
|
|
375
|
+
return module_index[cleaned]
|
|
376
|
+
parts = cleaned.split(".")
|
|
377
|
+
for index in range(len(parts), 0, -1):
|
|
378
|
+
prefix = ".".join(parts[:index])
|
|
379
|
+
if prefix in module_index:
|
|
380
|
+
return module_index[prefix]
|
|
381
|
+
for suffix in (".py", ".js", ".ts", ".tsx", ".jsx", ".md", "/__init__.py"):
|
|
382
|
+
candidate = root / cleaned.replace(".", "/")
|
|
383
|
+
if suffix == "/__init__.py":
|
|
384
|
+
possible = candidate / "__init__.py"
|
|
385
|
+
else:
|
|
386
|
+
possible = candidate.with_suffix(suffix) if candidate.suffix == "" else candidate
|
|
387
|
+
if possible.exists():
|
|
388
|
+
return possible.resolve()
|
|
389
|
+
return None
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def _git_last_active_days(root: Path, path: Path, use_git: bool = True) -> int:
|
|
393
|
+
raw = _git_run(root, "log", "-1", "--format=%ct", "--", str(path.relative_to(root))) if use_git else None
|
|
394
|
+
if not raw:
|
|
395
|
+
try:
|
|
396
|
+
modified = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
|
|
397
|
+
except OSError:
|
|
398
|
+
return 0
|
|
399
|
+
return max((datetime.now(timezone.utc) - modified).days, 0)
|
|
400
|
+
timestamp = int(raw.splitlines()[0])
|
|
401
|
+
return max((datetime.now(timezone.utc) - datetime.fromtimestamp(timestamp, tz=timezone.utc)).days, 0)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def _git_last_author(root: Path, path: Path) -> str:
    """Return the author name of the latest commit touching ``path``.

    Falls back to the string ``"unknown"`` when git returns nothing.
    """
    relative = str(path.relative_to(root))
    author = _git_run(root, "log", "-1", "--format=%an", "--", relative)
    return author if author else "unknown"
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def _cycle_dfs(start: Path, graph: dict[Path, set[Path]]) -> list[list[Path]]:
|
|
410
|
+
cycles: list[list[Path]] = []
|
|
411
|
+
stack: list[Path] = []
|
|
412
|
+
seen: set[Path] = set()
|
|
413
|
+
|
|
414
|
+
def visit(node: Path) -> None:
|
|
415
|
+
if node in stack:
|
|
416
|
+
index = stack.index(node)
|
|
417
|
+
cycle = stack[index:] + [node]
|
|
418
|
+
if len(cycle) > 2:
|
|
419
|
+
cycles.append(cycle)
|
|
420
|
+
return
|
|
421
|
+
if node in seen:
|
|
422
|
+
return
|
|
423
|
+
seen.add(node)
|
|
424
|
+
stack.append(node)
|
|
425
|
+
for child in graph.get(node, set()):
|
|
426
|
+
visit(child)
|
|
427
|
+
stack.pop()
|
|
428
|
+
|
|
429
|
+
visit(start)
|
|
430
|
+
return cycles
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def _dependency_cycles(graph: dict[Path, set[Path]]) -> list[list[Path]]:
|
|
434
|
+
cycles: list[list[Path]] = []
|
|
435
|
+
for node in graph:
|
|
436
|
+
cycles.extend(_cycle_dfs(node, graph))
|
|
437
|
+
deduped: list[list[Path]] = []
|
|
438
|
+
seen: set[tuple[str, ...]] = set()
|
|
439
|
+
for cycle in cycles:
|
|
440
|
+
names = tuple(str(path) for path in cycle)
|
|
441
|
+
signature = tuple(sorted(names))
|
|
442
|
+
if signature not in seen:
|
|
443
|
+
seen.add(signature)
|
|
444
|
+
deduped.append(cycle)
|
|
445
|
+
return deduped
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _reachable_count(start: Path, graph: dict[Path, set[Path]]) -> int:
|
|
449
|
+
visited: set[Path] = set()
|
|
450
|
+
stack = [start]
|
|
451
|
+
while stack:
|
|
452
|
+
node = stack.pop()
|
|
453
|
+
for child in graph.get(node, set()):
|
|
454
|
+
if child not in visited and child != start:
|
|
455
|
+
visited.add(child)
|
|
456
|
+
stack.append(child)
|
|
457
|
+
return len(visited)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _longest_chain(start: Path, graph: dict[Path, set[Path]], limit: int = 8) -> list[Path]:
|
|
461
|
+
best: list[Path] = [start]
|
|
462
|
+
|
|
463
|
+
def visit(node: Path, chain: list[Path], seen: set[Path]) -> None:
|
|
464
|
+
nonlocal best
|
|
465
|
+
if len(chain) > len(best):
|
|
466
|
+
best = chain[:]
|
|
467
|
+
if len(chain) >= limit:
|
|
468
|
+
return
|
|
469
|
+
for child in graph.get(node, set()):
|
|
470
|
+
if child in seen:
|
|
471
|
+
continue
|
|
472
|
+
visit(child, chain + [child], seen | {child})
|
|
473
|
+
|
|
474
|
+
visit(start, [start], {start})
|
|
475
|
+
return best
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _cluster_name(paths: list[Path]) -> str:
|
|
479
|
+
lowered = " ".join(path.name.lower() for path in paths)
|
|
480
|
+
if "auth" in lowered:
|
|
481
|
+
return "Legacy Authentication System"
|
|
482
|
+
if "payment" in lowered or "billing" in lowered:
|
|
483
|
+
return "Abandoned Payment Flow"
|
|
484
|
+
if "admin" in lowered:
|
|
485
|
+
return "Forgotten Admin Panel"
|
|
486
|
+
if "api" in lowered or "v1" in lowered or "v2" in lowered:
|
|
487
|
+
return "Legacy API Version"
|
|
488
|
+
return "Lost Subsystem"
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def _group_by_top_level(paths: Iterable[Path], root: Path) -> dict[str, list[Path]]:
|
|
492
|
+
groups: dict[str, list[Path]] = defaultdict(list)
|
|
493
|
+
for path in paths:
|
|
494
|
+
rel = path.relative_to(root)
|
|
495
|
+
group = rel.parts[0] if rel.parts else rel.parent.name
|
|
496
|
+
groups[group].append(path)
|
|
497
|
+
return groups
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def _repository_dna(view: RepoView, text_cache: dict[Path, str], health_score: int) -> DNAProfile:
    """Build the tag signature for a repository.

    Tags are appended in a fixed order: framework and database keywords found
    in the cached text or file names, language tags from file suffixes,
    ``TEST_HEAVY`` when any path has a ``test`` or ``tests`` component below
    ``view.root``, ``DOCUMENTED`` for ``.md``/``.rst`` files, a size/shape tag,
    and two tags derived from ``health_score``. The confidence is
    ``0.65 + health_score / 300`` capped at ``0.99`` and rounded to two
    decimals.
    """
    corpus = "\n".join(text_cache.values()).lower()
    names = " ".join(path.name.lower() for path in view.files)
    suffixes = {path.suffix.lower() for path in view.files}

    def relative_parts(path: Path) -> tuple[str, ...]:
        # Only components below the repository root count: a checkout that
        # lives under e.g. /home/test/ must not tag every repository.
        try:
            return path.relative_to(view.root).parts
        except ValueError:
            return path.parts

    signature: list[str] = []
    for framework in ("fastapi", "django", "flask"):
        if framework in corpus or framework in names:
            signature.append(framework.upper())
    if any(marker in corpus for marker in ("postgres", "psycopg", "sqlalchemy")):
        signature.append("POSTGRES")
    if suffixes & {".ts", ".tsx"}:
        signature.append("TYPESCRIPT")
    if ".py" in suffixes:
        signature.append("PYTHON")
    test_dirs = {"test", "tests"}
    if any(not test_dirs.isdisjoint(relative_parts(path)) for path in view.files):
        signature.append("TEST_HEAVY")
    if suffixes & {".md", ".rst"}:
        signature.append("DOCUMENTED")

    if len(view.files) < 20:
        signature.append("COMPACT")
    else:
        directories = {path.parent for path in view.files}
        signature.append("MODULAR" if len(directories) > 6 else "MONOLITHIC")

    if health_score >= 80:
        signature.append("HIGH_MAINTAINABILITY")
    elif health_score >= 60:
        signature.append("MEDIUM_MAINTAINABILITY")
    else:
        signature.append("LOW_MAINTAINABILITY")

    if health_score >= 85:
        signature.append("LOW_COMPLEXITY")
    elif health_score >= 60:
        signature.append("MEDIUM_COMPLEXITY")
    else:
        signature.append("HIGH_COMPLEXITY")

    confidence = min(0.99, 0.65 + (health_score / 300))
    return DNAProfile(signature=signature, confidence=round(confidence, 2))
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _architecture_classification(view: RepoView, intelligence: "RepositoryIntelligence | None" = None) -> ArchitectureClassification:
    """Label the repository from its top-level names using keyword rules.

    The primary label comes from keywords in the sorted, lower-cased
    top-level names, falling back to a size check and finally ``"Prototype"``.
    The secondary label is chosen from recorded dependency cycles, then
    ``test_`` file names, then "service" top-level names. Confidence is
    ``0.68`` for ``"Prototype"`` and ``0.82`` otherwise.
    """
    top_level: set[str] = set()
    for path in view.files:
        parts = path.relative_to(view.root).parts
        if parts:
            top_level.add(parts[0])
    joined = " ".join(sorted(top_level)).lower()

    def mentions(*tokens: str) -> bool:
        return any(token in joined for token in tokens)

    if mentions("service", "worker", "queue"):
        primary = "Service Oriented"
    elif mentions("controller", "view", "model", "route", "api"):
        primary = "Layered Monolith"
    elif len(top_level) >= 8 and sum(1 for path in view.files if path.suffix.lower() in {".py", ".ts", ".tsx"}) > 20:
        primary = "Modular Monolith"
    else:
        primary = "Prototype"

    if intelligence and intelligence.dependency_cycles:
        secondary = "Event Driven"
    elif any(path.name.startswith("test_") for path in view.files):
        secondary = "Domain Driven Design"
    elif any("service" in part.lower() for part in top_level):
        secondary = "Service Oriented"
    else:
        secondary = "Layered"

    confidence = 0.68 if primary == "Prototype" else 0.82
    return ArchitectureClassification(primary=primary, secondary=secondary, confidence=confidence)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def _contributors(root: Path, ownership: dict[Path, str], dependencies: dict[Path, set[Path]]) -> list[ContributorOwnership]:
    """Summarise file ownership per top-level area, sorted by area name.

    For each area the most frequent owner becomes both ``owner`` and
    ``maintenance_owner``. ``abandoned_owner`` is ``"unknown"`` when any file
    in the area has an unknown owner, otherwise the same owner.
    ``dependencies`` is accepted but not used.
    """
    tallies: dict[str, Counter[str]] = defaultdict(Counter)
    for path, owner in ownership.items():
        parts = path.relative_to(root).parts
        area = parts[0] if parts else path.stem
        tallies[area][owner] += 1

    rows: list[ContributorOwnership] = []
    for area in sorted(tallies):
        counts = tallies[area]
        owner = counts.most_common(1)[0][0]
        has_unknown = bool(counts.get("unknown", 0))
        rows.append(
            ContributorOwnership(
                area=area,
                owner=owner,
                maintenance_owner=owner,
                abandoned_owner="unknown" if has_unknown else owner,
            )
        )
    return rows
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def _mutations(root: Path, view: RepoView, git_summary: GitSummary) -> list[MutationEvent]:
    """Return at most one ``MutationEvent`` describing the repository's history.

    Without git data, an event dated with the current month is produced only
    when a file name contains "legacy" or "migration". With git data, the
    ``YYYY-MM`` value appearing most often in the log decides the event: 20
    or more occurrences give a "High" impact "Web Platform" event, fewer a
    "Medium" impact "Growing Platform" one. Returns an empty list when
    nothing applies.
    """
    if not git_summary.available:
        names = " ".join(path.name.lower() for path in view.files)
        if "legacy" not in names and "migration" not in names:
            return []
        return [
            MutationEvent(
                project_type="CLI",
                became="Hybrid / Transitional System",
                date=datetime.now(timezone.utc).strftime("%Y-%m"),
                impact="Medium",
            )
        ]

    raw = _git_run(root, "log", "--reverse", "--format=%ad", "--date=format:%Y-%m", "--name-only")
    if not raw:
        return []

    month_line = re.compile(r"^\d{4}-\d{2}$")
    stripped_lines = (line.strip() for line in raw.splitlines())
    wave_counts: Counter[str] = Counter(line for line in stripped_lines if month_line.match(line))
    if not wave_counts:
        return []

    year_month, count = wave_counts.most_common(1)[0]
    busy = count >= 20
    return [
        MutationEvent(
            project_type="CLI",
            became="Web Platform" if busy else "Growing Platform",
            date=year_month,
            impact="High" if busy else "Medium",
        )
    ]
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
def _knowledge_map(view: RepoView, dependency_hubs: list[DependencyHub], architecture: ArchitectureClassification) -> KnowledgeMap:
    """Assemble the display lists of a ``KnowledgeMap``.

    ``core`` is the first eight top-level names in sorted order,
    ``dependency_graph`` describes the first ten hubs, and the route and
    service lists pick file names by keyword. ``architecture_graph`` holds a
    single "primary -> secondary" entry.
    """
    top_level: set[str] = set()
    route_graph: list[str] = []
    service_graph: list[str] = []
    for path in view.files:
        parts = path.relative_to(view.root).parts
        if parts:
            top_level.add(parts[0])
        lowered = path.name.lower()
        if "route" in lowered or "api" in lowered:
            route_graph.append(f"{path.name}")
        if "service" in lowered or "worker" in lowered:
            service_graph.append(f"{path.name}")

    return KnowledgeMap(
        core=sorted(top_level)[:8],
        dependency_graph=[f"{hub.path.name} -> {hub.depends_on} deps" for hub in dependency_hubs[:10]],
        route_graph=route_graph,
        service_graph=service_graph,
        architecture_graph=[f"{architecture.primary} -> {architecture.secondary}"],
    )
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
def _containment_zones(view: RepoView, text_cache: dict[Path, str], dependencies: dict[Path, set[Path]], reverse_dependencies: dict[Path, set[Path]], artifacts: list[Artifact]) -> list[ContainmentZone]:
    """Score each top-level area and return its zones, most complex first.

    An area's complexity adds up, over its files: occurrences of ``"if "``,
    ``"for "`` and ``"while "`` in the cached text, the number of outgoing
    and incoming dependencies, 2 for each "monster_file" artifact path and 1
    for each "duplicate_block" artifact path. Areas scoring 0 are skipped.
    Thresholds use the raw score; the stored complexity is capped at 100.
    """
    monster_paths = {artifact.path for artifact in artifacts if artifact.kind == "monster_file"}
    duplicate_paths = {artifact.path for artifact in artifacts if artifact.kind == "duplicate_block"}

    zones: list[ContainmentZone] = []
    for location, paths in _group_by_top_level(view.files, view.root).items():
        complexity = 0
        for path in paths:
            text = text_cache.get(path, "")
            complexity += text.count("if ") + text.count("for ") + text.count("while ")
            complexity += len(dependencies.get(path, set())) + len(reverse_dependencies.get(path, set()))
            if path in monster_paths:
                complexity += 2
            if path in duplicate_paths:
                complexity += 1
        if complexity == 0:
            continue

        if complexity >= 40:
            spread, rec = "Increasing", "Immediate Refactor"
        elif complexity >= 20:
            spread, rec = "Moderate", "Contain and simplify"
        else:
            spread, rec = "Stable", "Monitor"
        zones.append(
            ContainmentZone(
                location=location,
                complexity=min(100, complexity),
                spread_rate=spread,
                recommendation=rec,
            )
        )

    zones.sort(key=lambda zone: zone.complexity, reverse=True)
    return zones
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def _survival_score(summary: ScanSummary, intelligence: "RepositoryIntelligence", contributors: list[ContributorOwnership]) -> SurvivalProfile:
    """Combine health, structure and ownership figures into a ``SurvivalProfile``.

    Maintainability, recoverability and onboarding difficulty are clamped to
    0..100; the bus factor is the number of distinct known owners clamped to
    1..5. The score is their weighted sum, rounded, and maps to a risk label
    at the 80 / 60 / 40 thresholds.
    """
    cycle_count = len(intelligence.dependency_cycles)
    cluster_count = len(intelligence.civilizations)
    weakness_count = len(intelligence.weaknesses)
    hub_count = len(intelligence.dependency_hubs)

    def clamp(value: int) -> int:
        return max(0, min(100, value))

    maintainability = clamp(summary.health_score)
    recoverability = clamp(100 - cycle_count * 8 - cluster_count * 6 - weakness_count * 4)
    onboarding = clamp(weakness_count * 10 + hub_count * 2 + (100 - summary.health_score) // 2)
    known_owners = {item.owner for item in contributors if item.owner != "unknown"}
    bus_factor = max(1, min(5, len(known_owners)))

    # NOTE(review): the other three terms range over 0..100 but
    # `bus_factor * 5` tops out at 25, so the score cannot exceed ~89.
    # Possibly `* 20` was intended; confirm before changing the scale.
    score = round((maintainability * 0.4 + recoverability * 0.25 + (100 - onboarding) * 0.2 + bus_factor * 5 * 0.15))

    risk = "Critical"
    for floor, label in ((80, "Low"), (60, "Moderate"), (40, "High")):
        if score >= floor:
            risk = label
            break

    if intelligence.weaknesses:
        single_point = intelligence.weaknesses[0].path.name
    elif intelligence.dependency_hubs:
        single_point = intelligence.dependency_hubs[0].path.name
    else:
        single_point = "Unknown"

    return SurvivalProfile(
        score=score,
        risk=risk,
        single_point_failure=single_point,
        maintainability=maintainability,
        recoverability=recoverability,
        onboarding_difficulty=onboarding,
        bus_factor=bus_factor,
    )
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
def _observations(
    intelligence: "RepositoryIntelligence",
    architecture: ArchitectureClassification,
    investigations: list[InvestigationIncident],
    weaknesses: list[StructuralWeakness],
    civilizations: list[CivilizationCluster],
) -> list[ForensicObservation]:
    """Turn analysis results into a list of observation sentences.

    Notes are emitted in a fixed order: abandoned clusters, structural
    bottlenecks, migration evidence, the architecture summary (always
    present), and cyclic dependencies. Each optional note appears only when
    its input is non-empty.
    """
    notes: list[ForensicObservation] = []

    def record(observation: str, evidence: list[str]) -> None:
        notes.append(ForensicObservation(observation=observation, evidence=evidence))

    if civilizations:
        record(
            f"The repository contains {len(civilizations)} partially abandoned system cluster(s).",
            [civ.name for civ in civilizations[:3]],
        )
    if weaknesses:
        record(
            f"{len(weaknesses)} structural bottleneck(s) concentrate failure risk in a few modules.",
            [str(item.path) for item in weaknesses[:3]],
        )
    if investigations:
        record(
            "Evidence suggests a migration or refactor has occurred without fully retiring the old path.",
            [incident.incident for incident in investigations[:2]],
        )
    record(
        f"The repository most closely resembles a {architecture.primary.lower()}.",
        [architecture.secondary, f"confidence={architecture.confidence:.0%}"],
    )
    if intelligence.dependency_cycles:
        record(
            "Cyclic dependencies indicate architectural pressure and constrained change paths.",
            [f"cycles={len(intelligence.dependency_cycles)}"],
        )
    return notes
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def _classify_personality(
    *,
    health_score: int,
    commit_count: int,
    file_count: int,
    monster_count: int,
    duplicate_count: int,
    ancient_count: int,
    dependency_cycles: int,
    external_packages: int,
) -> PersonalityProfile:
    """Pick a personality label for the repository from coarse metrics.

    The rules are evaluated top to bottom and the first match wins, so
    their order is part of the contract. When nothing matches the
    repository is labelled "Research Lab".

    Returns:
        A ``PersonalityProfile`` carrying the type, three traits and a risk.
    """
    if file_count <= 15 and commit_count < 20:
        label = "Prototype"
        traits = ["Small surface area", "Fast-moving changes", "Minimal bureaucracy"]
        risk = "Volatile"
    elif ancient_count and dependency_cycles and monster_count:
        label = "Fortress"
        traits = ["Defensive layers", "Legacy defenses", "High inertia"]
        risk = "Accumulated complexity"
    elif external_packages > 20 and file_count > 50 and health_score >= 70:
        label = "Enterprise"
        traits = ["Structured layering", "Many integrations", "Policy driven"]
        risk = "Integration drag"
    elif monster_count > 0 and duplicate_count > 0 and health_score < 75:
        label = "Startup"
        traits = ["Rapid experimentation", "High feature growth", "Moderate organization"]
        risk = "Accumulating technical debt"
    elif commit_count > 100 and file_count > 80 and health_score >= 75:
        label = "Scientist"
        traits = ["Iterative exploration", "Measured evolution", "Strong evidence trail"]
        risk = "Analysis overhead"
    elif file_count > 50 and dependency_cycles == 0 and external_packages < 12:
        label = "Architect"
        traits = ["Clear boundaries", "Intentional structure", "Stable modules"]
        risk = "Rigid change paths"
    elif commit_count > 60 and file_count > 40:
        label = "Explorer"
        traits = ["Rapid experimentation", "High feature growth", "Moderate organization"]
        risk = "Moderate technical debt"
    else:
        label = "Research Lab"
        traits = ["Experimental paths", "Multiple branches of thought", "Evolving structure"]
        risk = "Discovery overhead"

    return PersonalityProfile(type=label, traits=traits, risk=risk)
|
|
756
|
+
|
|
757
|
+
|
|
758
|
+
def _forecast(health_score: int, dependency_cycles: int, monster_count: int, duplicate_count: int, ancient_count: int) -> ForecastProfile:
    """Project the health score 6 and 12 months ahead.

    A weighted "drift" (cycles x3, monsters x4, duplicates x2, ancient
    files x1) is subtracted from the current score together with a fixed
    decay of 4 (6 months) or 10 (12 months). Both projections are clamped
    to the 0..100 range.
    """

    def _clamp(value: int) -> int:
        return max(0, min(100, value))

    drift = dependency_cycles * 3 + monster_count * 4 + duplicate_count * 2 + ancient_count
    reason = (
        "Increasing dependency growth and structural debt"
        if drift
        else "Stable graph and limited debt signals"
    )
    return ForecastProfile(
        current_health=health_score,
        projected_6_months=_clamp(health_score - drift - 4),
        projected_12_months=_clamp(health_score - drift - 10),
        reason=reason,
    )
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def _timeline_eras(root: Path) -> list[TimelineEra]:
    """Group the git history by commit year and name each year as an era.

    Titles are chosen from the number of commits in the year
    (<=4, <=15, <=30, otherwise "Feature Explosion Era"). When there are
    at least two eras and the latest one has six commits or fewer, it is
    relabelled "Maintenance Era".

    Returns:
        Eras ordered by year, or an empty list when ``_git_run`` yields no
        output or no line is a plain number.
    """
    raw = _git_run(root, "log", "--reverse", "--format=%ad", "--date=format:%Y")
    if not raw:
        return []

    stripped = (line.strip() for line in raw.splitlines())
    counts: Counter[int] = Counter(int(token) for token in stripped if token.isdigit())
    if not counts:
        return []

    # Upper activity bound (inclusive) -> era title, checked in order.
    bands = ((4, "Foundation Era"), (15, "Expansion Era"), (30, "Growth Era"))

    eras: list[TimelineEra] = []
    for year, activity in sorted(counts.items()):
        title = next((name for limit, name in bands if activity <= limit), "Feature Explosion Era")
        eras.append(TimelineEra(year=year, title=title, activity=activity))

    latest = eras[-1]
    if len(eras) >= 2 and latest.activity <= 6:
        latest.title = "Maintenance Era"
    return eras
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
def _dependency_heatmap(
    view: RepoView,
    text_cache: dict[Path, str],
    dependencies: dict[Path, set[Path]],
    reverse_dependencies: dict[Path, set[Path]],
    artifacts: list[Artifact],
) -> list[HeatmapBucket]:
    """Score every top-level group of files for accumulated debt.

    The score per group combines TODO/FIXME occurrences, the number of
    monster/ancient/duplicate artifacts attached to its files, and its
    incoming and outgoing dependency edge counts. Scores map to the labels
    "Severe" (>=20), "High" (>=12), "Moderate" (>=6) and "Light".

    Returns:
        Buckets sorted by score, highest first.
    """
    artifacts_at = defaultdict(list)
    for item in artifacts:
        artifacts_at[item.path].append(item)

    buckets: list[HeatmapBucket] = []
    for group, members in _group_by_top_level(view.files, view.root).items():
        todo_density = 0
        for member in members:
            text = text_cache.get(member, "")
            todo_density += text.count("TODO") + text.count("FIXME")

        # One pass over the group's artifacts, tallied by kind.
        kinds = Counter(
            item.kind
            for member in members
            for item in artifacts_at.get(member, [])
        )
        monster_weight = kinds["monster_file"]
        ancient_weight = kinds["ancient_file"]
        duplicate_weight = kinds["duplicate_block"]

        incoming = sum(len(reverse_dependencies.get(member, set())) for member in members)
        outgoing = sum(len(dependencies.get(member, set())) for member in members)

        score = todo_density * 0.7 + monster_weight * 3 + ancient_weight * 2 + duplicate_weight * 2 + incoming * 0.4 + outgoing * 0.25

        if score < 6:
            label = "Light"
        elif score < 12:
            label = "Moderate"
        elif score < 20:
            label = "High"
        else:
            label = "Severe"

        buckets.append(HeatmapBucket(bucket=group, score=round(score, 1), label=label, files=len(members)))

    buckets.sort(key=lambda bucket: bucket.score, reverse=True)
    return buckets
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
def _family_trees(
    view: RepoView,
    root: Path,
    reverse_dependencies: dict[Path, set[Path]],
    dependencies: dict[Path, set[Path]],
    class_bases: dict[Path, dict[str, list[str]]],
) -> list[FamilyTree]:
    """Build one "family tree" per top-level group with at least two files.

    Within a group the files are ranked by (number of dependents, number
    of dependencies), highest first. The top file becomes the tree root
    and the next four become its children.

    Args:
        view: Repository view; ``view.files`` is grouped by top level.
        root: Repository root passed to ``_group_by_top_level``.
        reverse_dependencies: Maps a file to the files that depend on it.
        dependencies: Maps a file to the files it depends on.
        class_bases: Per-file mapping of class name to its base names.

    Returns:
        One ``FamilyTree`` per qualifying group, in grouping order.
    """
    trees: list[FamilyTree] = []
    for group, paths in _group_by_top_level(view.files, root).items():
        if len(paths) < 2:
            continue

        ordered = sorted(
            paths,
            key=lambda path: (len(reverse_dependencies.get(path, set())), len(dependencies.get(path, set()))),
            reverse=True,
        )
        main = ordered[0]

        # Only inheritance edges declared in this group's own files belong to
        # the family. Previously every file in the repository was scanned for
        # every group, so all trees reported the same repository-wide list.
        inherited = [
            f"{class_name} -> {base}"
            for path in ordered
            for class_name, bases in class_bases.get(path, {}).items()
            for base in bases
        ]

        trees.append(
            FamilyTree(
                name=f"{group.title()} Family",
                root=main,
                children=ordered[1:5],
                parent_modules=sorted(reverse_dependencies.get(main, set()), key=str)[:5],
                inherited_classes=inherited[:10],
                major_chains=[],
            )
        )
    return trees
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
def _civilizations(
    view: RepoView,
    root: Path,
    reverse_dependencies: dict[Path, set[Path]],
    file_last_active_days: dict[Path, int],
) -> list[CivilizationCluster]:
    """Find top-level groups that look dormant or extinct.

    A group qualifies when it has at least three files, its largest
    ``file_last_active_days`` value is at least 120, and it is either
    "Extinct" (no file is referenced and the value is >= 365) or
    "Dormant" (at most max(1, files // 4) files are referenced and the
    value is >= 180).

    Returns:
        Clusters sorted by (last_active_days, referenced), descending.
    """
    found: list[CivilizationCluster] = []
    for _group, members in _group_by_top_level(view.files, root).items():
        size = len(members)
        referenced = sum(1 for member in members if reverse_dependencies.get(member))
        # NOTE(review): this takes the largest per-file value in the group
        # (files missing from the mapping count as 0). Confirm that is the
        # intended definition of the cluster's "last active" age.
        last_active = max((file_last_active_days.get(member, 0) for member in members), default=0)

        if size < 3 or last_active < 120:
            continue

        is_extinct = referenced == 0 and last_active >= 365
        is_dormant = referenced <= max(1, size // 4) and last_active >= 180
        if not is_extinct and not is_dormant:
            continue

        name = _cluster_name(members)
        if is_extinct:
            status, confidence = "Extinct", 0.88
        else:
            status, confidence = "Dormant", 0.76

        found.append(
            CivilizationCluster(
                name=name,
                files=sorted(members),
                referenced=referenced,
                last_active_days=last_active,
                status=status,
                confidence=confidence,
            )
        )

    found.sort(key=lambda cluster: (cluster.last_active_days, cluster.referenced), reverse=True)
    return found
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
def _dependency_hubs(
    view: RepoView,
    dependencies: dict[Path, set[Path]],
    reverse_dependencies: dict[Path, set[Path]],
    external_packages: dict[Path, set[str]],
) -> list[DependencyHub]:
    """Collect files that act as dependency hubs.

    A non-binary file is a hub when it has at least 5 dependents, at least
    8 dependencies, or any external package. Each hub gets a failure
    impact ("Critical"/"High"/"Moderate"), a dependency risk
    ("High"/"Medium") and a confidence capped at 0.99.

    Returns:
        Hubs sorted by (referenced_by, depends_on), descending.
    """
    hubs: list[DependencyHub] = []
    for path in view.files:
        if path_kind(path) == "binary":
            continue

        fan_in = len(reverse_dependencies.get(path, set()))
        fan_out = len(dependencies.get(path, set()))
        packages = sorted(external_packages.get(path, set()))

        if not (fan_in >= 5 or fan_out >= 8 or packages):
            continue

        if fan_in >= 20 or fan_out >= 15:
            impact = "Critical"
        elif fan_in >= 8 or fan_out >= 10:
            impact = "High"
        else:
            impact = "Moderate"

        if fan_in >= 10 or fan_out >= 10:
            risk = "High"
        else:
            risk = "Medium"

        hubs.append(
            DependencyHub(
                path=path,
                referenced_by=fan_in,
                depends_on=fan_out,
                external_packages=packages[:8],
                dependency_risk=risk,
                failure_impact=impact,
                confidence=min(0.99, 0.6 + fan_in * 0.015 + fan_out * 0.01),
            )
        )

    hubs.sort(key=lambda hub: (hub.referenced_by, hub.depends_on), reverse=True)
    return hubs
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
def _dependency_chains(dependencies: dict[Path, set[Path]], hubs: list[DependencyHub]) -> list[list[Path]]:
    """Return the longest dependency chain for each of the first ten hubs.

    Chains are computed by ``_longest_chain``; chains of a single element
    are dropped.
    """
    candidates = (_longest_chain(hub.path, dependencies) for hub in hubs[:10])
    return [chain for chain in candidates if len(chain) > 1]
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
def _structural_weaknesses(view: RepoView, dependencies: dict[Path, set[Path]], reverse_dependencies: dict[Path, set[Path]], dependency_hubs: list[DependencyHub]) -> list[StructuralWeakness]:
|
|
938
|
+
weaknesses: list[StructuralWeakness] = []
|
|
939
|
+
for hub in dependency_hubs[:12]:
|
|
940
|
+
if hub.referenced_by >= 8 and (hub.depends_on <= 2 or not dependencies.get(hub.path)):
|
|
941
|
+
difficulty = "High" if hub.depends_on > 0 else "Medium"
|
|
942
|
+
impact = "Severe" if hub.referenced_by >= 20 else "High"
|
|
943
|
+
weaknesses.append(
|
|
944
|
+
StructuralWeakness(
|
|
945
|
+
path=hub.path,
|
|
946
|
+
referenced_by=hub.referenced_by,
|
|
947
|
+
failure_impact=impact,
|
|
948
|
+
recovery_difficulty=difficulty,
|
|
949
|
+
confidence=min(0.99, hub.confidence + 0.1),
|
|
950
|
+
)
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
for path, dependents in sorted(reverse_dependencies.items(), key=lambda item: len(item[1]), reverse=True):
|
|
954
|
+
if len(dependents) >= 25 and path not in {weak.path for weak in weaknesses}:
|
|
955
|
+
weaknesses.append(
|
|
956
|
+
StructuralWeakness(
|
|
957
|
+
path=path,
|
|
958
|
+
referenced_by=len(dependents),
|
|
959
|
+
failure_impact="Severe" if len(dependents) >= 50 else "High",
|
|
960
|
+
recovery_difficulty="High",
|
|
961
|
+
confidence=0.9,
|
|
962
|
+
)
|
|
963
|
+
)
|
|
964
|
+
return weaknesses[:10]
|
|
965
|
+
|
|
966
|
+
|
|
967
|
+
def _quake_simulation(
|
|
968
|
+
view: RepoView,
|
|
969
|
+
dependencies: dict[Path, set[Path]],
|
|
970
|
+
reverse_dependencies: dict[Path, set[Path]],
|
|
971
|
+
target: str | None = None,
|
|
972
|
+
) -> EarthquakeSimulation | None:
|
|
973
|
+
module_index = _build_module_index(view)
|
|
974
|
+
selected: Path | None = None
|
|
975
|
+
if target:
|
|
976
|
+
selected = module_index.get(target) or module_index.get(target.replace("/", "."))
|
|
977
|
+
if not selected:
|
|
978
|
+
for path in view.files:
|
|
979
|
+
if path.name == target or str(path).endswith(target):
|
|
980
|
+
selected = path
|
|
981
|
+
break
|
|
982
|
+
if selected is None:
|
|
983
|
+
if reverse_dependencies:
|
|
984
|
+
selected = max(reverse_dependencies.items(), key=lambda item: len(item[1]))[0]
|
|
985
|
+
elif dependencies:
|
|
986
|
+
selected = max(dependencies.items(), key=lambda item: len(item[1]))[0]
|
|
987
|
+
else:
|
|
988
|
+
return None
|
|
989
|
+
|
|
990
|
+
affected: set[Path] = set()
|
|
991
|
+
stack = [selected]
|
|
992
|
+
while stack:
|
|
993
|
+
node = stack.pop()
|
|
994
|
+
for child in reverse_dependencies.get(node, set()):
|
|
995
|
+
if child not in affected:
|
|
996
|
+
affected.add(child)
|
|
997
|
+
stack.append(child)
|
|
998
|
+
|
|
999
|
+
subsystem_count = len({path.relative_to(view.root).parts[0] for path in affected if path.relative_to(view.root).parts})
|
|
1000
|
+
severity = "Catastrophic" if len(affected) >= 25 else "Severe" if len(affected) >= 10 else "High" if len(affected) >= 4 else "Moderate"
|
|
1001
|
+
return EarthquakeSimulation(
|
|
1002
|
+
target=selected,
|
|
1003
|
+
projected_damage=len(affected),
|
|
1004
|
+
subsystems_lost=subsystem_count,
|
|
1005
|
+
severity=severity,
|
|
1006
|
+
affected_files=sorted(affected)[:30],
|
|
1007
|
+
)
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
def _investigation_incidents(
    view: RepoView,
    text_cache: dict[Path, str],
    dependencies: dict[Path, set[Path]],
    reverse_dependencies: dict[Path, set[Path]],
    git_summary: GitSummary,
) -> list[InvestigationIncident]:
    """Derive a single "incident" record from heuristic evidence.

    Evidence is gathered from three sources: substring matches for risky
    calls in file contents, the largest top-level group with at least 8
    files, and file names containing migration/refactor markers.

    Args:
        view: Repository view (``files`` and ``root`` are read).
        text_cache: File contents keyed by path.
        dependencies: Unused here; kept for signature compatibility.
        reverse_dependencies: Unused here; kept for signature compatibility.
        git_summary: Supplies the last commit date used to date the incident.

    Returns:
        A one-element list: a low-risk baseline when no evidence was found,
        otherwise a "High" migration incident or a "Medium" incident cluster.
    """
    # NOTE: these are plain substring checks, so they also match occurrences
    # inside comments and string literals.
    dangerous_patterns = {
        "eval(": "unsafe dynamic evaluation",
        "exec(": "runtime code execution",
        "pickle.load": "unsafe deserialization",
        "yaml.load": "unsafe YAML loading",
        "shell=True": "shell injection surface",
        "os.system(": "shell execution",
        "subprocess.Popen": "process spawning",
    }
    evidence: list[str] = [
        f"{path.name}: {description}"
        for path, content in text_cache.items()
        for needle, description in dangerous_patterns.items()
        if needle in content
    ]

    # Only the most crowded group is reported; max() keeps the first of any
    # tie, which matches a stable descending sort.
    crowded = [
        (group, len(paths))
        for group, paths in _group_by_top_level(view.files, view.root).items()
        if len(paths) >= 8
    ]
    file_explosion = max(crowded, key=lambda item: item[1], default=None)
    if file_explosion:
        evidence.append(f"File explosion in {file_explosion[0]} ({file_explosion[1]} files)")

    migration_tokens = ("migration", "refactor", "legacy", "v2", "v3")
    migration_names = [
        path for path in view.files
        if any(token in path.name.lower() for token in migration_tokens)
    ]
    if migration_names:
        evidence.append(f"{len(migration_names)} migration/refactor-era files detected")

    if not evidence:
        return [
            InvestigationIncident(
                incident="Repository Baseline",
                date=git_summary.last_commit.date().isoformat() if git_summary.last_commit else datetime.now(timezone.utc).date().isoformat(),
                impact=f"{len(view.files)} files scanned",
                outcome="No obvious incident cluster detected",
                risk="Low",
                evidence=["No dangerous patterns observed in source files"],
            )
        ]

    if git_summary.available and git_summary.last_commit:
        incident_date = git_summary.last_commit.date().isoformat()
    else:
        incident_date = datetime.now(timezone.utc).date().isoformat()

    # Evidence is guaranteed non-empty from here on, so the former
    # "stable/baseline" fallback branch could never run and was removed.
    if migration_names and file_explosion:
        outcome = "Legacy system partially abandoned"
        risk = "High"
        incident = "Authentication Refactor" if any("auth" in path.name.lower() for path in migration_names) else "Structural Migration"
    else:
        outcome = "Suspicious architectural change patterns detected"
        risk = "Medium"
        incident = "Incident Cluster"

    return [
        InvestigationIncident(
            incident=incident,
            date=incident_date,
            impact=f"{len(evidence)} indicators found",
            outcome=outcome,
            risk=risk,
            evidence=evidence[:8],
        )
    ]
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
def build_repository_intelligence(
    root: Path,
    *,
    view: RepoView | None = None,
    text_cache: dict[Path, str] | None = None,
    artifacts: list[Artifact] | None = None,
    health_score: int | None = None,
    git_summary: GitSummary | None = None,
) -> RepositoryIntelligence:
    """Assemble the full ``RepositoryIntelligence`` for a repository.

    The function builds the file dependency graph, gathers per-file git
    metadata, and then runs every derived analysis (hubs, cycles, chains,
    family trees, clusters, heatmap, weaknesses, personality, forecast,
    incidents, observations, ...).

    Args:
        root: Repository root.
        view: Pre-collected repository view; collected when ``None``.
        text_cache: Pre-built text index; built when ``None``.
        artifacts: Pre-computed artifacts. The analyzers run only when this
            is ``None``; an explicitly supplied list (even an empty one) is
            used as-is instead of triggering a second scan.
        health_score: Health score to embed; treated as 0 when ``None``.
        git_summary: Pre-collected git summary; collected when ``None``.

    Returns:
        The populated ``RepositoryIntelligence``.
    """
    if view is None:
        view = collect_repository(root)
    if text_cache is None:
        text_cache = build_text_index(view)
    references = build_reference_map(view, text_cache)
    if git_summary is None:
        git_summary = collect_git_summary(root)

    module_index = _build_module_index(view)
    dependencies: dict[Path, set[Path]] = defaultdict(set)
    reverse_dependencies: dict[Path, set[Path]] = defaultdict(set)
    external_packages: dict[Path, set[str]] = defaultdict(set)
    ownership: dict[Path, str] = {}
    file_last_active_days: dict[Path, int] = {}
    class_bases_by_path: dict[Path, dict[str, list[str]]] = {}

    js_suffixes = {".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"}

    def _link(source: Path, target_name: str) -> None:
        # Record a forward and reverse edge when the import resolves to
        # another file in the repository.
        resolved = _resolve_target(source, target_name, root, module_index)
        if resolved and resolved != source:
            dependencies[source].add(resolved)
            reverse_dependencies[resolved].add(source)

    for path in view.files:
        if path_kind(path) != "text":
            continue
        content = text_cache.get(path, "")
        suffix = path.suffix.lower()
        if suffix == ".py":
            internal, external, class_bases = _extract_python_dependencies(path, content, root)
            class_bases_by_path[path] = class_bases
            for dep in internal:
                _link(path, dep)
            external_packages[path].update(external)
        elif suffix in js_suffixes:
            internal, external = _extract_js_dependencies(content)
            for dep in internal:
                _link(path, dep)
            external_packages[path].update(external)
        else:
            # Any other text file: fall back to the regex import matcher.
            for match in PY_IMPORT_RE.finditer(content):
                target = match.group(1) or match.group(2)
                if target:
                    _link(path, target)

        if git_summary.available:
            ownership[path] = _git_last_author(root, path)
            file_last_active_days[path] = _git_last_active_days(root, path, use_git=True)
        else:
            ownership[path] = "unknown"
            file_last_active_days[path] = _git_last_active_days(root, path, use_git=False)

    if artifacts is None:
        todo_artifacts = todos_to_artifacts(find_todos(view.files))
        # Reuse the reference map built above rather than rebuilding it.
        ancient = find_ancient_files(view.files, references)
        dead_code = find_dead_code(view.root, view.files, text_cache)
        duplicates = find_duplicates(view.files, text_cache)
        monsters = find_monsters(view.files)
        ruins = find_empty_directories(view.directories, view.files) + find_unused_assets(view.files, text_cache)
        suspicious = find_suspicious(view.files)
        all_artifacts = [*todo_artifacts, *ancient, *dead_code, *duplicates, *monsters, *ruins, *suspicious]
    else:
        all_artifacts = artifacts

    # Tally artifact kinds once; the counts are reused several times below.
    kind_counts = Counter(item.kind for item in all_artifacts)
    dead_code_kinds = {"dead_code_candidate", "unreachable_code"}
    monster_count = kind_counts["monster_file"]
    duplicate_count = kind_counts["duplicate_block"]
    ancient_count = kind_counts["ancient_file"]

    def _mean_confidence(kinds: set[str]) -> float:
        # Average confidence of the artifacts of the given kinds (0 if none).
        scores = [item.confidence or 0 for item in all_artifacts if item.kind in kinds]
        return round(sum(scores) / max(1, len(scores)), 2)

    artifact_confidence = {
        "dead_code": _mean_confidence(dead_code_kinds),
        "ancient_file": _mean_confidence({"ancient_file"}),
        "duplicate_block": _mean_confidence({"duplicate_block"}),
    }

    health_score = health_score if health_score is not None else 0
    dependency_hubs = _dependency_hubs(view, dependencies, reverse_dependencies, external_packages)
    dependency_cycles = _dependency_cycles(dependencies)
    dependency_chains = _dependency_chains(dependencies, dependency_hubs)
    genealogy = _family_trees(view, root, reverse_dependencies, dependencies, class_bases_by_path)
    civilizations = _civilizations(view, root, reverse_dependencies, file_last_active_days)
    debt_heatmap = _dependency_heatmap(view, text_cache, dependencies, reverse_dependencies, all_artifacts)
    weaknesses = _structural_weaknesses(view, dependencies, reverse_dependencies, dependency_hubs)
    architecture = _architecture_classification(view)
    personality = _classify_personality(
        health_score=health_score,
        commit_count=git_summary.commit_count,
        file_count=len(view.files),
        monster_count=monster_count,
        duplicate_count=duplicate_count,
        ancient_count=ancient_count,
        dependency_cycles=len(dependency_cycles),
        external_packages=sum(len(values) for values in external_packages.values()),
    )
    forecast = _forecast(
        health_score=health_score,
        dependency_cycles=len(dependency_cycles),
        monster_count=monster_count,
        duplicate_count=duplicate_count,
        ancient_count=ancient_count,
    )
    dna = _repository_dna(view, text_cache, health_score)
    timeline_eras = _timeline_eras(root)
    graph_node_count = len(view.files)
    graph_edge_count = sum(len(values) for values in dependencies.values())
    contributors = _contributors(root, ownership, dependencies)
    quake_simulation = _quake_simulation(view, dependencies, reverse_dependencies)
    mutations = _mutations(root, view, git_summary)

    # Keyword arguments shared by the interim snapshots and the final result.
    shared = dict(
        root=root,
        view=view,
        text_cache=text_cache,
        references=references,
        dependencies=dependencies,
        reverse_dependencies=reverse_dependencies,
        dependency_hubs=dependency_hubs,
        dependency_cycles=dependency_cycles,
        dependency_chains=dependency_chains,
        genealogy=genealogy,
        civilizations=civilizations,
        debt_heatmap=debt_heatmap,
        personality=personality,
        forecast=forecast,
        dna=dna,
        timeline_eras=timeline_eras,
        ownership=ownership,
        file_last_active_days=file_last_active_days,
        artifact_confidence=artifact_confidence,
        graph_node_count=graph_node_count,
        graph_edge_count=graph_edge_count,
        weaknesses=weaknesses,
        architecture=architecture,
    )

    # NOTE(review): the interim snapshots carry an empty external-package
    # counter (as before); confirm the consumers do not rely on that field.
    survival = _survival_score(
        ScanSummary(
            root=root,
            scanned_at=datetime.now(timezone.utc),
            total_files=len(view.files),
            artifact_count=len(all_artifacts),
            ancient_count=ancient_count,
            todo_count=kind_counts["todo"],
            duplicate_count=duplicate_count,
            dead_code_count=sum(kind_counts[kind] for kind in dead_code_kinds),
            monster_count=monster_count,
            ruin_count=kind_counts["empty_directory"] + kind_counts["unused_asset"],
            suspicious_count=kind_counts["suspicious"],
            technical_debt_estimate=0.0,
            health_score=health_score,
            health_status="",
        ),
        intelligence=RepositoryIntelligence(external_packages=Counter(), **shared),
        contributors=contributors,
    )
    knowledge_map = _knowledge_map(view, dependency_hubs, architecture)
    investigations = _investigation_incidents(view, text_cache, dependencies, reverse_dependencies, git_summary)
    observations = _observations(
        RepositoryIntelligence(
            external_packages=Counter(),
            contributors=contributors,
            survival=survival,
            **shared,
        ),
        architecture,
        investigations,
        weaknesses,
        civilizations,
    )

    external_package_counts = Counter()
    for packages in external_packages.values():
        external_package_counts.update(packages)

    return RepositoryIntelligence(
        external_packages=external_package_counts,
        incidents=investigations,
        quake_simulation=quake_simulation,
        contributors=contributors,
        mutations=mutations,
        knowledge_map=knowledge_map,
        containment_zones=_containment_zones(view, text_cache, dependencies, reverse_dependencies, all_artifacts),
        survival=survival,
        observations=observations,
        **shared,
    )
|
|
1319
|
+
|
|
1320
|
+
|
|
1321
|
+
def analyze_repository(root: Path) -> RepositoryAnalysis:
    """Scan ``root`` and assemble the complete repository analysis.

    The function collects the repository view, runs every artifact finder
    (TODOs, ancient files, dead code, duplicates, monsters, ruins and
    suspicious files), computes the health result, gathers the git summary,
    builds the repository intelligence, asks the maintenance analyzer for
    remediation findings, and finally packs everything into a
    ``ScanSummary``.

    Args:
        root: Directory to analyze. It is passed to ``collect_repository``,
            ``collect_git_summary`` and ``build_repository_intelligence``.

    Returns:
        A ``RepositoryAnalysis`` holding the final summary and the
        intelligence object.
    """
    view = collect_repository(root)
    text_cache = build_text_index(view)
    references = build_reference_map(view, text_cache)

    todos = find_todos(view.files)
    todo_artifacts = todos_to_artifacts(todos)
    ancient = find_ancient_files(view.files, references)
    dead_code = find_dead_code(view.root, view.files, text_cache)
    duplicates = find_duplicates(view.files, text_cache)
    monsters = find_monsters(view.files)
    ruins = find_empty_directories(view.directories, view.files) + find_unused_assets(view.files, text_cache)
    suspicious = find_suspicious(view.files)

    # Flat artifact list; the category order matches the order the finders ran.
    artifacts: list[Artifact] = []
    artifacts.extend(todo_artifacts)
    artifacts.extend(ancient)
    artifacts.extend(dead_code)
    artifacts.extend(duplicates)
    artifacts.extend(monsters)
    artifacts.extend(ruins)
    artifacts.extend(suspicious)

    health = calculate_health(
        total_files=len(view.files),
        dead_code_count=len(dead_code),
        duplicate_count=len(duplicates),
        ancient_count=len(ancient),
        todo_count=len(todo_artifacts),
        monster_count=len(monsters),
        ruin_count=len(ruins),
        suspicious_count=len(suspicious),
    )

    # Copy so the appends below never touch the list owned by ``health``.
    warnings = list(health.warnings)
    if not artifacts:
        warnings.append("No major artifacts detected")
    if not view.files:
        warnings.append("Repository appears empty")

    git_summary = collect_git_summary(root)
    intelligence = build_repository_intelligence(
        root,
        view=view,
        text_cache=text_cache,
        artifacts=artifacts,
        health_score=health.score,
        git_summary=git_summary,
    )
    # NOTE(review): imported inside the function, presumably to avoid a
    # circular import with the analyzers package -- confirm before hoisting.
    from ..analyzers import maintenance

    # Fields shared by the provisional and the final summary. Building them
    # once keeps the two in sync and gives both the same ``scanned_at``.
    summary_fields = {
        "root": view.root,
        "scanned_at": datetime.now(timezone.utc),
        "total_files": len(view.files),
        "artifact_count": len(artifacts),
        "ancient_count": len(ancient),
        "todo_count": len(todo_artifacts),
        "duplicate_count": len(duplicates),
        "dead_code_count": len(dead_code),
        "monster_count": len(monsters),
        "ruin_count": len(ruins),
        "suspicious_count": len(suspicious),
        "technical_debt_estimate": health.debt_estimate,
        "health_score": health.score,
        "health_status": health.status,
    }

    # The maintenance analyzer takes a RepositoryAnalysis, so it is handed a
    # provisional one that carries only the shared fields (no warnings,
    # artifacts, timeline or extra), exactly as before.
    remediation_findings = maintenance.remediation_findings(
        RepositoryAnalysis(summary=ScanSummary(**summary_fields), intelligence=intelligence)
    )

    age_days = git_summary.repository_age_days
    timeline = {
        "available": bool(git_summary.available),
        "commit_count": git_summary.commit_count,
        "repository_age_days": age_days,
        # A falsy age (0 or None) is reported as 0 years.
        "repository_age_years": round(age_days / 365, 1) if age_days else 0,
        "first_commit": git_summary.first_commit.isoformat() if git_summary.first_commit else None,
        "last_commit": git_summary.last_commit.isoformat() if git_summary.last_commit else None,
        "most_modified_files": git_summary.most_modified_files,
        "eras": [
            {"year": era.year, "title": era.title, "activity": era.activity}
            for era in intelligence.timeline_eras
        ],
    }

    architecture = intelligence.architecture
    survival = intelligence.survival
    quake = intelligence.quake_simulation

    extra = {
        "similarity_pairs": similarity_report(view.files, text_cache),
        "dna": {
            "signature": intelligence.dna.signature,
            "confidence": intelligence.dna.confidence,
        },
        "personality": {
            "type": intelligence.personality.type,
            "traits": intelligence.personality.traits,
            "risk": intelligence.personality.risk,
        },
        "artifact_confidence": intelligence.artifact_confidence,
        # Fallback values are used when no architecture was inferred.
        "architecture": {
            "primary": architecture.primary if architecture else "Prototype",
            "secondary": architecture.secondary if architecture else "Layered",
            "confidence": architecture.confidence if architecture else 0.68,
        },
        "forecast": {
            "current_health": intelligence.forecast.current_health,
            "projected_6_months": intelligence.forecast.projected_6_months,
            "projected_12_months": intelligence.forecast.projected_12_months,
            "reason": intelligence.forecast.reason,
        },
        "investigation": [
            {
                "incident": incident.incident,
                "date": incident.date,
                "impact": incident.impact,
                "outcome": incident.outcome,
                "risk": incident.risk,
                "evidence": incident.evidence,
            }
            for incident in intelligence.incidents
        ],
        "weaknesses": [
            {
                "path": str(item.path),
                "referenced_by": item.referenced_by,
                "failure_impact": item.failure_impact,
                "recovery_difficulty": item.recovery_difficulty,
                "confidence": item.confidence,
            }
            for item in intelligence.weaknesses
        ],
        "quake": None
        if quake is None
        else {
            "target": str(quake.target),
            "projected_damage": quake.projected_damage,
            "subsystems_lost": quake.subsystems_lost,
            "severity": quake.severity,
            "affected_files": [str(path) for path in quake.affected_files],
        },
        "dependency_hubs": [
            {
                "path": str(hub.path),
                "referenced_by": hub.referenced_by,
                "depends_on": hub.depends_on,
                "external_packages": hub.external_packages,
                "dependency_risk": hub.dependency_risk,
                "failure_impact": hub.failure_impact,
                "confidence": hub.confidence,
            }
            for hub in intelligence.dependency_hubs
        ],
        "civilizations": [
            {
                "name": civ.name,
                "files": [str(path) for path in civ.files],
                "referenced": civ.referenced,
                "last_active_days": civ.last_active_days,
                "status": civ.status,
                "confidence": civ.confidence,
            }
            for civ in intelligence.civilizations
        ],
        "contributors": [
            {
                "area": item.area,
                "owner": item.owner,
                "maintenance_owner": item.maintenance_owner,
                "abandoned_owner": item.abandoned_owner,
            }
            for item in intelligence.contributors
        ],
        "mutations": [
            {
                "project_type": item.project_type,
                "became": item.became,
                "date": item.date,
                "impact": item.impact,
            }
            for item in intelligence.mutations
        ],
        "knowledge_map": {
            "core": intelligence.knowledge_map.core,
            "dependency_graph": intelligence.knowledge_map.dependency_graph,
            "route_graph": intelligence.knowledge_map.route_graph,
            "service_graph": intelligence.knowledge_map.service_graph,
            "architecture_graph": intelligence.knowledge_map.architecture_graph,
        },
        "containment_zones": [
            {
                "location": item.location,
                "complexity": item.complexity,
                "spread_rate": item.spread_rate,
                "recommendation": item.recommendation,
            }
            for item in intelligence.containment_zones
        ],
        # Fallback values are used when no survival assessment is available.
        "survival": {
            "score": survival.score if survival else 0,
            "risk": survival.risk if survival else "Unknown",
            "single_point_failure": survival.single_point_failure if survival else "Unknown",
            "maintainability": survival.maintainability if survival else 0,
            "recoverability": survival.recoverability if survival else 0,
            "onboarding_difficulty": survival.onboarding_difficulty if survival else 0,
            "bus_factor": survival.bus_factor if survival else 0,
        },
        "observations": [
            {
                "observation": item.observation,
                "evidence": item.evidence,
            }
            for item in intelligence.observations
        ],
        "debt_heatmap": [
            {
                "bucket": bucket.bucket,
                "score": bucket.score,
                "label": bucket.label,
                "files": bucket.files,
            }
            for bucket in intelligence.debt_heatmap
        ],
        "graph": {
            "nodes": intelligence.graph_node_count,
            "edges": intelligence.graph_edge_count,
        },
        # Only the first 100 remediation findings are exported.
        "remediation": [
            {
                "problem": item.problem,
                "evidence": item.evidence,
                "impact": item.impact,
                "confidence": item.confidence,
                "recommended_fix": item.recommended_fix,
                "estimated_effort": item.estimated_effort,
                "risk_level": item.risk_level,
                "root_cause": item.root_cause,
                "likely_consequences": item.likely_consequences,
                "alternative_solution": item.alternative_solution,
                "implementation_difficulty": item.implementation_difficulty,
                "location": item.location,
            }
            for item in remediation_findings[:100]
        ],
    }

    summary = ScanSummary(
        **summary_fields,
        warnings=warnings,
        artifacts=artifacts,
        timeline=timeline,
        extra=extra,
    )
    return RepositoryAnalysis(summary=summary, intelligence=intelligence)
|