devarch 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devarch/__init__.py +4 -0
- devarch/__main__.py +4 -0
- devarch/analyzers/__init__.py +2 -0
- devarch/analyzers/ancient.py +48 -0
- devarch/analyzers/dead_code.py +92 -0
- devarch/analyzers/duplicates.py +101 -0
- devarch/analyzers/health.py +60 -0
- devarch/analyzers/maintenance.py +902 -0
- devarch/analyzers/monsters.py +62 -0
- devarch/analyzers/recovery.py +338 -0
- devarch/analyzers/ruins.py +45 -0
- devarch/analyzers/suspicious.py +39 -0
- devarch/analyzers/todos.py +60 -0
- devarch/cli/__init__.py +2 -0
- devarch/cli/main.py +1708 -0
- devarch/models.py +43 -0
- devarch/plugins.py +29 -0
- devarch/reports/__init__.py +2 -0
- devarch/reports/exporters.py +274 -0
- devarch/scanner/__init__.py +2 -0
- devarch/scanner/core.py +15 -0
- devarch/scanner/discovery.py +84 -0
- devarch/scanner/intelligence.py +1559 -0
- devarch/utils/__init__.py +2 -0
- devarch/utils/fs.py +165 -0
- devarch/utils/git_info.py +64 -0
- devarch/utils/rich_ui.py +107 -0
- devarch/version.py +3 -0
- devarch-0.2.0.dist-info/METADATA +317 -0
- devarch-0.2.0.dist-info/RECORD +33 -0
- devarch-0.2.0.dist-info/WHEEL +4 -0
- devarch-0.2.0.dist-info/entry_points.txt +3 -0
- devarch-0.2.0.dist-info/licenses/LICENSE +22 -0
devarch/models.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(slots=True)
|
|
10
|
+
class Artifact:
|
|
11
|
+
path: Path
|
|
12
|
+
kind: str
|
|
13
|
+
risk: str
|
|
14
|
+
score: float = 0.0
|
|
15
|
+
age_days: int | None = None
|
|
16
|
+
size_bytes: int | None = None
|
|
17
|
+
line_number: int | None = None
|
|
18
|
+
detail: str = ""
|
|
19
|
+
confidence: float | None = None
|
|
20
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(slots=True)
|
|
24
|
+
class ScanSummary:
|
|
25
|
+
root: Path
|
|
26
|
+
scanned_at: datetime
|
|
27
|
+
total_files: int
|
|
28
|
+
artifact_count: int
|
|
29
|
+
ancient_count: int
|
|
30
|
+
todo_count: int
|
|
31
|
+
duplicate_count: int
|
|
32
|
+
dead_code_count: int
|
|
33
|
+
monster_count: int
|
|
34
|
+
ruin_count: int
|
|
35
|
+
suspicious_count: int
|
|
36
|
+
technical_debt_estimate: float
|
|
37
|
+
health_score: int
|
|
38
|
+
health_status: str
|
|
39
|
+
warnings: list[str] = field(default_factory=list)
|
|
40
|
+
artifacts: list[Artifact] = field(default_factory=list)
|
|
41
|
+
timeline: dict[str, Any] = field(default_factory=dict)
|
|
42
|
+
extra: dict[str, Any] = field(default_factory=dict)
|
|
43
|
+
|
devarch/plugins.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from importlib import metadata
|
|
5
|
+
from typing import Protocol
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Plugin(Protocol):
    """Structural interface: a ``name`` attribute plus a ``register()`` method."""

    name: str

    def register(self) -> None:
        """Hook with no arguments and no return value; the protocol supplies no behavior."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True)
|
|
15
|
+
class PluginInfo:
|
|
16
|
+
name: str
|
|
17
|
+
module: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def discover_plugins() -> list[PluginInfo]:
    """List the entry points registered under the ``devarch.plugins`` group.

    Returns:
        One ``PluginInfo`` per entry point, carrying the entry's ``name`` and
        ``value``. Nothing is imported or loaded here.
    """
    try:
        found = metadata.entry_points(group="devarch.plugins")
    except TypeError:
        # The ``group`` keyword was rejected; fall back to the mapping-style
        # lookup (presumably an older importlib.metadata API - confirm).
        found = metadata.entry_points().get("devarch.plugins", [])  # type: ignore[assignment]
    return [PluginInfo(name=item.name, module=item.value) for item in found]
|
|
29
|
+
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from ..models import ScanSummary
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _confidence_text(value: float | None) -> str:
|
|
10
|
+
return f"{value:.0%}" if value is not None else "n/a"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _load_maintenance(root: Path) -> dict[str, object]:
    """Read the persisted maintenance state stored under ``<root>/.devarch``.

    This is a best-effort reader: a missing, unreadable or malformed state
    file never raises, it just leaves the corresponding default in place.

    Args:
        root: Repository root that may contain a ``.devarch`` directory.

    Returns:
        A dict with two keys:

        * ``"baseline"`` - the parsed content of ``baseline.json``, or ``None``
          when the file is missing, unreadable, not UTF-8 or not valid JSON.
        * ``"history_entries"`` - the number of non-blank lines in
          ``history.jsonl``, or ``0`` when the file is missing or unreadable.
    """
    state_dir = root / ".devarch"
    data: dict[str, object] = {"baseline": None, "history_entries": 0}

    try:
        raw_baseline = (state_dir / "baseline.json").read_text(encoding="utf-8")
        data["baseline"] = json.loads(raw_baseline)
    except (OSError, ValueError):
        # OSError: file missing / not readable / is a directory.
        # ValueError: covers both json.JSONDecodeError and UnicodeDecodeError.
        data["baseline"] = None

    try:
        # Stream the file instead of loading it whole just to count lines.
        with (state_dir / "history.jsonl").open(encoding="utf-8") as handle:
            data["history_entries"] = sum(1 for line in handle if line.strip())
    except (OSError, UnicodeDecodeError):
        data["history_entries"] = 0

    return data
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def summary_to_dict(summary: ScanSummary) -> dict[str, object]:
    """Flatten *summary* into a plain dict for the exporters.

    ``root`` and every artifact ``path`` are stringified and ``scanned_at`` is
    rendered with ``isoformat()``; all other fields are passed through as-is.
    A ``"maintenance"`` entry is added from ``_load_maintenance(summary.root)``.
    """

    def artifact_row(item) -> dict[str, object]:
        # One dict per artifact, keys mirroring the Artifact field names.
        row: dict[str, object] = {"path": str(item.path)}
        for field_name in (
            "kind",
            "risk",
            "score",
            "age_days",
            "size_bytes",
            "line_number",
            "detail",
            "confidence",
            "metadata",
        ):
            row[field_name] = getattr(item, field_name)
        return row

    payload: dict[str, object] = {
        "root": str(summary.root),
        "scanned_at": summary.scanned_at.isoformat(),
    }
    # Fields copied verbatim, in the order they appear in the output.
    for field_name in (
        "total_files",
        "artifact_count",
        "ancient_count",
        "todo_count",
        "duplicate_count",
        "dead_code_count",
        "monster_count",
        "ruin_count",
        "suspicious_count",
        "technical_debt_estimate",
        "health_score",
        "health_status",
        "warnings",
        "timeline",
        "extra",
    ):
        payload[field_name] = getattr(summary, field_name)
    payload["maintenance"] = _load_maintenance(summary.root)
    payload["artifacts"] = [artifact_row(item) for item in summary.artifacts]
    return payload
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def export_json(summary: ScanSummary, destination: Path) -> Path:
    """Write *summary* to *destination* as 2-space-indented UTF-8 JSON and return *destination*."""
    payload = summary_to_dict(summary)
    rendered = json.dumps(payload, indent=2)
    destination.write_text(rendered, encoding="utf-8")
    return destination
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def export_markdown(summary: ScanSummary, destination: Path) -> Path:
    """Write a Markdown report for *summary* to *destination* and return *destination*.

    The report has a header list followed by "Metrics", "Warnings",
    "Intelligence" and "Artifacts" sections. Values missing from
    ``summary.extra`` are shown as ``n/a`` (or a count of 0).
    """
    payload = summary_to_dict(summary)
    extra = summary.extra

    signature = ", ".join(extra.get("dna", {}).get("signature", []))
    dna = signature if signature else "n/a"
    personality = extra.get("personality", {}).get("type", "n/a")
    architecture = extra.get("architecture", {})
    survival = extra.get("survival", {})
    forecast = extra.get("forecast", {})
    maintenance = payload.get("maintenance", {})
    baseline = maintenance.get("baseline") or {}

    # --- header ---
    report = [
        "# Dev Archaeologist Excavation Report",
        "",
        f"- Root: `{payload['root']}`",
        f"- Scanned at: `{payload['scanned_at']}`",
        f"- Health: **{summary.health_score}/100** ({summary.health_status})",
        f"- Technical debt estimate: `{summary.technical_debt_estimate:.1f}`",
        f"- DNA signature: `{dna}`",
        f"- Personality: `{personality}`",
        f"- Architecture: `{architecture.get('primary', 'n/a')} / {architecture.get('secondary', 'n/a')}`",
        f"- Survival score: `{survival.get('score', 'n/a')}`",
        f"- Forecast 6 months: `{forecast.get('projected_6_months', 'n/a')}`",
        f"- Forecast 12 months: `{forecast.get('projected_12_months', 'n/a')}`",
        f"- Baseline health: `{baseline.get('health_score', 'n/a')}`",
        f"- Maintenance history entries: `{maintenance.get('history_entries', 0)}`",
        "",
    ]

    # --- metrics ---
    report += [
        "## Metrics",
        "",
        f"- Total files: {summary.total_files}",
        f"- Artifacts: {summary.artifact_count}",
        f"- Ancient files: {summary.ancient_count}",
        f"- TODOs: {summary.todo_count}",
        f"- Duplicates: {summary.duplicate_count}",
        f"- Dead code candidates: {summary.dead_code_count}",
        f"- Monster files: {summary.monster_count}",
        f"- Ruins: {summary.ruin_count}",
        f"- Suspicious files: {summary.suspicious_count}",
        "",
    ]

    # --- warnings ("None" placeholder when the list is empty) ---
    report += ["## Warnings", ""]
    report += [f"- {warning}" for warning in (summary.warnings or ["None"])]

    # --- intelligence: only the size of each collection is reported ---
    report += ["", "## Intelligence", ""]
    intelligence = (
        ("Dependency hubs", "dependency_hubs"),
        ("Civilizations", "civilizations"),
        ("Heatmap buckets", "debt_heatmap"),
        ("Structural weaknesses", "weaknesses"),
        ("Investigations", "investigation"),
        ("Containment zones", "containment_zones"),
        ("Remediation findings", "remediation"),
    )
    report += [f"- {label}: {len(extra.get(key, []))}" for label, key in intelligence]

    # --- artifacts ---
    report += ["", "## Artifacts", ""]
    for found in summary.artifacts:
        report += [
            f"### {found.kind}",
            f"- Path: `{found.path}`",
            f"- Risk: {found.risk}",
            f"- Confidence: {_confidence_text(found.confidence)}",
            f"- Detail: {found.detail or 'n/a'}",
        ]
        if found.age_days is not None:
            report.append(f"- Age: {found.age_days} days")
        if found.line_number is not None:
            report.append(f"- Line: {found.line_number}")
        report.append("")

    destination.write_text("\n".join(report), encoding="utf-8")
    return destination
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def export_html(summary: ScanSummary, destination: Path) -> Path:
    """Write a standalone HTML report for *summary* to *destination*.

    Every value interpolated into the markup (paths, details, warnings and
    anything taken from ``summary.extra`` or the maintenance baseline) is
    stringified and HTML-escaped first. These values originate from the
    scanned repository, so they must not be able to inject markup.

    Args:
        summary: Scan result to render.
        destination: File to write; encoded as UTF-8.

    Returns:
        *destination*, unchanged.

    Raises:
        KeyError: If a ``debt_heatmap`` entry lacks one of the keys
            ``bucket``, ``score``, ``label`` or ``files``.
    """
    from html import escape  # function-scope import keeps the module's import block unchanged

    def cell(value: object) -> str:
        # Single choke point: stringify, then escape &, <, > and quotes.
        return escape(str(value))

    extra = summary.extra
    rows = "\n".join(
        f"<tr><td>{cell(artifact.path)}</td><td>{cell(artifact.kind)}</td><td>{cell(artifact.risk)}</td>"
        f"<td>{cell(_confidence_text(artifact.confidence))}</td><td>{cell(artifact.detail)}</td></tr>"
        for artifact in summary.artifacts
    )
    warnings = "".join(f"<li>{cell(warning)}</li>" for warning in (summary.warnings or ["None"]))
    dna = cell(", ".join(extra.get("dna", {}).get("signature", [])) or "n/a")
    personality = cell(extra.get("personality", {}).get("type", "n/a"))
    architecture = extra.get("architecture", {})
    survival = extra.get("survival", {})
    forecast = extra.get("forecast", {})

    # Only the maintenance state is needed here, so read it directly instead
    # of serializing the whole summary (artifact list included).
    maintenance = _load_maintenance(summary.root)
    baseline = maintenance.get("baseline")
    if not isinstance(baseline, dict):
        # Missing baseline, or valid JSON that is not an object.
        baseline = {}

    heatmap = "".join(
        f"<tr><td>{cell(item['bucket'])}</td><td>{cell(item['score'])}</td>"
        f"<td>{cell(item['label'])}</td><td>{cell(item['files'])}</td></tr>"
        for item in extra.get("debt_heatmap", [])
    )

    document = f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Dev Archaeologist Report</title>
<style>
body {{ font-family: system-ui, sans-serif; margin: 2rem; background: #0b1020; color: #e5eefc; }}
.card {{ background: #111a33; border: 1px solid #22305f; border-radius: 16px; padding: 1rem 1.25rem; margin-bottom: 1rem; }}
table {{ width: 100%; border-collapse: collapse; }}
th, td {{ border-bottom: 1px solid #22305f; padding: .55rem; text-align: left; vertical-align: top; }}
th {{ color: #9dd3ff; }}
.score {{ font-size: 2rem; font-weight: 700; }}
</style>
</head>
<body>
<div class="card"><div class="score">{cell(summary.health_score)}/100</div><div>{cell(summary.health_status)}</div></div>
<div class="card"><strong>DNA</strong><div>{dna}</div></div>
<div class="card"><strong>Personality</strong><div>{personality}</div></div>
<div class="card"><strong>Architecture</strong><div>{cell(architecture.get('primary', 'n/a'))} / {cell(architecture.get('secondary', 'n/a'))}</div></div>
<div class="card"><strong>Survival</strong><div>{cell(survival.get('score', 'n/a'))}/100</div></div>
<div class="card"><strong>Forecast</strong><div>6 months: {cell(forecast.get('projected_6_months', 'n/a'))} | 12 months: {cell(forecast.get('projected_12_months', 'n/a'))}</div></div>
<div class="card"><strong>Baseline</strong><div>{cell(baseline.get('health_score', 'n/a'))} | History entries: {cell(maintenance.get('history_entries', 0))}</div></div>
<div class="card"><strong>Remediation</strong><div>{len(extra.get('remediation', []))} findings</div></div>
<div class="card"><strong>Warnings</strong><ul>{warnings}</ul></div>
<div class="card"><strong>Heatmap</strong><table><thead><tr><th>Bucket</th><th>Score</th><th>Label</th><th>Files</th></tr></thead><tbody>{heatmap}</tbody></table></div>
<div class="card"><strong>Artifacts</strong><table><thead><tr><th>Path</th><th>Kind</th><th>Risk</th><th>Confidence</th><th>Detail</th></tr></thead><tbody>{rows}</tbody></table></div>
</body>
</html>"""
    destination.write_text(document, encoding="utf-8")
    return destination
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def export_pdf(summary: ScanSummary, destination: Path) -> Path:
    """Write a minimal text-only PDF report for *summary* to *destination*.

    The PDF is assembled by hand: a catalog, a page tree, one Helvetica font
    and, per page, a content stream plus a page object. Each page holds at
    most 40 lines and each line is cut to 120 characters.

    Text is encoded as Windows-1252 and the font declares
    ``/WinAnsiEncoding``, so accented Latin characters render correctly.
    Characters outside that code page are replaced with ``?``.

    Only the first 50 artifacts are listed; when more exist, a trailing line
    states how many were left out.

    Args:
        summary: Scan result to render.
        destination: File to write (binary).

    Returns:
        *destination*, unchanged.
    """
    max_artifacts = 50
    lines_per_page = 40

    def escape_pdf(text: str) -> str:
        # Backslash and parentheses are the delimiters of a PDF literal string.
        return text.replace("\\", "\\\\").replace("(", "\\(").replace(")", "\\)")

    def build_page(lines: list[str], top: int = 760) -> str:
        # First line is drawn at (40, top); each later line moves down 14 units.
        commands = ["BT", "/F1 11 Tf", f"40 {top} Td"]
        for position, line in enumerate(lines):
            safe = escape_pdf(line[:120])
            move = "" if position == 0 else "0 -14 Td "
            commands.append(f"{move}({safe}) Tj")
        commands.append("ET")
        return "\n".join(commands)

    extra = summary.extra
    architecture = extra.get("architecture", {})
    # Only the baseline is needed, so skip serializing the whole summary.
    baseline = _load_maintenance(summary.root).get("baseline")
    if not isinstance(baseline, dict):
        # Missing baseline, or valid JSON that is not an object.
        baseline = {}

    header = [
        "Dev Archaeologist Excavation Report",
        f"Root: {summary.root}",
        f"Health: {summary.health_score}/100 ({summary.health_status})",
        f"Debt estimate: {summary.technical_debt_estimate:.1f}",
        f"DNA: {', '.join(extra.get('dna', {}).get('signature', [])) or 'n/a'}",
        f"Personality: {extra.get('personality', {}).get('type', 'n/a')}",
        f"Architecture: {architecture.get('primary', 'n/a')} / {architecture.get('secondary', 'n/a')}",
        f"Survival: {extra.get('survival', {}).get('score', 'n/a')}/100",
        f"Forecast 12 months: {extra.get('forecast', {}).get('projected_12_months', 'n/a')}",
        f"Baseline health: {baseline.get('health_score', 'n/a')}",
        f"Remediation findings: {len(extra.get('remediation', []))}",
        "Warnings:",
    ]
    report_lines = header + [f"- {warning}" for warning in (summary.warnings or ["None"])]
    report_lines.append("Artifacts:")
    listed = summary.artifacts[:max_artifacts]
    report_lines.extend(
        f"- {artifact.kind}: {artifact.path} ({artifact.risk}, {_confidence_text(artifact.confidence)})"
        for artifact in listed
    )
    omitted = len(summary.artifacts) - len(listed)
    if omitted > 0:
        # Make the truncation visible instead of silently dropping findings.
        report_lines.append(f"... {omitted} more artifacts omitted")

    chunks = [
        report_lines[start : start + lines_per_page]
        for start in range(0, len(report_lines), lines_per_page)
    ]

    objects: list[bytes] = []

    def add_object(payload: bytes) -> int:
        # Object numbers are 1-based positions in ``objects``.
        objects.append(payload)
        return len(objects)

    # Catalog and page tree are placeholders until all page ids are known.
    catalog_id = add_object(b"")
    pages_id = add_object(b"")
    font_id = add_object(
        b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >>"
    )

    page_ids: list[int] = []
    for chunk in chunks:
        # Encode once, so /Length counts exactly the bytes that get written.
        stream = build_page(chunk).encode("cp1252", errors="replace")
        content_id = add_object(
            b"".join(
                [
                    f"<< /Length {len(stream)} >>\nstream\n".encode("ascii"),
                    stream,
                    b"\nendstream",
                ]
            )
        )
        page_id = add_object(
            (
                f"<< /Type /Page /Parent {pages_id} 0 R /MediaBox [0 0 612 792] "
                f"/Contents {content_id} 0 R /Resources << /Font << /F1 {font_id} 0 R >> >> >>"
            ).encode("ascii")
        )
        page_ids.append(page_id)

    kids = " ".join(f"{page} 0 R" for page in page_ids)
    objects[pages_id - 1] = f"<< /Type /Pages /Kids [{kids}] /Count {len(page_ids)} >>".encode("ascii")
    objects[catalog_id - 1] = f"<< /Type /Catalog /Pages {pages_id} 0 R >>".encode("ascii")

    pdf_bytes = bytearray(b"%PDF-1.4\n")
    offsets: list[int] = []
    for index, payload in enumerate(objects, start=1):
        # Byte offset of each object, needed by the cross-reference table.
        offsets.append(len(pdf_bytes))
        pdf_bytes.extend(f"{index} 0 obj\n".encode("ascii"))
        pdf_bytes.extend(payload)
        pdf_bytes.extend(b"\nendobj\n")

    xref_offset = len(pdf_bytes)
    pdf_bytes.extend(f"xref\n0 {len(objects) + 1}\n".encode("ascii"))
    pdf_bytes.extend(b"0000000000 65535 f \n")
    for offset in offsets:
        pdf_bytes.extend(f"{offset:010d} 00000 n \n".encode("ascii"))
    pdf_bytes.extend(
        (
            f"trailer\n<< /Size {len(objects) + 1} /Root {catalog_id} 0 R >>\n"
            f"startxref\n{xref_offset}\n%%EOF\n"
        ).encode("ascii")
    )
    destination.write_bytes(pdf_bytes)
    return destination
|
devarch/scanner/core.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from ..models import ScanSummary
|
|
6
|
+
from .intelligence import RepositoryAnalysis, analyze_repository
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def scan_repository(root: Path) -> ScanSummary:
    """Run ``analyze_repository`` on *root* and return only the result's ``summary``."""
    analysis = analyze_repository(root)
    return analysis.summary
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def analyze_repository_root(root: Path) -> RepositoryAnalysis:
    """Return the full ``analyze_repository`` result for *root*, unmodified."""
    analysis = analyze_repository(root)
    return analysis
|
|
15
|
+
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from ..models import Artifact
|
|
8
|
+
from ..utils.fs import RepoView, collect_repository, path_kind, read_text, safe_stat
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Matches, at the start of any line (leading whitespace allowed), either
# ``from <target> import`` (target in group 1) or ``import <target>`` (target
# in group 2). A target is a run of word characters, ".", "-" and "/".
# The inline flags are m (multiline) and x (verbose: pattern whitespace ignored).
IMPORT_RE = re.compile(
    r"""(?mx)
^\s*(?:from\s+([\w.\-/]+)\s+import|import\s+([\w.\-/]+))
"""
)

# Case-insensitive match for a filename-like token: letters, digits, "_", "-"
# and "/" followed by "." and one of the listed extensions. The whole token is
# group 1.
REF_RE = re.compile(
    r"""(?i)\b([A-Za-z0-9_\-/]+\.(?:py|pyi|js|jsx|ts|tsx|md|json|yml|yaml|html|css|svg|png|jpg|jpeg|gif))\b"""
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def build_text_index(view: RepoView) -> dict[Path, str]:
    """Map every file in ``view.files`` that ``path_kind`` classifies as ``"text"`` to its ``read_text`` content."""
    return {entry: read_text(entry) for entry in view.files if path_kind(entry) == "text"}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def normalize_ref(root: Path, ref: str) -> Path | None:
|
|
29
|
+
ref = ref.strip().lstrip(".").replace(".", "/")
|
|
30
|
+
candidate = root / ref
|
|
31
|
+
if candidate.exists():
|
|
32
|
+
return candidate.resolve()
|
|
33
|
+
for suffix in ("", ".py", ".js", ".ts", ".tsx", ".jsx", ".md", ".json", ".yml", ".yaml", ".html", ".css"):
|
|
34
|
+
p = (root / f"{ref}{suffix}").resolve()
|
|
35
|
+
if p.exists():
|
|
36
|
+
return p
|
|
37
|
+
for suffix in ("", ".py", ".js", ".ts", ".tsx", ".jsx", ".md", ".json", ".yml", ".yaml", ".html", ".css"):
|
|
38
|
+
matches = list(root.rglob(f"{ref}{suffix}"))
|
|
39
|
+
if matches:
|
|
40
|
+
return matches[0].resolve()
|
|
41
|
+
return None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build_reference_map(view: RepoView, text_index: dict[Path, str]) -> dict[Path, set[Path]]:
    """Build a reverse index: resolved target path -> set of files whose text mentions it.

    For each ``(file, text)`` pair, every ``IMPORT_RE`` target and every
    ``REF_RE`` filename token is passed to ``normalize_ref``; targets that do
    not resolve are ignored.

    Returns:
        A ``defaultdict(set)`` keyed by resolved path.
    """
    referenced_by: dict[Path, set[Path]] = defaultdict(set)
    for origin, text in text_index.items():
        import_targets = [found.group(1) or found.group(2) for found in IMPORT_RE.finditer(text)]
        file_targets = [found.group(1) for found in REF_RE.finditer(text)]
        # Import targets are handled first, then filename tokens.
        for raw_target in [name for name in import_targets if name] + file_targets:
            resolved = normalize_ref(view.root, raw_target)
            if resolved:
                referenced_by[resolved].add(origin)
    return referenced_by
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def file_age_days(path: Path, git_last_commit_ts: int | None = None) -> int:
|
|
62
|
+
from datetime import datetime, timezone
|
|
63
|
+
|
|
64
|
+
if git_last_commit_ts is not None:
|
|
65
|
+
modified = datetime.fromtimestamp(git_last_commit_ts, tz=timezone.utc)
|
|
66
|
+
else:
|
|
67
|
+
modified = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
|
|
68
|
+
now = datetime.now(timezone.utc)
|
|
69
|
+
return max((now - modified).days, 0)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def iter_repo_files(root: Path) -> list[Path]:
    """Return the ``files`` attribute of ``collect_repository(root)``."""
    repo_view = collect_repository(root)
    return repo_view.files
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def artifact(path: Path, kind: str, risk: str, detail: str, **metadata: object) -> Artifact:
    """Build an ``Artifact`` for *path*, filling ``size_bytes`` from ``safe_stat(path)``.

    Any extra keyword arguments are copied into the artifact's ``metadata``
    dict; the remaining ``Artifact`` fields keep their defaults.
    """
    # NOTE(review): safe_stat presumably returns the size in bytes or None - confirm in utils.fs.
    measured_size = safe_stat(path)
    extra_fields = dict(metadata)
    return Artifact(
        path=path,
        kind=kind,
        risk=risk,
        size_bytes=measured_size,
        detail=detail,
        metadata=extra_fields,
    )
|