devarch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
devarch/models.py ADDED
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
+ @dataclass(slots=True)
10
+ class Artifact:
11
+ path: Path
12
+ kind: str
13
+ risk: str
14
+ score: float = 0.0
15
+ age_days: int | None = None
16
+ size_bytes: int | None = None
17
+ line_number: int | None = None
18
+ detail: str = ""
19
+ confidence: float | None = None
20
+ metadata: dict[str, Any] = field(default_factory=dict)
21
+
22
+
23
+ @dataclass(slots=True)
24
+ class ScanSummary:
25
+ root: Path
26
+ scanned_at: datetime
27
+ total_files: int
28
+ artifact_count: int
29
+ ancient_count: int
30
+ todo_count: int
31
+ duplicate_count: int
32
+ dead_code_count: int
33
+ monster_count: int
34
+ ruin_count: int
35
+ suspicious_count: int
36
+ technical_debt_estimate: float
37
+ health_score: int
38
+ health_status: str
39
+ warnings: list[str] = field(default_factory=list)
40
+ artifacts: list[Artifact] = field(default_factory=list)
41
+ timeline: dict[str, Any] = field(default_factory=dict)
42
+ extra: dict[str, Any] = field(default_factory=dict)
43
+
devarch/plugins.py ADDED
@@ -0,0 +1,29 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from importlib import metadata
5
+ from typing import Protocol
6
+
7
+
8
class Plugin(Protocol):
    """Structural interface for plugins: a ``name`` plus a ``register`` hook."""

    # Identifier of the plugin.
    name: str

    def register(self) -> None:
        """Hook implemented by concrete plugins; the protocol stub does nothing."""
        ...
12
+
13
+
14
+ @dataclass(slots=True)
15
+ class PluginInfo:
16
+ name: str
17
+ module: str
18
+
19
+
20
def discover_plugins() -> list[PluginInfo]:
    """Return a ``PluginInfo`` for every entry point in the ``devarch.plugins`` group.

    The keyword form of ``entry_points`` is tried first; if it raises
    ``TypeError`` the mapping-style lookup is used instead.
    """
    try:
        found = metadata.entry_points(group="devarch.plugins")
    except TypeError:
        found = metadata.entry_points().get("devarch.plugins", [])  # type: ignore[assignment]
    return [PluginInfo(name=item.name, module=item.value) for item in found]
29
+
@@ -0,0 +1,2 @@
1
+ """Report exporters for Dev Archaeologist."""
2
+
@@ -0,0 +1,274 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ from ..models import ScanSummary
7
+
8
+
9
+ def _confidence_text(value: float | None) -> str:
10
+ return f"{value:.0%}" if value is not None else "n/a"
11
+
12
+
13
+ def _load_maintenance(root: Path) -> dict[str, object]:
14
+ state_dir = root / ".devarch"
15
+ baseline_path = state_dir / "baseline.json"
16
+ history_path = state_dir / "history.jsonl"
17
+ data: dict[str, object] = {"baseline": None, "history_entries": 0}
18
+ if baseline_path.exists():
19
+ try:
20
+ data["baseline"] = json.loads(baseline_path.read_text(encoding="utf-8"))
21
+ except json.JSONDecodeError:
22
+ data["baseline"] = None
23
+ if history_path.exists():
24
+ data["history_entries"] = sum(1 for line in history_path.read_text(encoding="utf-8").splitlines() if line.strip())
25
+ return data
26
+
27
+
28
def summary_to_dict(summary: ScanSummary) -> dict[str, object]:
    """Convert *summary* into a plain dict for the exporters.

    Paths are turned into strings and the timestamp into ISO format; the other
    summary fields are passed through as-is (containers are not copied). The
    maintenance state read from the scan root is added under ``"maintenance"``
    and each artifact becomes a dict under ``"artifacts"``.
    """
    # Fields copied verbatim, listed in output order.
    summary_fields = (
        "total_files",
        "artifact_count",
        "ancient_count",
        "todo_count",
        "duplicate_count",
        "dead_code_count",
        "monster_count",
        "ruin_count",
        "suspicious_count",
        "technical_debt_estimate",
        "health_score",
        "health_status",
        "warnings",
        "timeline",
        "extra",
    )
    artifact_fields = (
        "kind",
        "risk",
        "score",
        "age_days",
        "size_bytes",
        "line_number",
        "detail",
        "confidence",
        "metadata",
    )

    payload: dict[str, object] = {
        "root": str(summary.root),
        "scanned_at": summary.scanned_at.isoformat(),
    }
    for field_name in summary_fields:
        payload[field_name] = getattr(summary, field_name)
    payload["maintenance"] = _load_maintenance(summary.root)

    serialized = []
    for item in summary.artifacts:
        entry: dict[str, object] = {"path": str(item.path)}
        for field_name in artifact_fields:
            entry[field_name] = getattr(item, field_name)
        serialized.append(entry)
    payload["artifacts"] = serialized
    return payload
64
+
65
+
66
def export_json(summary: ScanSummary, destination: Path) -> Path:
    """Write *summary* as 2-space-indented JSON to *destination* and return that path."""
    payload = summary_to_dict(summary)
    rendered = json.dumps(payload, indent=2)
    destination.write_text(rendered, encoding="utf-8")
    return destination
69
+
70
+
71
def export_markdown(summary: ScanSummary, destination: Path) -> Path:
    """Write a Markdown report for *summary* to *destination* and return that path.

    The report has an overview list followed by "Metrics", "Warnings",
    "Intelligence" and "Artifacts" sections. Values missing from
    ``summary.extra`` are shown as ``n/a``; an empty warnings list is shown as
    a single "None" bullet.
    """
    data = summary_to_dict(summary)
    extra = summary.extra
    dna = ", ".join(extra.get("dna", {}).get("signature", [])) or "n/a"
    personality = extra.get("personality", {}).get("type", "n/a")
    architecture = extra.get("architecture", {})
    survival = extra.get("survival", {})
    forecast = extra.get("forecast", {})
    maintenance = data.get("maintenance", {})
    baseline = maintenance.get("baseline") or {}

    report = [
        "# Dev Archaeologist Excavation Report",
        "",
        f"- Root: `{data['root']}`",
        f"- Scanned at: `{data['scanned_at']}`",
        f"- Health: **{summary.health_score}/100** ({summary.health_status})",
        f"- Technical debt estimate: `{summary.technical_debt_estimate:.1f}`",
        f"- DNA signature: `{dna}`",
        f"- Personality: `{personality}`",
        f"- Architecture: `{architecture.get('primary', 'n/a')} / {architecture.get('secondary', 'n/a')}`",
        f"- Survival score: `{survival.get('score', 'n/a')}`",
        f"- Forecast 6 months: `{forecast.get('projected_6_months', 'n/a')}`",
        f"- Forecast 12 months: `{forecast.get('projected_12_months', 'n/a')}`",
        f"- Baseline health: `{baseline.get('health_score', 'n/a')}`",
        f"- Maintenance history entries: `{maintenance.get('history_entries', 0)}`",
        "",
        "## Metrics",
        "",
    ]

    metric_rows = (
        ("Total files", summary.total_files),
        ("Artifacts", summary.artifact_count),
        ("Ancient files", summary.ancient_count),
        ("TODOs", summary.todo_count),
        ("Duplicates", summary.duplicate_count),
        ("Dead code candidates", summary.dead_code_count),
        ("Monster files", summary.monster_count),
        ("Ruins", summary.ruin_count),
        ("Suspicious files", summary.suspicious_count),
    )
    for label, value in metric_rows:
        report.append(f"- {label}: {value}")

    report += ["", "## Warnings", ""]
    for warning in summary.warnings or ["None"]:
        report.append(f"- {warning}")

    # Each "Intelligence" bullet is the length of a list stored in extra.
    report += ["", "## Intelligence", ""]
    intelligence_rows = (
        ("Dependency hubs", "dependency_hubs"),
        ("Civilizations", "civilizations"),
        ("Heatmap buckets", "debt_heatmap"),
        ("Structural weaknesses", "weaknesses"),
        ("Investigations", "investigation"),
        ("Containment zones", "containment_zones"),
        ("Remediation findings", "remediation"),
    )
    for label, key in intelligence_rows:
        report.append(f"- {label}: {len(extra.get(key, []))}")

    report += ["", "## Artifacts", ""]
    for artifact in summary.artifacts:
        report.append(f"### {artifact.kind}")
        report.append(f"- Path: `{artifact.path}`")
        report.append(f"- Risk: {artifact.risk}")
        report.append(f"- Confidence: {_confidence_text(artifact.confidence)}")
        report.append(f"- Detail: {artifact.detail or 'n/a'}")
        # Age and line number are only listed when they were recorded.
        if artifact.age_days is not None:
            report.append(f"- Age: {artifact.age_days} days")
        if artifact.line_number is not None:
            report.append(f"- Line: {artifact.line_number}")
        report.append("")

    destination.write_text("\n".join(report), encoding="utf-8")
    return destination
138
+
139
+
140
def export_html(summary: ScanSummary, destination: Path) -> Path:
    """Write a standalone HTML report for *summary* to *destination*.

    The page shows the health score, the optional intelligence values from
    ``summary.extra`` (``n/a`` when absent), the maintenance baseline, the
    warnings, the debt heatmap and a table of all artifacts.

    Every value that originates from the scanned repository (paths, details,
    warnings, heatmap entries, extras) is HTML-escaped before interpolation so
    that characters such as ``<`` or ``&`` in file names or TODO text can
    neither break the markup nor inject script into the report.

    Returns *destination*.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def esc(value: object) -> str:
        """Return *value* rendered as text that is safe inside HTML."""
        return escape(str(value))

    rows = "\n".join(
        f"<tr><td>{esc(artifact.path)}</td><td>{esc(artifact.kind)}</td><td>{esc(artifact.risk)}</td><td>{_confidence_text(artifact.confidence)}</td><td>{esc(artifact.detail)}</td></tr>"
        for artifact in summary.artifacts
    )
    warnings = "".join(f"<li>{esc(warning)}</li>" for warning in (summary.warnings or ["None"]))
    dna = ", ".join(summary.extra.get("dna", {}).get("signature", [])) or "n/a"
    personality = summary.extra.get("personality", {}).get("type", "n/a")
    architecture = summary.extra.get("architecture", {})
    survival = summary.extra.get("survival", {})
    forecast = summary.extra.get("forecast", {})
    # Only the maintenance data is needed here, so read it directly rather
    # than serialising the whole summary.
    maintenance = _load_maintenance(summary.root)
    baseline = maintenance.get("baseline") or {}
    heatmap = "".join(
        f"<tr><td>{esc(item['bucket'])}</td><td>{esc(item['score'])}</td><td>{esc(item['label'])}</td><td>{esc(item['files'])}</td></tr>"
        for item in summary.extra.get("debt_heatmap", [])
    )
    document = f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Dev Archaeologist Report</title>
<style>
body {{ font-family: system-ui, sans-serif; margin: 2rem; background: #0b1020; color: #e5eefc; }}
.card {{ background: #111a33; border: 1px solid #22305f; border-radius: 16px; padding: 1rem 1.25rem; margin-bottom: 1rem; }}
table {{ width: 100%; border-collapse: collapse; }}
th, td {{ border-bottom: 1px solid #22305f; padding: .55rem; text-align: left; vertical-align: top; }}
th {{ color: #9dd3ff; }}
.score {{ font-size: 2rem; font-weight: 700; }}
</style>
</head>
<body>
<div class="card"><div class="score">{esc(summary.health_score)}/100</div><div>{esc(summary.health_status)}</div></div>
<div class="card"><strong>DNA</strong><div>{esc(dna)}</div></div>
<div class="card"><strong>Personality</strong><div>{esc(personality)}</div></div>
<div class="card"><strong>Architecture</strong><div>{esc(architecture.get('primary', 'n/a'))} / {esc(architecture.get('secondary', 'n/a'))}</div></div>
<div class="card"><strong>Survival</strong><div>{esc(survival.get('score', 'n/a'))}/100</div></div>
<div class="card"><strong>Forecast</strong><div>6 months: {esc(forecast.get('projected_6_months', 'n/a'))} | 12 months: {esc(forecast.get('projected_12_months', 'n/a'))}</div></div>
<div class="card"><strong>Baseline</strong><div>{esc(baseline.get('health_score', 'n/a'))} | History entries: {esc(maintenance.get('history_entries', 0))}</div></div>
<div class="card"><strong>Remediation</strong><div>{len(summary.extra.get('remediation', []))} findings</div></div>
<div class="card"><strong>Warnings</strong><ul>{warnings}</ul></div>
<div class="card"><strong>Heatmap</strong><table><thead><tr><th>Bucket</th><th>Score</th><th>Label</th><th>Files</th></tr></thead><tbody>{heatmap}</tbody></table></div>
<div class="card"><strong>Artifacts</strong><table><thead><tr><th>Path</th><th>Kind</th><th>Risk</th><th>Confidence</th><th>Detail</th></tr></thead><tbody>{rows}</tbody></table></div>
</body>
</html>"""
    destination.write_text(document, encoding="utf-8")
    return destination
187
+
188
+
189
def export_pdf(summary: ScanSummary, destination: Path) -> Path:
    """Write a minimal hand-assembled PDF report for *summary* to *destination*.

    The report is plain text in 11pt Helvetica on 612x792 pages: a header
    block, the warnings, then at most the first 50 artifacts. Lines are cut at
    120 characters and laid out 40 per page. Returns *destination*.
    """

    def escape_pdf(text: str) -> str:
        # Backslash must be handled first so the escapes added for the
        # parentheses are not escaped again.
        escaped = text.replace("\\", "\\\\")
        escaped = escaped.replace("(", "\\(")
        return escaped.replace(")", "\\)")

    def build_page(lines: list[str], top: int = 760) -> str:
        # One text object per page; every line after the first moves the
        # cursor down 14 units before being drawn.
        commands = ["BT", "/F1 11 Tf", f"40 {top} Td"]
        for position, line in enumerate(lines):
            safe = escape_pdf(line[:120])
            move = "" if position == 0 else "0 -14 Td "
            commands.append(f"{move}({safe}) Tj")
        commands.append("ET")
        return "\n".join(commands)

    extra = summary.extra
    dna = ", ".join(extra.get("dna", {}).get("signature", [])) or "n/a"
    personality = extra.get("personality", {}).get("type", "n/a")
    primary = extra.get("architecture", {}).get("primary", "n/a")
    secondary = extra.get("architecture", {}).get("secondary", "n/a")
    survival_score = extra.get("survival", {}).get("score", "n/a")
    forecast_12 = extra.get("forecast", {}).get("projected_12_months", "n/a")
    baseline = summary_to_dict(summary).get("maintenance", {}).get("baseline") or {}
    baseline_health = baseline.get("health_score", "n/a")
    remediation_total = len(extra.get("remediation", []))

    text_lines = [
        "Dev Archaeologist Excavation Report",
        f"Root: {summary.root}",
        f"Health: {summary.health_score}/100 ({summary.health_status})",
        f"Debt estimate: {summary.technical_debt_estimate:.1f}",
        f"DNA: {dna}",
        f"Personality: {personality}",
        f"Architecture: {primary} / {secondary}",
        f"Survival: {survival_score}/100",
        f"Forecast 12 months: {forecast_12}",
        f"Baseline health: {baseline_health}",
        f"Remediation findings: {remediation_total}",
        "Warnings:",
    ]
    for warning in summary.warnings or ["None"]:
        text_lines.append(f"- {warning}")
    text_lines.append("Artifacts:")
    for artifact in summary.artifacts[:50]:
        text_lines.append(
            f"- {artifact.kind}: {artifact.path} ({artifact.risk}, {_confidence_text(artifact.confidence)})"
        )

    # Paginate: 40 text lines per page.
    chunks = [text_lines[start:start + 40] for start in range(0, len(text_lines), 40)]

    objects: list[bytes] = []

    def add_object(payload: str) -> int:
        # Object ids are 1-based positions in ``objects``.
        objects.append(payload.encode("utf-8"))
        return len(objects)

    # Fixed layout: catalog is object 1, the page tree object 2, the font 3.
    add_object("<< /Type /Catalog /Pages 2 0 R >>")
    pages_id = add_object("<< /Type /Pages /Kids [] /Count 0 >>")
    font_id = add_object("<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>")

    page_ids: list[int] = []
    for chunk in chunks:
        content = build_page(chunk)
        content_id = add_object(f"<< /Length {len(content.encode('utf-8'))} >>\nstream\n{content}\nendstream")
        page_ids.append(
            add_object(
                f"<< /Type /Page /Parent {pages_id} 0 R /MediaBox [0 0 612 792] /Contents {content_id} 0 R /Resources << /Font << /F1 {font_id} 0 R >> >> >>"
            )
        )

    # Replace the page-tree placeholder now that all page ids are known.
    kids = " ".join(f"{page_id} 0 R" for page_id in page_ids)
    objects[pages_id - 1] = f"<< /Type /Pages /Kids [{kids}] /Count {len(page_ids)} >>".encode("utf-8")

    # Serialise the objects, remembering each one's byte offset for the xref table.
    pdf_bytes = bytearray(b"%PDF-1.4\n")
    object_offsets: list[int] = []
    for number, payload in enumerate(objects, start=1):
        object_offsets.append(len(pdf_bytes))
        pdf_bytes += f"{number} 0 obj\n".encode("ascii")
        pdf_bytes += payload
        pdf_bytes += b"\nendobj\n"

    xref_offset = len(pdf_bytes)
    entry_count = len(objects) + 1
    pdf_bytes += f"xref\n0 {entry_count}\n".encode("ascii")
    pdf_bytes += b"0000000000 65535 f \n"
    pdf_bytes += "".join(f"{offset:010d} 00000 n \n" for offset in object_offsets).encode("ascii")
    trailer = f"trailer\n<< /Size {entry_count} /Root 1 0 R >>\n" f"startxref\n{xref_offset}\n%%EOF\n"
    pdf_bytes += trailer.encode("ascii")

    destination.write_bytes(pdf_bytes)
    return destination
@@ -0,0 +1,2 @@
1
+ """Repository scanning helpers."""
2
+
@@ -0,0 +1,15 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from ..models import ScanSummary
6
+ from .intelligence import RepositoryAnalysis, analyze_repository
7
+
8
+
9
def scan_repository(root: Path) -> ScanSummary:
    """Analyze the repository at *root* and return only the analysis' ``summary``."""
    analysis = analyze_repository(root)
    return analysis.summary
11
+
12
+
13
def analyze_repository_root(root: Path) -> RepositoryAnalysis:
    """Return the full ``analyze_repository`` result for *root*, unchanged."""
    analysis = analyze_repository(root)
    return analysis
15
+
@@ -0,0 +1,84 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+ from pathlib import Path
5
+ import re
6
+
7
+ from ..models import Artifact
8
+ from ..utils.fs import RepoView, collect_repository, path_kind, read_text, safe_stat
9
+
10
+
11
# Matches a line-leading ``from X import`` (target in group 1) or ``import X``
# (target in group 2). Targets may contain word characters, dots, dashes and
# slashes. Multiline and verbose flags are set inline.
IMPORT_RE = re.compile(
    r"""(?mx)
    ^\s*(?:from\s+([\w.\-/]+)\s+import|import\s+([\w.\-/]+))
    """
)

# Matches a bare file name or relative path ending in one of the listed
# extensions (case-insensitive); group 1 is the whole reference.
REF_RE = re.compile(r"""(?i)\b([A-Za-z0-9_\-/]+\.(?:py|pyi|js|jsx|ts|tsx|md|json|yml|yaml|html|css|svg|png|jpg|jpeg|gif))\b""")
18
+
19
+
20
def build_text_index(view: RepoView) -> dict[Path, str]:
    """Map every file in *view* that ``path_kind`` classifies as ``"text"`` to its content."""
    return {
        candidate: read_text(candidate)
        for candidate in view.files
        if path_kind(candidate) == "text"
    }
26
+
27
+
28
def normalize_ref(root: Path, ref: str) -> Path | None:
    """Resolve an import target or file reference to an existing path under *root*.

    Resolution order:

    1. *ref* taken literally as a relative file path (``utils/fs.py``). This
       step is skipped for absolute paths and paths containing ``..`` so a
       reference cannot point outside *root*.
    2. *ref* taken as a dotted module name (``pkg.mod`` -> ``pkg/mod``),
       tried directly under *root* as-is and with each known suffix.
    3. The same dotted candidates searched recursively below *root*; the
       first match wins.

    Returns the resolved path, or ``None`` when nothing matches or when *ref*
    is empty or consists only of dots/slashes (e.g. ``from . import x``).
    """
    suffixes = ("", ".py", ".js", ".ts", ".tsx", ".jsx", ".md", ".json", ".yml", ".yaml", ".html", ".css")

    cleaned = ref.strip()
    if not cleaned:
        return None

    # Literal file reference. Without this step the dot-to-slash rewrite
    # below turns "utils/fs.py" into "utils/fs/py", which can never match.
    literal = Path(cleaned)
    if not literal.is_absolute() and ".." not in literal.parts:
        literal_candidate = root / literal
        if literal_candidate.is_file():
            return literal_candidate.resolve()

    # Dotted-module interpretation. Surrounding slashes are stripped so the
    # candidate stays relative to root and is a valid rglob pattern.
    dotted = cleaned.lstrip(".").replace(".", "/").strip("/")
    if not dotted:
        # A bare relative import names no module; do not resolve to root itself.
        return None

    for suffix in suffixes:
        candidate = root / f"{dotted}{suffix}"
        if candidate.exists():
            return candidate.resolve()

    for suffix in suffixes:
        # Only the first hit is needed, so stop the tree walk there.
        match = next(iter(root.rglob(f"{dotted}{suffix}")), None)
        if match is not None:
            return match.resolve()
    return None
42
+
43
+
44
def build_reference_map(view: RepoView, text_index: dict[Path, str]) -> dict[Path, set[Path]]:
    """Build a map from each referenced path to the set of files that reference it.

    Every text in *text_index* is scanned with ``IMPORT_RE`` (import
    statements) and ``REF_RE`` (file-name mentions). Each hit is resolved with
    ``normalize_ref`` against ``view.root``; hits that do not resolve are
    ignored.

    Resolution results are memoised for the duration of the call. The same
    target usually appears in many files, and an unresolved target otherwise
    triggers repeated recursive directory searches for every occurrence.

    Returns a ``defaultdict(set)`` keyed by resolved target path.
    """
    references: dict[Path, set[Path]] = defaultdict(set)
    resolved: dict[str, Path | None] = {}

    def resolve(raw: str) -> Path | None:
        # Look each distinct reference string up on disk only once.
        if raw not in resolved:
            resolved[raw] = normalize_ref(view.root, raw)
        return resolved[raw]

    for source_path, content in text_index.items():
        for match in IMPORT_RE.finditer(content):
            # Group 1 is the "from X import" form, group 2 the "import X" form.
            target = match.group(1) or match.group(2)
            if not target:
                continue
            normalized = resolve(target)
            if normalized:
                references[normalized].add(source_path)
        for match in REF_RE.finditer(content):
            ref_path = resolve(match.group(1))
            if ref_path:
                references[ref_path].add(source_path)
    return references
59
+
60
+
61
def file_age_days(path: Path, git_last_commit_ts: int | None = None) -> int:
    """Return how many whole days ago *path* was last changed, never negative.

    When *git_last_commit_ts* (a POSIX timestamp) is given it is used as the
    modification time; otherwise the file's ``st_mtime`` is read. Both are
    interpreted in UTC.
    """
    from datetime import datetime, timezone

    if git_last_commit_ts is None:
        timestamp = path.stat().st_mtime
    else:
        timestamp = git_last_commit_ts
    modified = datetime.fromtimestamp(timestamp, tz=timezone.utc)
    elapsed = datetime.now(timezone.utc) - modified
    # A timestamp in the future yields a negative delta; report it as 0 days.
    return elapsed.days if elapsed.days > 0 else 0
70
+
71
+
72
def iter_repo_files(root: Path) -> list[Path]:
    """Return the ``files`` list of the repository view collected for *root*."""
    repo_view = collect_repository(root)
    return repo_view.files
74
+
75
+
76
def artifact(path: Path, kind: str, risk: str, detail: str, **metadata: object) -> Artifact:
    """Build an ``Artifact`` for *path*.

    ``size_bytes`` is filled from ``safe_stat(path)`` and any extra keyword
    arguments are stored as the artifact's ``metadata``; the remaining
    ``Artifact`` fields keep their defaults.
    """
    measured_size = safe_stat(path)
    extras = dict(metadata)
    return Artifact(
        path=path,
        kind=kind,
        risk=risk,
        size_bytes=measured_size,
        detail=detail,
        metadata=extras,
    )