PyPI - devarch - Versions diffs - 0.2.0__py3-none-any.whl - Mend

devarch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

devarch/__init__.py +4 -0
devarch/__main__.py +4 -0
devarch/analyzers/__init__.py +2 -0
devarch/analyzers/ancient.py +48 -0
devarch/analyzers/dead_code.py +92 -0
devarch/analyzers/duplicates.py +101 -0
devarch/analyzers/health.py +60 -0
devarch/analyzers/maintenance.py +902 -0
devarch/analyzers/monsters.py +62 -0
devarch/analyzers/recovery.py +338 -0
devarch/analyzers/ruins.py +45 -0
devarch/analyzers/suspicious.py +39 -0
devarch/analyzers/todos.py +60 -0
devarch/cli/__init__.py +2 -0
devarch/cli/main.py +1708 -0
devarch/models.py +43 -0
devarch/plugins.py +29 -0
devarch/reports/__init__.py +2 -0
devarch/reports/exporters.py +274 -0
devarch/scanner/__init__.py +2 -0
devarch/scanner/core.py +15 -0
devarch/scanner/discovery.py +84 -0
devarch/scanner/intelligence.py +1559 -0
devarch/utils/__init__.py +2 -0
devarch/utils/fs.py +165 -0
devarch/utils/git_info.py +64 -0
devarch/utils/rich_ui.py +107 -0
devarch/version.py +3 -0
devarch-0.2.0.dist-info/METADATA +317 -0
devarch-0.2.0.dist-info/RECORD +33 -0
devarch-0.2.0.dist-info/WHEEL +4 -0
devarch-0.2.0.dist-info/entry_points.txt +3 -0
devarch-0.2.0.dist-info/licenses/LICENSE +22 -0

devarch/analyzers/monsters.py ADDED Viewed

@@ -0,0 +1,62 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import ast
+from pathlib import Path
+from ..models import Artifact
+from ..utils.fs import path_kind, read_text
+def complexity_from_text(content: str) -> int:
+    score = 1
+    for token in ("if ", "elif ", "for ", "while ", " and ", " or ", "case ", "except ", "?", "match "):
+        score += content.count(token)
+    return score
+def dependency_count(content: str) -> int:
+    count = 0
+    for line in content.splitlines():
+        line = line.strip()
+        if line.startswith("import ") or line.startswith("from "):
+            count += 1
+        if line.startswith("require(") or "import " in line:
+            count += 1
+    return count
+def find_monsters(
+    files: list[Path],
+    max_lines: int = 800,
+    complexity_threshold: int = 35,
+    dependency_threshold: int = 25,
+) -> list[Artifact]:
+    artifacts: list[Artifact] = []
+    for path in files:
+        if path_kind(path) != "text":
+            continue
+        content = read_text(path)
+        lines = content.count("\n") + 1
+        complexity = complexity_from_text(content)
+        deps = dependency_count(content)
+        if lines >= max_lines or complexity >= complexity_threshold or deps >= dependency_threshold:
+            threat = "Severe" if lines >= max_lines * 2 or complexity >= complexity_threshold * 2 or deps >= dependency_threshold * 2 else "High"
+            details = []
+            if lines >= max_lines:
+                details.append(f"lines={lines}")
+            if complexity >= complexity_threshold:
+                details.append(f"complexity={complexity}")
+            if deps >= dependency_threshold:
+                details.append(f"dependencies={deps}")
+            artifacts.append(
+                Artifact(
+                    path=path,
+                    kind="monster_file",
+                    risk=threat,
+                    detail=", ".join(details),
+                    confidence=0.9,
+                    metadata={"lines": lines, "complexity": complexity, "dependencies": deps},
+                )
+            )
+    return artifacts

devarch/analyzers/recovery.py ADDED Viewed

@@ -0,0 +1,338 @@
+from __future__ import annotations
+from collections import Counter, defaultdict
+from dataclasses import dataclass, field
+from pathlib import Path
+import re
+from ..models import Artifact
+from ..scanner.discovery import build_reference_map, build_text_index
+from ..utils.fs import RepoView, collect_repository, path_kind
+@dataclass(slots=True)
+class CleanupPriority:
+    """One tier of a cleanup plan, as assembled by ``build_cleanup_plan``."""
+    # Tier number; build_cleanup_plan emits tiers 1, 2 and 3 in ascending order.
+    level: int
+    # Human-readable action items for this tier.
+    items: list[str] = field(default_factory=list)
+@dataclass(slots=True)
+class DeletionAnalysis:
+    """Result of ``analyze_deletion``: how risky it looks to delete one file."""
+    # Resolved path of the file being considered for deletion.
+    path: Path
+    # Score computed by analyze_deletion as 100 - 18 * affected_files, clamped to 0..99.
+    safe_confidence: float
+    # Number of distinct files found in references and dependencies combined.
+    affected_files: int
+    # One of "Archive or Remove", "Review Before Delete", "Keep or refactor first".
+    recommendation: str
+    # Files the reference map lists for the target.
+    references: list[Path] = field(default_factory=list)
+    # Text files whose content mentions the target's file name or underscore-less stem.
+    dependencies: list[Path] = field(default_factory=list)
+@dataclass(slots=True)
+class RefactorCandidate:
+    """A refactoring suggestion produced by ``find_refactor_candidates``."""
+    # Label for the candidate: a duplicate-block detail/stem or a file name.
+    name: str
+    # Files involved in the suggestion.
+    locations: list[Path]
+    # Suggested action, e.g. "Extract shared utility".
+    recommendation: str
+    # Heuristic confidence; find_refactor_candidates uses 0.88 and 0.74.
+    confidence: float
+@dataclass(slots=True)
+class RouteFinding:
+    """A single observation emitted by ``audit_routes`` for a route-like file."""
+    # audit_routes emits "unused endpoint", "documented route" or "unreachable route".
+    kind: str
+    # File the finding refers to.
+    path: Path
+    # Short human-readable explanation of the finding.
+    detail: str
+    # Heuristic confidence; audit_routes uses 0.92 and 0.7.
+    confidence: float
+@dataclass(slots=True)
+class ConfigFinding:
+    """A configuration observation emitted by ``audit_configs``."""
+    # audit_configs emits "unused environment variable".
+    kind: str
+    # The upper-case identifier the finding is about.
+    name: str
+    # Heuristic confidence; audit_configs uses 0.8.
+    confidence: float
+    # Files associated with the finding; audit_configs leaves this empty.
+    locations: list[Path] = field(default_factory=list)
+@dataclass(slots=True)
+class MigrationFinding:
+    """A migration-related observation emitted by ``audit_migrations``."""
+    # File the finding refers to.
+    path: Path
+    # audit_migrations emits "incomplete migration" or "orphaned migration".
+    kind: str
+    # audit_migrations emits "Needs Review" or "Orphaned".
+    status: str
+    # Heuristic confidence; audit_migrations uses 0.84 and 0.78.
+    confidence: float
+@dataclass(slots=True)
+class DependencyWarning:
+    """A warning emitted by ``rationalize_dependencies`` about a rarely used package."""
+    # Lower-cased package name.
+    name: str
+    # Usage count recorded for the package by rationalize_dependencies.
+    only_used_for: int
+    # Suggested action, e.g. "Replace with native code".
+    recommendation: str
+    # Heuristic confidence; rationalize_dependencies uses 0.9.
+    confidence: float
+@dataclass(slots=True)
+class DriftReport:
+    """Architecture drift summary returned by ``detect_drift``."""
+    # Description of the starting architecture ("Simple <Signature> Service" in detect_drift).
+    original: str
+    # Description of the architecture as currently detected.
+    current: str
+    # detect_drift emits "High" or "Moderate".
+    severity: str
+    # detect_drift emits "Feature accumulation" or "Structural drift".
+    cause: str
+@dataclass(slots=True)
+class PRReport:
+    """Pull-request style summary filled in by ``build_pr_report``."""
+    # Phrases describing what was removed (e.g. "3 ancient files").
+    removed: list[str] = field(default_factory=list)
+    # Phrases describing what was reduced.
+    reduced: list[str] = field(default_factory=list)
+    # Phrases describing what was improved.
+    improved: list[str] = field(default_factory=list)
+@dataclass(slots=True)
+class StatusSummary:
+    """Headline repository numbers assembled by ``build_status_summary``."""
+    # Integer form of the summary's technical_debt_estimate.
+    debt: int
+    # Score capped at 100, derived from dependency hub and weakness counts.
+    complexity: int
+    # Number of dead-code candidates reported by the summary.
+    dead_code: int
+    # Number of entries in the knowledge map's route graph.
+    route_count: int
+    # build_status_summary sets this to duplicate_count + graph_edge_count.
+    dependency_count: int
+    # build_status_summary sets this to artifact_count + number of weaknesses.
+    cleanup_opportunities: int
+    # Suggested next steps, as plain sentences.
+    recommendations: list[str] = field(default_factory=list)
+def _text_files(view: RepoView) -> list[Path]:
+    return [path for path in view.files if path_kind(path) == "text"]
+def _get_summary(analysis_or_summary):
+    return getattr(analysis_or_summary, "summary", analysis_or_summary)
+def _get_intelligence(analysis_or_intelligence):
+    return getattr(analysis_or_intelligence, "intelligence", analysis_or_intelligence)
+def build_cleanup_plan(analysis_or_summary, priorities: int = 3) -> list[CleanupPriority]:
+    summary = _get_summary(analysis_or_summary)
+    extra = getattr(summary, "extra", {})
+    priorities_map = {
+        1: [
+            f"Remove {summary.todo_count} TODO-heavy hotspots" if summary.todo_count else "Remove dead code candidates",
+            f"Archive {len(extra.get('civilizations', []))} abandoned subsystem clusters" if extra.get("civilizations") else "Delete obsolete routes",
+        ],
+        2: [
+            "Refactor structural weaknesses" if extra.get("weaknesses") else "Refactor oversized modules",
+            "Stabilize dependency hubs" if extra.get("dependency_hubs") else "Consolidate duplicated utilities",
+        ],
+        3: [
+            "Consolidate duplicate logic" if summary.duplicate_count else "Trim unused configuration",
+            "Reduce technical debt hotspots" if extra.get("debt_heatmap") else "Review migration leftovers",
+        ],
+    }
+    return [CleanupPriority(level=level, items=priorities_map[level]) for level in sorted(priorities_map)][:priorities]
+def analyze_deletion(path: Path, root: Path) -> DeletionAnalysis:
+    view = collect_repository(root)
+    text_cache = build_text_index(view)
+    references = build_reference_map(view, text_cache)
+    target = path.resolve()
+    ref_files = sorted(references.get(target, set()))
+    dependencies: list[Path] = []
+    needle = target.stem.replace("_", "")
+    for source in _text_files(view):
+        if source == target:
+            continue
+        content = text_cache.get(source, "")
+        if not content:
+            continue
+        if target.name in content or needle and needle in content.replace("_", ""):
+            dependencies.append(source)
+    affected = len(set(ref_files + dependencies))
+    safe = max(0.0, 100.0 - (affected * 18.0))
+    if affected == 0:
+        recommendation = "Archive or Remove"
+    elif affected <= 2:
+        recommendation = "Review Before Delete"
+    else:
+        recommendation = "Keep or refactor first"
+    return DeletionAnalysis(
+        path=target,
+        safe_confidence=min(99.0, safe),
+        affected_files=affected,
+        recommendation=recommendation,
+        references=ref_files,
+        dependencies=dependencies,
+    )
+def find_refactor_candidates(analysis_or_intelligence) -> list[RefactorCandidate]:
+    intelligence = _get_intelligence(analysis_or_intelligence)
+    candidates: list[RefactorCandidate] = []
+    duplicates = defaultdict(list)
+    summary = getattr(analysis_or_intelligence, "summary", None)
+    text_cache = getattr(intelligence, "text_cache", {})
+    for artifact in getattr(summary, "artifacts", []):
+        if artifact.kind == "duplicate_block":
+            key = artifact.detail or artifact.path.stem
+            duplicates[key].append(artifact.path)
+            match_path = artifact.metadata.get("match_path")
+            if match_path:
+                duplicates[key].append(Path(match_path))
+    for name, paths in duplicates.items():
+        locations = sorted({path for path in paths})
+        candidates.append(
+            RefactorCandidate(
+                name=name or "Duplicate logic",
+                locations=locations,
+                recommendation="Extract shared utility",
+                confidence=0.88,
+            )
+        )
+    # oversized classes and repeated validators
+    for path in intelligence.view.files:
+        if path_kind(path) != "text":
+            continue
+        content = text_cache.get(path, "")
+        if not content:
+            try:
+                content = path.read_text(encoding="utf-8")
+            except OSError:
+                content = ""
+        if content.count("def validate") >= 2 or content.count("class ") >= 5:
+            candidates.append(
+                RefactorCandidate(
+                    name=path.name,
+                    locations=[path],
+                    recommendation="Split responsibilities and extract helpers",
+                    confidence=0.74,
+                )
+            )
+    return candidates
+# Framework label -> regex that audit_routes searches for in a file's text.
+# These are loose textual heuristics, not parsers: a hit for any pattern is
+# enough for audit_routes to treat the file as containing route handlers.
+ROUTE_PATTERNS = {
+    # Decorators such as @app.get( or @router.post(
+    "FastAPI": re.compile(r"@(?:app|router)\.(get|post|put|patch|delete|options|head)\("),
+    # Decorators such as @app.route( or @blueprint.get(
+    "Flask": re.compile(r"@(?:app|blueprint)\.(route|get|post|put|delete)\("),
+    # Any ".get(" / ".post(" style call; note this also matches non-route calls like dict.get(
+    "Express": re.compile(r"\.(get|post|put|patch|delete)\("),
+    # Any exported function, or an exported async function named after an HTTP verb.
+    "Next.js": re.compile(r"export\s+(?:default\s+)?function\s+\w+|export\s+async\s+function\s+(GET|POST|PUT|PATCH|DELETE)"),
+}
+def audit_routes(view: RepoView, text_cache: dict[Path, str], references: dict[Path, set[Path]]) -> list[RouteFinding]:
+    findings: list[RouteFinding] = []
+    for path in _text_files(view):
+        content = text_cache.get(path, "")
+        if "route" not in path.name.lower() and "/api/" not in str(path).lower() and "app/" not in str(path).lower():
+            continue
+        if any(pattern.search(content) for pattern in ROUTE_PATTERNS.values()):
+            confidence = 0.92
+            if len(references.get(path, set())) == 0:
+                findings.append(
+                    RouteFinding(
+                        kind="unused endpoint",
+                        path=path,
+                        detail="No obvious callers detected",
+                        confidence=confidence,
+                    )
+                )
+            else:
+                findings.append(
+                    RouteFinding(
+                        kind="documented route",
+                        path=path,
+                        detail="Route patterns detected and reference links exist",
+                        confidence=confidence,
+                    )
+                )
+        else:
+            findings.append(
+                RouteFinding(
+                    kind="unreachable route",
+                    path=path,
+                    detail="Path resembles route code but no handlers were found",
+                    confidence=0.7,
+                )
+            )
+    return findings
+def audit_configs(view: RepoView, text_cache: dict[Path, str]) -> list[ConfigFinding]:
+    env_vars = Counter()
+    for content in text_cache.values():
+        for match in re.finditer(r"\b[A-Z][A-Z0-9_]{2,}\b", content):
+            env_vars[match.group(0)] += 1
+    findings: list[ConfigFinding] = []
+    for name, count in env_vars.items():
+        if count <= 1 and any(token in name for token in ("KEY", "URL", "ENDPOINT", "SECRET")):
+            findings.append(ConfigFinding(kind="unused environment variable", name=name, confidence=0.8))
+    return findings
+def audit_migrations(view: RepoView, text_cache: dict[Path, str]) -> list[MigrationFinding]:
+    findings: list[MigrationFinding] = []
+    for path in view.files:
+        name = path.name.lower()
+        if "migration" not in str(path).lower() and "migrate" not in name and "schema" not in name:
+            continue
+        content = text_cache.get(path, "") if path_kind(path) == "text" else ""
+        if "TODO" in content or "XXX" in content:
+            findings.append(MigrationFinding(path=path, kind="incomplete migration", status="Needs Review", confidence=0.84))
+        elif "down()" not in content and "rollback" not in content and "revert" not in content:
+            findings.append(MigrationFinding(path=path, kind="orphaned migration", status="Orphaned", confidence=0.78))
+    return findings
+def rationalize_dependencies(analysis_or_intelligence) -> list[DependencyWarning]:
+    intelligence = _get_intelligence(analysis_or_intelligence)
+    warnings: list[DependencyWarning] = []
+    counts = Counter()
+    for package, count in intelligence.external_packages.items():
+        counts[package.lower()] += count
+    for name, count in counts.items():
+        if count <= 1 and name in {"lodash", "underscore", "moment", "left-pad"}:
+            warnings.append(
+                DependencyWarning(
+                    name=name,
+                    only_used_for=count,
+                    recommendation="Replace with native code",
+                    confidence=0.9,
+                )
+            )
+    return warnings
+def detect_drift(analysis_or_intelligence) -> DriftReport:
+    analysis = analysis_or_intelligence
+    intelligence = _get_intelligence(analysis_or_intelligence)
+    summary = _get_summary(analysis_or_intelligence)
+    original = intelligence.dna.signature[0] if intelligence.dna.signature else "Unknown"
+    current = intelligence.architecture.primary if intelligence.architecture else "Unknown"
+    forecast = getattr(intelligence, "forecast", None)
+    current_health = getattr(forecast, "current_health", getattr(summary, "health_score", 0))
+    projected_12 = getattr(forecast, "projected_12_months", current_health)
+    severity = "High" if projected_12 < current_health - 10 else "Moderate"
+    cause = "Feature accumulation" if getattr(summary, "health_score", 100) < 80 else "Structural drift"
+    if current == "Prototype" and getattr(summary, "health_score", 100) < 70:
+        current = "Monolithic Application"
+    return DriftReport(original=f"Simple {original.title()} Service", current=current, severity=severity, cause=cause)
+def build_pr_report(analysis_or_summary) -> PRReport:
+    summary = _get_summary(analysis_or_summary)
+    pr = PRReport()
+    pr.removed.append(f"{summary.ancient_count} ancient files")
+    pr.removed.append(f"{summary.dead_code_count} dead code candidates")
+    pr.reduced.append(f"duplicate code by {summary.duplicate_count * 6}%")
+    pr.improved.append(f"repository health score from {max(0, summary.health_score - 7)} to {summary.health_score}")
+    return pr
+def build_status_summary(analysis_or_summary) -> StatusSummary:
+    summary = _get_summary(analysis_or_summary)
+    intelligence = _get_intelligence(analysis_or_summary)
+    return StatusSummary(
+        debt=int(summary.technical_debt_estimate),
+        complexity=min(100, len(intelligence.dependency_hubs) * 5 + len(intelligence.weaknesses) * 10),
+        dead_code=summary.dead_code_count,
+        route_count=len(intelligence.knowledge_map.route_graph),
+        dependency_count=summary.duplicate_count + intelligence.graph_edge_count,
+        cleanup_opportunities=summary.artifact_count + len(intelligence.weaknesses),
+        recommendations=[
+            "Prioritize high-impact deletions",
+            "Refactor structural bottlenecks",
+            "Audit routes and config drift",
+        ],
+    )

devarch/analyzers/ruins.py ADDED Viewed

@@ -0,0 +1,45 @@
+from __future__ import annotations
+from pathlib import Path
+import re
+from ..models import Artifact
+from ..utils.fs import ASSET_EXTENSIONS, path_kind, read_text
+def find_empty_directories(directories: list[Path], files: list[Path]) -> list[Artifact]:
+    file_set = set(files)
+    artifacts: list[Artifact] = []
+    for directory in directories:
+        if directory.exists() and not any(child for child in directory.iterdir() if child not in file_set):
+            artifacts.append(
+                Artifact(
+                    path=directory,
+                    kind="empty_directory",
+                    risk="Low",
+                    detail="Empty directory",
+                    confidence=1.0,
+                )
+            )
+    return artifacts
+def find_unused_assets(files: list[Path], text_cache: dict[Path, str]) -> list[Artifact]:
+    assets = [path for path in files if path.suffix.lower() in ASSET_EXTENSIONS]
+    if not assets:
+        return []
+    combined = "\n".join(text_cache.values()).lower()
+    artifacts: list[Artifact] = []
+    for asset in assets:
+        if asset.name.lower() not in combined and asset.stem.lower() not in combined:
+            artifacts.append(
+                Artifact(
+                    path=asset,
+                    kind="unused_asset",
+                    risk="Medium",
+                    detail="No obvious textual references",
+                    confidence=0.72,
+                )
+            )
+    return artifacts

devarch/analyzers/suspicious.py ADDED Viewed

@@ -0,0 +1,39 @@
+from __future__ import annotations
+from pathlib import Path
+from ..models import Artifact
+# Lower-case fragments that find_suspicious looks for inside a file name.
+# Matching is by plain substring, so a name containing "final2" also matches
+# "final" and therefore counts as two hits.
+SUSPICIOUS_MARKERS = (
+    "old",
+    "backup",
+    "copy",
+    "final",
+    "final2",
+    "new",
+    "temp",
+    "legacy",
+    "archive",
+)
+def find_suspicious(files: list[Path]) -> list[Artifact]:
+    artifacts: list[Artifact] = []
+    for path in files:
+        lowered = path.name.lower()
+        hits = [marker for marker in SUSPICIOUS_MARKERS if marker in lowered]
+        if not hits:
+            continue
+        confidence = min(0.6 + 0.1 * len(hits), 0.99)
+        artifacts.append(
+            Artifact(
+                path=path,
+                kind="suspicious",
+                risk="Medium" if len(hits) == 1 else "High",
+                detail=f"Matched markers: {', '.join(hits)}",
+                confidence=confidence,
+            )
+        )
+    return artifacts

devarch/analyzers/todos.py ADDED Viewed

@@ -0,0 +1,60 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import re
+from pathlib import Path
+from ..models import Artifact
+from ..utils.fs import path_kind, read_text
+# Severity label -> case-insensitive whole-word pattern for marker comments.
+# find_todos tries the entries in this order and stops at the first match per
+# line, so a line containing both FIXME and TODO is reported as CRITICAL.
+TODO_PATTERNS = {
+    "CRITICAL": re.compile(r"\b(?:FIXME|BUG)\b", re.IGNORECASE),
+    "HIGH": re.compile(r"\b(?:HACK|XXX)\b", re.IGNORECASE),
+    "MEDIUM": re.compile(r"\b(?:TODO)\b", re.IGNORECASE),
+    "LOW": re.compile(r"\b(?:TEMP)\b", re.IGNORECASE),
+}
+@dataclass(slots=True)
+class TodoFinding:
+    """One marker line located by ``find_todos``."""
+    # File in which the marker was found.
+    file: Path
+    # 1-based line number of the marker within the file.
+    line: int
+    # Key of the TODO_PATTERNS entry that matched (e.g. "CRITICAL").
+    severity: str
+    # The matching line with surrounding whitespace stripped.
+    comment: str
+def find_todos(files: list[Path]) -> list[TodoFinding]:
+    findings: list[TodoFinding] = []
+    for path in files:
+        if path_kind(path) != "text":
+            continue
+        content = read_text(path)
+        for line_no, line in enumerate(content.splitlines(), start=1):
+            for severity, pattern in TODO_PATTERNS.items():
+                if pattern.search(line):
+                    findings.append(
+                        TodoFinding(
+                            file=path,
+                            line=line_no,
+                            severity=severity,
+                            comment=line.strip(),
+                        )
+                    )
+                    break
+    return findings
+def todos_to_artifacts(findings: list[TodoFinding]) -> list[Artifact]:
+    return [
+        Artifact(
+            path=finding.file,
+            kind="todo",
+            risk=finding.severity,
+            line_number=finding.line,
+            detail=finding.comment,
+            confidence=1.0,
+        )
+        for finding in findings
+    ]

devarch/cli/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ """CLI entrypoints for Dev Archaeologist."""
2	+