PyPI - drift-analyzer - Versions diffs - 0.5.0__py3-none-any.whl - Mend

drift-analyzer 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

drift/__init__.py +18 -0
drift/__main__.py +6 -0
drift/analyzer.py +370 -0
drift/analyzers/typescript/alias_resolver.py +129 -0
drift/analyzers/typescript/barrel_resolver.py +131 -0
drift/analyzers/typescript/import_graph.py +172 -0
drift/analyzers/typescript/workspace_boundaries.py +116 -0
drift/cache.py +209 -0
drift/cli.py +80 -0
drift/commands/__init__.py +16 -0
drift/commands/analyze.py +142 -0
drift/commands/badge.py +75 -0
drift/commands/check.py +100 -0
drift/commands/patterns.py +70 -0
drift/commands/self_analyze.py +61 -0
drift/commands/timeline.py +42 -0
drift/commands/trend.py +125 -0
drift/config.py +127 -0
drift/embeddings.py +294 -0
drift/ingestion/__init__.py +12 -0
drift/ingestion/ast_parser.py +509 -0
drift/ingestion/file_discovery.py +156 -0
drift/ingestion/git_history.py +281 -0
drift/ingestion/ts_parser.py +452 -0
drift/models.py +240 -0
drift/output/__init__.py +18 -0
drift/output/json_output.py +147 -0
drift/output/rich_output.py +489 -0
drift/py.typed +0 -0
drift/recommendations.py +268 -0
drift/rules/tsjs/cross_package_import_ban.py +93 -0
drift/scoring/__init__.py +17 -0
drift/scoring/engine.py +269 -0
drift/signals/__init__.py +21 -0
drift/signals/architecture_violation.py +454 -0
drift/signals/base.py +108 -0
drift/signals/doc_impl_drift.py +492 -0
drift/signals/explainability_deficit.py +198 -0
drift/signals/mutant_duplicates.py +484 -0
drift/signals/pattern_fragmentation.py +175 -0
drift/signals/system_misalignment.py +217 -0
drift/signals/temporal_volatility.py +171 -0
drift/suppression.py +93 -0
drift/timeline.py +293 -0
drift_analyzer-0.5.0.dist-info/METADATA +284 -0
drift_analyzer-0.5.0.dist-info/RECORD +49 -0
drift_analyzer-0.5.0.dist-info/WHEEL +4 -0
drift_analyzer-0.5.0.dist-info/entry_points.txt +2 -0
drift_analyzer-0.5.0.dist-info/licenses/LICENSE +21 -0

drift/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""Drift — Detect architectural erosion from AI-generated code."""
+from __future__ import annotations
+from importlib.metadata import PackageNotFoundError, version
+def _resolve_version() -> str:
+    """Return the installed distribution version used in CLI/output metadata.
+    The distribution names ``drift-analyzer`` and ``drift`` are probed in that
+    order; the first one with installed metadata wins.  ``"0.0.0"`` is returned
+    when neither is installed.
+    """
+    for dist_name in ("drift-analyzer", "drift"):
+        try:
+            found = version(dist_name)
+        except PackageNotFoundError:
+            # Not installed under this name; try the next candidate.
+            pass
+        else:
+            return found
+    return "0.0.0"
+__version__ = _resolve_version()

drift/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow running drift as a module: python -m drift."""
+from drift.cli import main
+# Entry-point guard: invoke the CLI only when this module is executed as a
+# script (e.g. ``python -m drift``), not when it is merely imported.
+if __name__ == "__main__":
+    main()

drift/analyzer.py ADDED Viewed

@@ -0,0 +1,370 @@
+"""Main analysis orchestrator — coordinates ingestion, signals, and scoring."""
+from __future__ import annotations
+import datetime
+import importlib
+import logging
+import pkgutil
+import subprocess
+import time
+from collections.abc import Callable
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+import drift.signals
+from drift.cache import ParseCache
+from drift.config import DriftConfig
+from drift.embeddings import get_embedding_service
+from drift.ingestion.ast_parser import parse_file
+from drift.ingestion.file_discovery import discover_files
+from drift.ingestion.git_history import build_file_histories, parse_git_history
+from drift.models import (
+    FileInfo,
+    Finding,
+    ParseResult,
+    PatternCategory,
+    PatternInstance,
+    RepoAnalysis,
+)
+from drift.scoring.engine import (
+    assign_impact_scores,
+    composite_score,
+    compute_module_scores,
+    compute_signal_scores,
+)
+from drift.signals.base import AnalysisContext, create_signals
+from drift.suppression import filter_findings, scan_suppressions
+# Auto-discover all signal modules so @register_signal decorators execute.
+# This is an import-time side effect: every direct submodule of
+# ``drift.signals`` is imported as soon as this module is loaded.
+for _finder, _mod_name, _ispkg in pkgutil.iter_modules(drift.signals.__path__):
+    importlib.import_module(f"drift.signals.{_mod_name}")
+# Progress callback: (phase_name, current, total)
+# Callers in this module pass ``0, 0`` for phases without a known item count.
+ProgressCallback = Callable[[str, int, int], None]
+# Default parallelism for file parsing — threads work well here because
+# the bottleneck is disk I/O rather than pure CPU.
+_DEFAULT_WORKERS = 8
+def _is_git_repo(path: Path) -> bool:
+    """Report whether ``git rev-parse --git-dir`` succeeds for *path*.
+    Returns ``False`` when git exits with a non-zero status or when the
+    ``git`` executable cannot be found.
+    """
+    command = ["git", "-C", str(path), "rev-parse", "--git-dir"]
+    try:
+        subprocess.run(command, capture_output=True, check=True)
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False
+    else:
+        return True
+def _fetch_git_history(
+    repo_path: Path, since_days: int, known_files: set[str],
+    ai_confidence_threshold: float = 0.50,
+) -> tuple[list, dict]:
+    """Parse recent commits and derive per-file histories in one call.
+    Kept as a standalone function so it can be submitted to an executor and
+    run in a background thread.
+    Returns:
+        ``(commits, file_histories)`` as produced by ``parse_git_history``
+        and ``build_file_histories`` respectively.
+    """
+    history_options = {
+        "since_days": since_days,
+        "file_filter": known_files,
+        "ai_confidence_threshold": ai_confidence_threshold,
+    }
+    commits = parse_git_history(repo_path, **history_options)
+    return commits, build_file_histories(commits, known_files=known_files)
+def _run_pipeline(
+    repo_path: Path,
+    files: list[FileInfo],
+    config: DriftConfig,
+    since_days: int = 90,
+    on_progress: ProgressCallback | None = None,
+    workers: int = _DEFAULT_WORKERS,
+    _start: float | None = None,
+) -> RepoAnalysis:
+    """Shared analysis pipeline: parse → git history → signals → score.
+    Both ``analyze_repo`` and ``analyze_diff`` delegate here after resolving
+    which files to analyse.  Keeping the pipeline in one place eliminates
+    duplication and ensures every code-path benefits from caching, progress
+    reporting, and resilient signal execution.
+    Args:
+        repo_path: Repository root; file paths and the cache directory are
+            joined onto it.
+        files: Files to analyse (paths are joined onto ``repo_path``).
+        config: Drift configuration (cache dir, thresholds, weights, embeddings).
+        since_days: Git history window forwarded to the history parser.
+        on_progress: Optional callback invoked as ``(phase, current, total)``.
+        workers: ``max_workers`` of the thread pool shared by file parsing and
+            the git-history task.
+        _start: Optional ``time.monotonic()`` reading taken by the caller so the
+            reported duration also covers the caller's own work.
+    Returns:
+        The assembled ``RepoAnalysis``.
+    """
+    start = _start if _start is not None else time.monotonic()
+    def _progress(phase: str, current: int, total: int) -> None:
+        # No-op when the caller did not supply a callback.
+        if on_progress:
+            on_progress(phase, current, total)
+    known_files = {f.path.as_posix() for f in files}
+    # --- 1. AST parsing (parallelized, cache-aware) ---
+    cache = ParseCache(repo_path / config.cache_dir)
+    cached_results: dict[int, ParseResult] = {}
+    # Keep the content hash for cache misses so we don't re-read each file
+    # after parsing just to compute the key again.
+    to_parse: list[tuple[int, FileInfo, str | None]] = []
+    for idx, finfo in enumerate(files):
+        full_path = repo_path / finfo.path
+        content_hash: str | None = None
+        try:
+            content_hash = ParseCache.file_hash(full_path)
+            hit = cache.get(content_hash)
+            if hit is not None:
+                cached_results[idx] = hit
+                continue
+        except OSError:
+            # The file is still queued for parsing below; if hashing itself
+            # failed, content_hash stays None and the result is not cached.
+            pass
+        to_parse.append((idx, finfo, content_hash))
+    _progress("Parsing files", len(cached_results), len(files))
+    has_git = _is_git_repo(repo_path)
+    with ThreadPoolExecutor(max_workers=workers) as executor:
+        # --- 2. Git history (concurrent with parsing) ---
+        # Submitted before the parse tasks, on the same executor.
+        git_future = (
+            executor.submit(
+                _fetch_git_history, repo_path, since_days, known_files,
+                config.thresholds.ai_confidence_threshold,
+            )
+            if has_git
+            else None
+        )
+        # Pre-sized so results can be stored by original file index regardless
+        # of the order in which futures complete.
+        parse_results: list[ParseResult] = [None] * len(files)  # type: ignore[list-item]
+        for idx, cached in cached_results.items():
+            parse_results[idx] = cached
+        if to_parse:
+            new_results: list[tuple[int, str, ParseResult]] = [None] * len(to_parse)  # type: ignore[list-item]
+            futures = {
+                executor.submit(parse_file, finfo.path, repo_path, finfo.language): (
+                    i,
+                    idx,
+                    content_hash,
+                )
+                for i, (idx, finfo, content_hash) in enumerate(to_parse)
+            }
+            for future in as_completed(futures):
+                i, idx, content_hash = futures[future]
+                # An exception raised inside parse_file propagates from here.
+                result = future.result()
+                parse_results[idx] = result
+                if content_hash is not None:
+                    new_results[i] = (idx, content_hash, result)
+            # Cache writes happen here, after all parse futures have completed;
+            # entries left as None (no content hash) are skipped.
+            for entry in new_results:
+                if entry is not None:
+                    _idx, h, r = entry
+                    cache.put(h, r)
+        _progress("Parsing files", len(files), len(files))
+        if git_future is not None:
+            commits, file_histories = git_future.result()
+        else:
+            logging.getLogger("drift").info("Not a git repository — skipping git history analysis.")
+            commits, file_histories = [], {}
+    # NOTE(review): this phase is reported after the git-history result has
+    # already been collected above.
+    _progress("Analyzing git history", 0, 0)
+    # --- 3. Embedding service ---
+    emb_svc = None
+    if config.embeddings_enabled:
+        emb_svc = get_embedding_service(
+            cache_dir=repo_path / config.cache_dir,
+            model_name=config.embedding_model,
+            batch_size=config.embedding_batch_size,
+        )
+    # --- 4. Signals ---
+    ctx = AnalysisContext(
+        repo_path=repo_path,
+        config=config,
+        parse_results=parse_results,
+        file_histories=file_histories,
+        embedding_service=emb_svc,
+    )
+    signals = create_signals(ctx)
+    all_findings: list[Finding] = []
+    total_signals = len(signals)
+    for i, signal in enumerate(signals):
+        _progress(f"Signal: {signal.name}", i + 1, total_signals)
+        try:
+            findings = signal.analyze(parse_results, file_histories, config)
+            all_findings.extend(findings)
+        except Exception:
+            # A failing signal is logged with its traceback and skipped so the
+            # remaining signals still run.
+            logging.getLogger("drift").warning(
+                "Signal '%s' failed; skipping.",
+                signal.name,
+                exc_info=True,
+            )
+    # --- 5. Scoring ---
+    assign_impact_scores(all_findings, config.weights)
+    # --- 5b. Inline suppression ---
+    # Suppressed findings are removed before signal, repo and module scores
+    # are computed, so they do not contribute to those scores.
+    suppressions = scan_suppressions(files, repo_path)
+    all_findings, suppressed_findings = filter_findings(all_findings, suppressions)
+    suppressed_count = len(suppressed_findings)
+    signal_scores = compute_signal_scores(all_findings)
+    repo_score = composite_score(signal_scores, config.weights)
+    module_scores = compute_module_scores(all_findings, config.weights)
+    # --- 6. Pattern catalog ---
+    pattern_catalog: dict[PatternCategory, list[PatternInstance]] = {}
+    for pr in parse_results:
+        for pattern in pr.patterns:
+            pattern_catalog.setdefault(pattern.category, []).append(pattern)
+    # --- 7. Assemble result ---
+    total_funcs = sum(len(pr.functions) for pr in parse_results)
+    ai_commits = sum(1 for c in commits if c.is_ai_attributed)
+    # max(1, ...) avoids a division by zero when there are no commits.
+    ai_ratio = ai_commits / max(1, len(commits))
+    duration = time.monotonic() - start
+    return RepoAnalysis(
+        repo_path=repo_path,
+        analyzed_at=datetime.datetime.now(tz=datetime.UTC),
+        drift_score=repo_score,
+        module_scores=module_scores,
+        findings=all_findings,
+        pattern_catalog=pattern_catalog,
+        total_files=len(files),
+        total_functions=total_funcs,
+        ai_attributed_ratio=round(ai_ratio, 3),
+        analysis_duration_seconds=round(duration, 2),
+        commits=commits,
+        file_histories=file_histories,
+        suppressed_count=suppressed_count,
+    )
+# ---------------------------------------------------------------------------
+# Public entry points
+# ---------------------------------------------------------------------------
+def analyze_repo(
+    repo_path: Path,
+    config: DriftConfig | None = None,
+    since_days: int = 90,
+    target_path: str | None = None,
+    on_progress: ProgressCallback | None = None,
+    workers: int = _DEFAULT_WORKERS,
+) -> RepoAnalysis:
+    """Run full drift analysis on a repository.
+    Args:
+        repo_path: Path to the repository root (resolved to an absolute path).
+        config: Drift configuration. Loaded via ``DriftConfig.load`` if None.
+        since_days: How many days of git history to analyze.
+        target_path: Optional repository-relative subdirectory (or file) to
+            restrict analysis to.  Matching is done on whole path components.
+        on_progress: Optional callback (phase, current, total) for progress display.
+        workers: Number of parallel parsing threads.
+    Returns:
+        Complete RepoAnalysis with scores, findings, and module breakdowns.
+    """
+    repo_path = repo_path.resolve()
+    start = time.monotonic()
+    if config is None:
+        config = DriftConfig.load(repo_path)
+    if on_progress:
+        on_progress("Discovering files", 0, 0)
+    files = discover_files(
+        repo_path,
+        include=config.include,
+        exclude=config.exclude,
+    )
+    if target_path:
+        # Compare path components, not raw strings: a plain string-prefix test
+        # would let a target of "src/app" also select "src/application/...".
+        target = Path(target_path)
+        files = [f for f in files if f.path.is_relative_to(target)]
+    return _run_pipeline(
+        repo_path, files, config,
+        since_days=since_days,
+        on_progress=on_progress,
+        workers=workers,
+        _start=start,
+    )
+def analyze_diff(
+    repo_path: Path,
+    config: DriftConfig | None = None,
+    diff_ref: str = "HEAD~1",
+    workers: int = _DEFAULT_WORKERS,
+    on_progress: ProgressCallback | None = None,
+    since_days: int = 90,
+) -> RepoAnalysis:
+    """Analyze only files changed since a given git ref.
+    Useful for CI — only checks files in the current diff.
+    Runs signals only on changed files rather than the entire repo.
+    Args:
+        repo_path: Path to the repository root (resolved to an absolute path).
+        config: Drift configuration. Loaded via ``DriftConfig.load`` if None.
+        diff_ref: Ref passed to ``git diff --name-only``.
+        workers: Number of parallel parsing threads.
+        on_progress: Optional callback (phase, current, total) for progress display.
+        since_days: How many days of git history to analyze.
+    Returns:
+        A RepoAnalysis for the changed files.  If the changed-file list cannot
+        be obtained from git, a full ``analyze_repo`` result is returned instead;
+        if no changed file is among the discovered files, an empty analysis
+        with a drift score of 0.0 is returned.
+    """
+    logger = logging.getLogger("drift")
+    repo_path = repo_path.resolve()
+    start = time.monotonic()
+    if config is None:
+        config = DriftConfig.load(repo_path)
+    # Get changed files from git (subprocess per ADR-004)
+    changed_files: list[str] = []
+    try:
+        result = subprocess.run(
+            ["git", "diff", "--name-only", diff_ref],
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+            cwd=repo_path,
+            check=True,
+        )
+        changed_files = [line for line in result.stdout.strip().splitlines() if line]
+    except Exception as exc:
+        logger.warning(
+            "Could not resolve diff ref '%s': %s. Falling back to full analysis.",
+            diff_ref,
+            exc,
+        )
+        # Forward the caller's history window and progress callback so the
+        # fallback honours the same options as the diff path would have.
+        return analyze_repo(
+            repo_path, config,
+            since_days=since_days,
+            on_progress=on_progress,
+            workers=workers,
+        )
+    if not changed_files:
+        return RepoAnalysis(
+            repo_path=repo_path,
+            analyzed_at=datetime.datetime.now(tz=datetime.UTC),
+            drift_score=0.0,
+        )
+    all_files = discover_files(
+        repo_path,
+        include=config.include,
+        exclude=config.exclude,
+    )
+    # Keep only discovered files that git reported as changed.
+    changed_set = set(changed_files)
+    files = [f for f in all_files if f.path.as_posix() in changed_set]
+    if not files:
+        return RepoAnalysis(
+            repo_path=repo_path,
+            analyzed_at=datetime.datetime.now(tz=datetime.UTC),
+            drift_score=0.0,
+        )
+    return _run_pipeline(
+        repo_path, files, config,
+        since_days=since_days,
+        on_progress=on_progress,
+        workers=workers,
+        _start=start,
+    )

drift/analyzers/typescript/alias_resolver.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""Resolve TypeScript path aliases defined in tsconfig.json."""
+from __future__ import annotations
+import json
+from pathlib import Path
+_ALLOWED_EXTENSIONS = {".ts", ".tsx"}
+def _load_compiler_options(tsconfig_path: Path) -> dict[str, object]:
+    """Load ``compilerOptions`` from a tsconfig file.
+    An empty mapping is returned when the file cannot be read, is not valid
+    UTF-8, is not valid JSON, or when either the top-level value or
+    ``compilerOptions`` is not a JSON object.
+    Note: parsing uses strict ``json.loads``, so a tsconfig that contains
+    comments or trailing commas is treated as unparseable.
+    """
+    try:
+        data = json.loads(tsconfig_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        return {}
+    if not isinstance(data, dict):
+        # Valid JSON but not an object (e.g. a list), so there is nothing to look up.
+        return {}
+    compiler_options = data.get("compilerOptions", {})
+    if not isinstance(compiler_options, dict):
+        return {}
+    return compiler_options
+def _match_alias_pattern(alias_pattern: str, module_spec: str) -> str | None:
+    """Return the wildcard capture if *module_spec* matches *alias_pattern*.
+    Returns:
+        ``""`` for an exact match of a pattern without ``*``; the text matched
+        by ``*`` for a single-wildcard pattern; ``None`` when there is no match
+        or the pattern contains more than one ``*``.
+    """
+    if "*" not in alias_pattern:
+        return "" if alias_pattern == module_spec else None
+    if alias_pattern.count("*") != 1:
+        return None
+    prefix, suffix = alias_pattern.split("*")
+    # The prefix and suffix must match disjoint parts of the specifier;
+    # otherwise "ab*ba" would wrongly match "aba" with an empty capture.
+    if len(module_spec) < len(prefix) + len(suffix):
+        return None
+    if not (module_spec.startswith(prefix) and module_spec.endswith(suffix)):
+        return None
+    return module_spec[len(prefix) : len(module_spec) - len(suffix)]
+def _expand_target_pattern(target_pattern: str, wildcard_capture: str) -> str | None:
+    """Substitute *wildcard_capture* for the single ``*`` in *target_pattern*.
+    A pattern without ``*`` is returned unchanged, but only when the capture
+    is empty.  ``None`` is returned for a non-empty capture with a literal
+    pattern, and for patterns containing more than one ``*``.
+    """
+    star_count = target_pattern.count("*")
+    if star_count == 0:
+        if wildcard_capture == "":
+            return target_pattern
+        return None
+    if star_count > 1:
+        return None
+    head, _star, tail = target_pattern.partition("*")
+    return f"{head}{wildcard_capture}{tail}"
+def _resolve_candidate_file(base_candidate: Path) -> Path | None:
+    """Map a candidate path onto an existing ``.ts``/``.tsx`` file, if any.
+    A candidate that already carries an allowed extension is accepted only if
+    that exact file exists.  Otherwise ``<candidate>.ts``, ``<candidate>.tsx``,
+    ``<candidate>/index.ts`` and ``<candidate>/index.tsx`` are probed in that
+    order and the first existing file is returned.
+    """
+    if base_candidate.suffix in _ALLOWED_EXTENSIONS:
+        if base_candidate.is_file():
+            return base_candidate
+        return None
+    stem_text = base_candidate.as_posix()
+    probes = [Path(f"{stem_text}{ext}") for ext in (".ts", ".tsx")]
+    probes += [base_candidate / name for name in ("index.ts", "index.tsx")]
+    for probe in probes:
+        if probe.is_file():
+            return probe
+    return None
+def resolve_tsconfig_alias_import(
+    repo_path: Path,
+    source_path: Path,
+    module_spec: str,
+) -> Path | None:
+    """Resolve a TS alias import to a repository-relative .ts/.tsx path.
+    Only ``<repo_path>/tsconfig.json`` is consulted.  Alias patterns are tried
+    in the order they appear under ``compilerOptions.paths``; the first target
+    that resolves to an existing file inside the repository wins.
+    Args:
+        repo_path: Repository root.
+        source_path: Repository-relative source file path (reserved for API parity).
+        module_spec: Import module specifier.
+    Returns:
+        Normalized repository-relative target file path if resolved, otherwise
+        None.  Relative specifiers (``./``, ``../``) are never treated as aliases.
+    """
+    import os  # function-scope import: only needed for lexical path normalization
+    _ = source_path
+    if module_spec.startswith("./") or module_spec.startswith("../"):
+        return None
+    tsconfig_path = repo_path / "tsconfig.json"
+    if not tsconfig_path.is_file():
+        return None
+    compiler_options = _load_compiler_options(tsconfig_path)
+    base_url = compiler_options.get("baseUrl", ".")
+    paths = compiler_options.get("paths", {})
+    if not isinstance(base_url, str) or not isinstance(paths, dict):
+        return None
+    base_dir = tsconfig_path.parent / Path(base_url)
+    repo_root = Path(os.path.normpath(repo_path))
+    for alias_pattern, target_patterns in paths.items():
+        if not isinstance(alias_pattern, str) or not isinstance(target_patterns, list):
+            continue
+        wildcard_capture = _match_alias_pattern(alias_pattern, module_spec)
+        if wildcard_capture is None:
+            continue
+        for target_pattern in target_patterns:
+            if not isinstance(target_pattern, str):
+                continue
+            expanded = _expand_target_pattern(target_pattern, wildcard_capture)
+            if expanded is None:
+                continue
+            resolved = _resolve_candidate_file(base_dir / Path(expanded))
+            if resolved is None:
+                continue
+            # Path.relative_to is purely lexical, so collapse ".." segments
+            # first; otherwise "<repo>/src/../lib/a.ts" would come back as
+            # "src/../lib/a.ts" and "<repo>/../x.ts" as "../x.ts".
+            normalized = Path(os.path.normpath(resolved))
+            try:
+                relative = normalized.relative_to(repo_root)
+            except ValueError:
+                continue
+            if ".." in relative.parts:
+                # Target lies outside the repository.
+                continue
+            return relative
+    return None

drift/analyzers/typescript/barrel_resolver.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""Resolve one-hop TypeScript barrel re-exports from index.ts files."""
+from __future__ import annotations
+import posixpath
+import re
+from dataclasses import dataclass
+from pathlib import Path
+# Matches a single whole line of the form ``export * from "<module>"`` (single
+# or double quotes, optional trailing semicolon).  Group 1 is the module
+# specifier.  Namespace forms such as ``export * as ns from ...`` do not match.
+_EXPORT_STAR_RE = re.compile(
+    r"^\s*export\s+\*\s+from\s+[\"']([^\"']+)[\"']\s*;?\s*$"
+)
+# Matches a single whole line of the form ``export { ... } from "<module>"``.
+# Group 1 is the raw text between the braces, group 2 the module specifier.
+# ``export type { ... } from ...`` does not match.
+_EXPORT_NAMED_RE = re.compile(
+    r"^\s*export\s*\{([^}]*)\}\s*from\s+[\"']([^\"']+)[\"']\s*;?\s*$"
+)
+@dataclass(frozen=True)
+class BarrelExport:
+    """Represents a single barrel re-export statement."""
+    # Module specifier the statement re-exports from (the quoted ``from`` target).
+    module_spec: str
+    # Names exposed by an ``export { ... } from`` clause; ``None`` for
+    # ``export * from``.
+    # NOTE(review): the generated __hash__ hashes this field, so an instance
+    # holding a ``set`` here cannot be hashed.
+    exported_names: set[str] | None
+def _normalize_rel_path(path: Path) -> Path:
+    """Collapse ``.``, ``..`` and redundant separators in *path* using POSIX rules."""
+    posix_text = path.as_posix()
+    collapsed = posixpath.normpath(posix_text)
+    return Path(collapsed)
+def _resolve_relative_target(repo_path: Path, source_path: Path, module_spec: str) -> Path | None:
+    """Resolve a relative TS module specifier to a repository-relative file.
+    Specifiers not starting with ``./`` or ``../`` yield ``None``.  The
+    specifier is joined onto the directory of *source_path* and normalized.
+    A target that already ends in ``.ts``/``.tsx`` must exist as-is; otherwise
+    ``<target>.ts``, ``<target>.tsx``, ``<target>/index.ts`` and
+    ``<target>/index.tsx`` are probed in that order.
+    """
+    if not module_spec.startswith(("./", "../")):
+        return None
+    stem = _normalize_rel_path(source_path.parent / module_spec)
+    if stem.suffix in {".ts", ".tsx"}:
+        if (repo_path / stem).is_file():
+            return stem
+        return None
+    probes = [Path(f"{stem.as_posix()}{ext}") for ext in (".ts", ".tsx")]
+    probes.extend(stem / name for name in ("index.ts", "index.tsx"))
+    for probe in probes:
+        normalized = _normalize_rel_path(probe)
+        if (repo_path / normalized).is_file():
+            return normalized
+    return None
+def _parse_named_export_names(export_clause: str) -> set[str]:
+    """Collect the externally visible names from an ``export { ... } from`` clause.
+    Each comma-separated item contributes one name: the alias for
+    ``local as alias`` items, otherwise the item itself.  A leading ``type ``
+    modifier is dropped and empty items are ignored.
+    """
+    collected: set[str] = set()
+    for raw_item in export_clause.split(","):
+        item = raw_item.strip()
+        if not item:
+            continue
+        # No-op unless the item starts with the inline ``type `` modifier.
+        item = item.removeprefix("type ").strip()
+        _local, keyword, alias = item.partition(" as ")
+        if not keyword:
+            collected.add(item)
+            continue
+        alias = alias.strip()
+        if alias:
+            collected.add(alias)
+    return collected
+def _extract_barrel_exports(index_text: str) -> list[BarrelExport]:
+    """Scan *index_text* line by line and collect its re-export statements.
+    ``export * from`` lines yield an entry with ``exported_names=None``;
+    ``export { ... } from`` lines yield an entry carrying the parsed names.
+    Lines matching neither form are ignored.  Entries keep source order.
+    """
+    collected: list[BarrelExport] = []
+    for source_line in index_text.splitlines():
+        match = _EXPORT_STAR_RE.match(source_line)
+        if match:
+            spec = match.group(1)
+            names = None
+        else:
+            match = _EXPORT_NAMED_RE.match(source_line)
+            if not match:
+                continue
+            spec = match.group(2)
+            names = _parse_named_export_names(match.group(1))
+        collected.append(BarrelExport(module_spec=spec, exported_names=names))
+    return collected
+def resolve_index_barrel_target(
+    repo_path: Path,
+    index_path: Path,
+    imported_symbols: set[str] | None,
+) -> Path | None:
+    """Resolve imports targeting index.ts to a one-hop re-export source file.
+    Returns a repository-relative target file only when a single unambiguous
+    re-export target can be selected.
+    Args:
+        repo_path: Repository root.
+        index_path: Repository-relative path of the barrel file; anything not
+            named ``index.ts`` yields ``None``.
+        imported_symbols: Names the importer pulls from the barrel, or ``None``
+            when unknown.  Named re-exports are considered only when they
+            share at least one name with this set; ``export *`` re-exports
+            are always considered.
+    Returns:
+        The single resolved target, or ``None`` when the barrel cannot be
+        read or when zero or several distinct targets qualify.
+    """
+    if index_path.name != "index.ts":
+        return None
+    try:
+        index_text = (repo_path / index_path).read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        # A missing or unreadable barrel is just another unresolved case.
+        return None
+    candidates: set[Path] = set()
+    for barrel_export in _extract_barrel_exports(index_text):
+        if barrel_export.exported_names is not None:
+            if imported_symbols is None:
+                continue
+            if not (barrel_export.exported_names & imported_symbols):
+                continue
+        resolved = _resolve_relative_target(repo_path, index_path, barrel_export.module_spec)
+        if resolved is None:
+            continue
+        candidates.add(resolved)
+    if len(candidates) != 1:
+        return None
+    return next(iter(candidates))