PyPI - kdrift - Versions diffs - 0.1.0__py3-none-any.whl - Mend

kdrift 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

kdrift/__init__.py +5 -0
kdrift/__main__.py +5 -0
kdrift/cli.py +188 -0
kdrift/config.py +106 -0
kdrift/diff.py +232 -0
kdrift/discover.py +317 -0
kdrift/git.py +184 -0
kdrift/logging.py +82 -0
kdrift/lsp_server.py +576 -0
kdrift/mcp_server.py +165 -0
kdrift/models.py +145 -0
kdrift/pipeline.py +223 -0
kdrift/py.typed +0 -0
kdrift/render.py +182 -0
kdrift/watch.py +163 -0
kdrift-0.1.0.dist-info/METADATA +13 -0
kdrift-0.1.0.dist-info/RECORD +19 -0
kdrift-0.1.0.dist-info/WHEEL +4 -0
kdrift-0.1.0.dist-info/entry_points.txt +2 -0

kdrift/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Kustomize manifest drift detection tool."""
+import yaml
+# Use PyYAML's CSafeLoader when the installed build exposes it, otherwise fall
+# back to the pure-Python SafeLoader. Other kdrift modules import this name.
+safe_loader: type = getattr(yaml, "CSafeLoader", yaml.SafeLoader)

kdrift/__main__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Allow running as `python -m kdrift`."""
+from kdrift.cli import main
+main()

kdrift/cli.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""CLI entrypoint."""
+from __future__ import annotations
+import json
+import sys
+from pathlib import Path
+import click
+import structlog
+from kdrift import config, git, models, pipeline
+from kdrift import logging as kdrift_logging
+from kdrift import watch as kdrift_watch
+# Module-level structlog logger for the CLI module.
+log: structlog.stdlib.BoundLogger = structlog.get_logger()
+@click.group()
+@click.option("--log-level", default="WARNING", help="Log level (DEBUG, INFO, WARNING, ERROR).")
+@click.pass_context
+def main(ctx: click.Context, log_level: str) -> None:
+    """Kustomize manifest drift detection tool."""
+    cfg = config.AppConfig()
+    kdrift_logging.configure_logging(log_level=log_level, log_format=cfg.log_format)
+    ctx.ensure_object(dict)
+    ctx.obj["config"] = cfg
+    ctx.obj["log_level"] = log_level
+def _parse_ref_range(ref: str) -> tuple[str, str | None]:
+    """Parse a ref or ref range (A..B) into (base_ref, target_ref)."""
+    if ".." in ref:
+        parts = ref.split("..", 1)
+        if not parts[0] or not parts[1]:
+            msg = f"Invalid ref range '{ref}': both sides of '..' are required"
+            raise click.BadParameter(msg, param_hint="'--ref'")
+        return parts[0], parts[1]
+    return ref, None
+@main.command()
+@click.argument("paths", nargs=-1, type=click.Path(exists=False))
+@click.option("--repo", "-C", "repo_path", type=click.Path(exists=True), default=None, help="Repository root.")
+@click.option("--ref", default="HEAD", help="Git ref for baseline, or A..B for two-ref comparison.")
+@click.option("--overlay", type=click.Path(), default=None, help="Diff only this overlay.")
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["unified", "json"]),
+    default="unified",
+    help="Output format.",
+)
+@click.option("--watch", "watch_mode", is_flag=True, help="Watch for changes and re-diff continuously.")
+@click.option("--check", is_flag=True, help="Exit non-zero if any overlay has drift (CI/pre-commit mode).")
+@click.pass_context
+def diff(
+    ctx: click.Context,
+    paths: tuple[str, ...],
+    repo_path: str | None,
+    ref: str,
+    overlay: str | None,
+    output_format: str,
+    watch_mode: bool,
+    check: bool,
+) -> None:
+    """Diff kustomize overlays against a baseline ref."""
+    start = Path(repo_path) if repo_path else (Path(paths[0]).resolve() if paths else None)
+    try:
+        repo_root = git.find_repo_root(start)
+    except git.GitError as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+    if not git.has_commits(repo_root):
+        click.echo("Error: repository has no commits yet", err=True)
+        sys.exit(1)
+    base_ref, target_ref = _parse_ref_range(ref)
+    proj_config = config.load_project_config(repo_root)
+    path_list = [Path(p) for p in paths] if paths else None
+    if watch_mode:
+        if target_ref is not None:
+            click.echo("Error: --watch is not supported with ref ranges (A..B)", err=True)
+            sys.exit(1)
+        kdrift_watch.watch(
+            repo_root=repo_root,
+            ref=base_ref,
+            paths=path_list,
+            output_format=output_format,
+            kustomize_args=proj_config.kustomize_args,
+        )
+        return
+    overlay_path = Path(overlay) if overlay else None
+    try:
+        result = pipeline.run_diff(
+            repo_root=repo_root,
+            ref=base_ref,
+            paths=path_list,
+            overlay_filter=overlay_path,
+            kustomize_args=proj_config.kustomize_args,
+            target_ref=target_ref,
+        )
+    except Exception as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+    if output_format == "json":
+        _print_json(result)
+    else:
+        _print_unified(result)
+    if result.has_errors:
+        sys.exit(1)
+    if check and result.has_changes:
+        sys.exit(1)
+    if not result.has_changes:
+        sys.exit(0)
+@main.command()
+@click.option("--debug", is_flag=True, help="Enable debug logging and file log (~/.cache/kdrift/kdrift.log).")
+@click.pass_context
+def mcp(ctx: click.Context, debug: bool) -> None:
+    """Start the MCP server for AI agent integration."""
+    log_level = "DEBUG" if debug else ctx.obj["log_level"]
+    kdrift_logging.configure_logging(log_level=log_level, stream="stderr", log_file=debug)
+    from kdrift import mcp_server
+    mcp_server.run_mcp_server()
+@main.command()
+@click.option("--debug", is_flag=True, help="Enable debug logging and file log (~/.cache/kdrift/kdrift.log).")
+@click.pass_context
+def lsp(ctx: click.Context, debug: bool) -> None:
+    """Start the LSP server for IDE integration."""
+    log_level = "DEBUG" if debug else ctx.obj["log_level"]
+    kdrift_logging.configure_logging(log_level=log_level, stream="stderr", log_file=debug)
+    from kdrift import lsp_server
+    lsp_server.run_lsp_server()
+def _print_json(result: models.DiffResult) -> None:
+    """Print structured JSON output."""
+    output = json.loads(result.model_dump_json())
+    click.echo(json.dumps(output, indent=2))
+def _print_unified(result: models.DiffResult) -> None:
+    """Print unified diff output."""
+    if not result.has_changes and not result.has_errors:
+        return
+    for overlay_result in result.overlays:
+        if overlay_result.has_error:
+            click.echo(f"ERROR [{overlay_result.path}]: {overlay_result.error}", err=True)
+            continue
+        if not overlay_result.has_changes:
+            continue
+        click.echo(f"=== {overlay_result.path} ===")
+        for change in overlay_result.changes:
+            status_marker = {
+                models.DiffStatus.ADDED: "[NEW]",
+                models.DiffStatus.REMOVED: "[DEL]",
+                models.DiffStatus.MODIFIED: "",
+            }[change.status]
+            rid = change.resource_id
+            header = f"{rid.gvk} {rid.namespace}/{rid.name}" if rid.namespace else f"{rid.gvk} {rid.name}"
+            if status_marker:
+                header = f"{status_marker} {header}"
+            click.echo(f"\n--- {header} ---")
+            if change.diff_text:
+                click.echo(change.diff_text)
+        click.echo()
+    for error in result.errors:
+        click.echo(f"ERROR: {error}", err=True)

kdrift/config.py ADDED Viewed

@@ -0,0 +1,106 @@
+"""Configuration hierarchy: .kdrift.yaml (project > org > user)."""
+from __future__ import annotations
+import os
+from pathlib import Path
+import pydantic
+import pydantic_settings
+import yaml
+from kdrift import safe_loader
+class AppConfig(pydantic_settings.BaseSettings):
+    """Application configuration loaded from environment variables."""
+    # Fields are populated from KDRIFT_-prefixed environment variables;
+    # frozen=True makes instances immutable after construction.
+    model_config = pydantic_settings.SettingsConfigDict(
+        env_prefix="KDRIFT_",
+        frozen=True,
+    )
+    # NOTE(review): the CLI's --log-level option has its own default ("WARNING")
+    # and the visible CLI code never reads this field — confirm it is used elsewhere.
+    log_level: str = "INFO"
+    # Log output format; the CLI forwards it to kdrift.logging.configure_logging.
+    log_format: str = "json"
+    # Presumably an external diff program to use; its consumer is not visible here.
+    external_diff: str | None = None
+class ProjectConfig(pydantic.BaseModel):
+    """Configuration from .kdrift.yaml files."""
+    # frozen=True makes instances immutable after validation.
+    model_config = pydantic.ConfigDict(frozen=True)
+    # Extra command-line arguments handed to the diff pipeline / watcher for
+    # kustomize. The default list enables Helm and disables load restrictions.
+    # A default_factory gives each instance its own list.
+    kustomize_args: list[str] = pydantic.Field(
+        default_factory=lambda: [
+            "--enable-helm",
+            "--load-restrictor",
+            "LoadRestrictionsNone",
+        ]
+    )
+    # Presumably an override for the kustomize executable; the consumer is not
+    # visible here — TODO confirm what None resolves to.
+    kustomize_binary: str | None = None
+def load_project_config(start_dir: Path | None = None) -> ProjectConfig:
+    """Load project config by walking upward from start_dir.
+    Searches for .kdrift.yaml files from start_dir up to filesystem root,
+    then checks the user-level XDG config. More specific files override
+    less specific ones (per key).
+    """
+    configs: list[dict[str, object]] = []
+    user_config = _user_config_path()
+    if user_config.is_file():
+        data = _load_yaml(user_config)
+        if data:
+            configs.append(data)
+    start = start_dir or Path.cwd()
+    path_configs = _walk_up_configs(start)
+    configs.extend(reversed(path_configs))
+    if not configs:
+        return ProjectConfig()
+    merged: dict[str, object] = {}
+    for cfg in configs:
+        merged.update(cfg)
+    return ProjectConfig.model_validate(merged)
+def _walk_up_configs(start: Path) -> list[dict[str, object]]:
+    """Walk upward from start collecting .kdrift.yaml files (most specific first)."""
+    configs: list[dict[str, object]] = []
+    current = start.resolve()
+    while True:
+        cfg_file = current / ".kdrift.yaml"
+        if cfg_file.is_file():
+            data = _load_yaml(cfg_file)
+            if data:
+                configs.append(data)
+        parent = current.parent
+        if parent == current:
+            break
+        current = parent
+    return configs
+def _user_config_path() -> Path:
+    """Get the user-level config path (XDG_CONFIG_HOME/kdrift/config.yaml)."""
+    xdg = os.environ.get("XDG_CONFIG_HOME", str(Path.home() / ".config"))
+    return Path(xdg) / "kdrift" / "config.yaml"
+def _load_yaml(path: Path) -> dict[str, object] | None:
+    """Load a YAML file, returning None on error."""
+    try:
+        with path.open() as f:
+            data = yaml.load(f, Loader=safe_loader)
+        if isinstance(data, dict):
+            return data
+    except (yaml.YAMLError, OSError):
+        pass
+    return None

kdrift/diff.py ADDED Viewed

@@ -0,0 +1,232 @@
+"""Per-resource structured diffs with two-phase matching.
+Phase 1: exact match by GVK + namespace + name.
+Phase 2: generator-aware matching for configMapGenerator/secretGenerator
+hash-suffixed names, using longest-name-first ordering to prevent false
+matches (e.g., dex-config-abc12 matching generator `dex` instead of
+`dex-config`).
+"""
+from __future__ import annotations
+import difflib
+import re
+from pathlib import Path
+import yaml
+from kdrift import models, safe_loader
+# Alphabet accepted in a generated name-hash suffix: lowercase consonants
+# (no vowels, no "y") plus the digits 2 and 4-9. Presumably this mirrors the
+# alphabet kustomize uses — verify against the kustomize version in use.
+_KUSTOMIZE_HASH_CHARS = "bcdfghjklmnpqrstvwxz2456789"
+# Matches "<base>-<hash>" where <hash> is 5 to 10 characters from the alphabet
+# above; group 1 captures <base>.
+_HASH_SUFFIX_RE = re.compile(rf"^(.+)-[{_KUSTOMIZE_HASH_CHARS}]{{5,10}}$")
+def diff_rendered(
+    baseline: str,
+    candidate: str,
+    overlay_path: Path,
+) -> models.OverlayResult:
+    """Diff two rendered YAML strings and produce per-resource changes.
+    Args:
+        baseline: Rendered YAML from the baseline ref.
+        candidate: Rendered YAML from the working tree.
+        overlay_path: Path to the overlay (for identification).
+    Returns:
+        OverlayResult with per-resource changes.
+    """
+    baseline_resources = _parse_resources(baseline)
+    candidate_resources = _parse_resources(candidate)
+    changes = _match_and_diff(baseline_resources, candidate_resources)
+    return models.OverlayResult(path=overlay_path, changes=changes)
+def _parse_resources(rendered: str) -> dict[models.ResourceId, str]:
+    """Split rendered YAML into individual resources keyed by identity."""
+    resources: dict[models.ResourceId, str] = {}
+    if not rendered.strip():
+        return resources
+    docs = rendered.split("\n---")
+    for raw_doc in docs:
+        doc = raw_doc.strip()
+        if not doc or doc == "---":
+            continue
+        try:
+            parsed = yaml.load(doc, Loader=safe_loader)
+        except yaml.YAMLError:
+            continue
+        if not isinstance(parsed, dict) or "kind" not in parsed:
+            continue
+        resource_id = models.ResourceId.from_manifest(parsed)
+        resources[resource_id] = doc
+    return resources
+def _match_and_diff(
+    baseline: dict[models.ResourceId, str],
+    candidate: dict[models.ResourceId, str],
+) -> list[models.ResourceChange]:
+    """Two-phase resource matching and diffing."""
+    changes: list[models.ResourceChange] = []
+    matched_baseline: set[models.ResourceId] = set()
+    matched_candidate: set[models.ResourceId] = set()
+    for rid, candidate_yaml in candidate.items():
+        if rid in baseline:
+            matched_baseline.add(rid)
+            matched_candidate.add(rid)
+            diff_text = _unified_diff(baseline[rid], candidate_yaml, rid)
+            if diff_text:
+                added, removed = _count_diff_lines(diff_text)
+                changes.append(
+                    models.ResourceChange(
+                        resource_id=rid,
+                        status=models.DiffStatus.MODIFIED,
+                        diff_text=diff_text,
+                        lines_added=added,
+                        lines_removed=removed,
+                    )
+                )
+    unmatched_baseline = {rid: y for rid, y in baseline.items() if rid not in matched_baseline}
+    unmatched_candidate = {rid: y for rid, y in candidate.items() if rid not in matched_candidate}
+    if unmatched_baseline and unmatched_candidate:
+        gen_matches = _generator_aware_match(unmatched_baseline, unmatched_candidate)
+        for b_rid, c_rid in gen_matches:
+            matched_baseline.add(b_rid)
+            matched_candidate.add(c_rid)
+            diff_text = _unified_diff(baseline[b_rid], candidate[c_rid], c_rid)
+            if diff_text:
+                added, removed = _count_diff_lines(diff_text)
+                changes.append(
+                    models.ResourceChange(
+                        resource_id=c_rid,
+                        status=models.DiffStatus.MODIFIED,
+                        diff_text=diff_text,
+                        lines_added=added,
+                        lines_removed=removed,
+                    )
+                )
+    for rid in sorted(unmatched_candidate.keys() - matched_candidate, key=lambda r: r.name):
+        changes.append(
+            models.ResourceChange(
+                resource_id=rid,
+                status=models.DiffStatus.ADDED,
+                diff_text=_added_diff(candidate[rid], rid),
+                lines_added=len(candidate[rid].splitlines()),
+            )
+        )
+    for rid in sorted(unmatched_baseline.keys() - matched_baseline, key=lambda r: r.name):
+        changes.append(
+            models.ResourceChange(
+                resource_id=rid,
+                status=models.DiffStatus.REMOVED,
+                diff_text=_removed_diff(baseline[rid], rid),
+                lines_removed=len(baseline[rid].splitlines()),
+            )
+        )
+    return changes
+def _generator_aware_match(
+    baseline: dict[models.ResourceId, str],
+    candidate: dict[models.ResourceId, str],
+) -> list[tuple[models.ResourceId, models.ResourceId]]:
+    """Phase 2: match resources with hash-suffixed names.
+    Sorts by name length (longest first) to prevent short generator
+    names from stealing matches. E.g., generator `dex-config` should
+    match `dex-config-abc12` before generator `dex` gets a chance.
+    """
+    matches: list[tuple[models.ResourceId, models.ResourceId]] = []
+    used_candidate: set[models.ResourceId] = set()
+    baseline_sorted = sorted(baseline.keys(), key=lambda r: len(r.name), reverse=True)
+    for b_rid in baseline_sorted:
+        b_base = _strip_hash_suffix(b_rid.name)
+        for c_rid in candidate:
+            if c_rid in used_candidate:
+                continue
+            if c_rid.group != b_rid.group or c_rid.version != b_rid.version:
+                continue
+            if c_rid.kind != b_rid.kind or c_rid.namespace != b_rid.namespace:
+                continue
+            c_base = _strip_hash_suffix(c_rid.name)
+            if b_base == c_base:
+                matches.append((b_rid, c_rid))
+                used_candidate.add(c_rid)
+                break
+    return matches
+def _strip_hash_suffix(name: str) -> str:
+    """Strip a kustomize hash suffix from a resource name."""
+    match = _HASH_SUFFIX_RE.match(name)
+    return match.group(1) if match else name
+def _unified_diff(baseline_yaml: str, candidate_yaml: str, rid: models.ResourceId) -> str:
+    """Produce a unified diff between two YAML strings."""
+    if baseline_yaml and not baseline_yaml.endswith("\n"):
+        baseline_yaml += "\n"
+    if candidate_yaml and not candidate_yaml.endswith("\n"):
+        candidate_yaml += "\n"
+    baseline_lines = baseline_yaml.splitlines(keepends=True)
+    candidate_lines = candidate_yaml.splitlines(keepends=True)
+    diff = difflib.unified_diff(
+        baseline_lines,
+        candidate_lines,
+        fromfile=f"baseline/{rid.gvk}/{rid.namespace}/{rid.name}",
+        tofile=f"candidate/{rid.gvk}/{rid.namespace}/{rid.name}",
+    )
+    return "".join(diff)
+def _added_diff(yaml_text: str, rid: models.ResourceId) -> str:
+    """Produce a diff showing a newly added resource."""
+    lines = yaml_text.splitlines(keepends=True)
+    diff = difflib.unified_diff(
+        [],
+        lines,
+        fromfile="/dev/null",
+        tofile=f"candidate/{rid.gvk}/{rid.namespace}/{rid.name}",
+    )
+    return "".join(diff)
+def _removed_diff(yaml_text: str, rid: models.ResourceId) -> str:
+    """Produce a diff showing a removed resource."""
+    lines = yaml_text.splitlines(keepends=True)
+    diff = difflib.unified_diff(
+        lines,
+        [],
+        fromfile=f"baseline/{rid.gvk}/{rid.namespace}/{rid.name}",
+        tofile="/dev/null",
+    )
+    return "".join(diff)
+def _count_diff_lines(diff_text: str) -> tuple[int, int]:
+    """Count added and removed lines in a unified diff."""
+    added = 0
+    removed = 0
+    for line in diff_text.splitlines():
+        if line.startswith("+") and not line.startswith("+++"):
+            added += 1
+        elif line.startswith("-") and not line.startswith("---"):
+            removed += 1
+    return added, removed