kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,304 @@
1
+ """CLI output rendering functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import typer
6
+
7
+
8
def print_rich_stats(stats: dict | None) -> None:
    """Pretty-print the optional stats block (concise, high-signal).

    Silently does nothing when *stats* is falsy. All summary lines are
    emitted in blue via typer; per-column profile rows use plain echo.
    """
    if not stats:
        return

    def blue(message: str) -> None:
        # Every summary line shares the same colour.
        typer.secho(message, fg=typer.colors.BLUE)

    dataset = stats.get("dataset", {}) or {}
    meta = stats.get("run_meta", {}) or {}
    projection = stats.get("projection") or {}

    # "engine" wins over "engine_label" when both are present.
    engine = meta.get("engine") or meta.get("engine_label")

    rows = dataset.get("nrows")
    cols = dataset.get("ncols")
    total_ms = meta.get("duration_ms_total")

    if rows is not None and cols is not None:
        if total_ms is not None:
            line = f"\nStats • rows={rows:,} cols={cols} duration={total_ms} ms"
            if engine:
                line += f" engine={engine}"
            blue(line)
        else:
            blue(f"\nStats • rows={rows:,} cols={cols}")

    # Preplan / pushdown timing (if available)
    analyze_ms = (meta.get("preplan_breakdown_ms") or {}).get("analyze")
    if analyze_ms is not None:
        blue(f"Preplan: analyze={analyze_ms} ms")

    pushdown = meta.get("pushdown_breakdown_ms") or {}
    if pushdown:
        timings = [
            f"{phase}={pushdown[phase]} ms"
            for phase in ("compile", "execute", "introspect")
            if pushdown.get(phase) is not None
        ]
        if timings:
            blue("SQL pushdown: " + ", ".join(timings))

    # Row-group pruning summary from the preplan manifest, when provided.
    manifest = stats.get("pushdown_manifest") or {}
    kept = manifest.get("row_groups_kept")
    total = manifest.get("row_groups_total")
    if kept is not None and total is not None:
        blue(f"Preplan manifest: row-groups {kept}/{total} kept")

    def preview(names) -> str:
        # Show at most six names, with an ellipsis when truncated.
        return ", ".join(names[:6]) + ("…" if len(names) > 6 else "")

    validated = stats.get("columns_validated") or []
    if validated:
        blue(f"Columns validated ({len(validated)}): {preview(validated)}")

    loaded = stats.get("columns_loaded") or []
    if loaded:
        blue(f"Columns loaded ({len(loaded)}): {preview(loaded)}")

    # Projection effectiveness (required / loaded / available counts)
    if projection:
        state = "on" if projection.get("enabled", True) else "off"
        required = projection.get("required_count", 0)
        loaded_cnt = projection.get("loaded_count", 0)
        available = projection.get("available_count")
        verdict = "(pruned)" if projection.get("effective") else "(no reduction)"
        if available is not None:
            blue(
                f"Projection [{state}]: "
                f"{required}/{loaded_cnt}/{available} (req/loaded/avail) {verdict}"
            )
        else:
            blue(
                f"Projection [{state}]: "
                f"{required}/{loaded_cnt} (req/loaded) {verdict}"
            )

    # Optional per-column profile (if requested)
    profile = stats.get("profile")
    if profile:
        blue("Profile:")
        for column, col_stats in profile.items():
            details = [
                f"nulls={col_stats.get('nulls', 0)}",
                f"distinct={col_stats.get('distinct', 0)}",
            ]
            # min/max/mean are only shown when all three are present.
            if {"min", "max", "mean"} <= col_stats.keys():
                details.append(f"min={col_stats['min']}")
                details.append(f"max={col_stats['max']}")
                details.append(f"mean={round(col_stats['mean'], 3)}")
            typer.echo(f"  - {column}: " + ", ".join(details))
110
+
111
+
112
def _split_by_severity(rule_diffs):
    """Partition rule-diff records into (blocking, warning, info) lists."""
    blocking = [rd for rd in rule_diffs if rd.severity == "blocking"]
    warning = [rd for rd in rule_diffs if rd.severity == "warning"]
    info = [rd for rd in rule_diffs if rd.severity == "info"]
    return blocking, warning, info


def _append_new_failure_lines(lines, header, rule_diffs):
    """Append a '<header> (N)' section; one line per newly-failing rule.

    Each line shows the rule id, the violation count (omitted when zero),
    and the failure mode (omitted when falsy).
    """
    if not rule_diffs:
        return
    lines.append(f"\n{header} ({len(rule_diffs)})")
    for rd in rule_diffs:
        count_info = f" ({rd.after_count:,} violations)" if rd.after_count > 0 else ""
        mode_info = f" [{rd.failure_mode}]" if rd.failure_mode else ""
        lines.append(f"  - {rd.rule_id}{count_info}{mode_info}")


def _append_regression_lines(lines, header, rule_diffs):
    """Append a '<header> (N)' section; one line per regressed rule.

    Each line shows before → after violation counts and the positive delta.
    """
    if not rule_diffs:
        return
    lines.append(f"\n{header} ({len(rule_diffs)})")
    for rd in rule_diffs:
        mode_info = f" [{rd.failure_mode}]" if rd.failure_mode else ""
        lines.append(
            f"  - {rd.rule_id}: {rd.before_count:,} → {rd.after_count:,} (+{rd.delta:,}){mode_info}"
        )


def render_diff_rich(diff) -> str:
    """Render validation diff in human-readable format.

    Args:
        diff: Validation diff with ``before``/``after`` runs (each exposing
            ``run_at``, ``contract_name`` and a ``summary``), plus
            ``new_failures``, ``regressions``, ``resolved`` and
            ``improvements`` lists of rule-diff records.

    Returns:
        A multi-line string suitable for terminal output.
    """
    lines = []

    # Header
    before_ts = diff.before.run_at.strftime("%Y-%m-%d %H:%M")
    after_ts = diff.after.run_at.strftime("%Y-%m-%d %H:%M")

    lines.append(f"Diff: {diff.after.contract_name}")
    lines.append(f"Comparing: {before_ts} → {after_ts}")
    lines.append("=" * 50)

    # Overall status
    if diff.status_changed:
        before_status = "PASSED" if diff.before.summary.passed else "FAILED"
        after_status = "PASSED" if diff.after.summary.passed else "FAILED"
        lines.append(f"\nOverall: {before_status} → {after_status}")
    else:
        status = "PASSED" if diff.after.summary.passed else "FAILED"
        lines.append(f"\nOverall: {status} (unchanged)")

    # Summary of rule pass counts
    lines.append(
        f"\nRules: {diff.before.summary.passed_rules}/{diff.before.summary.total_rules} → "
        f"{diff.after.summary.passed_rules}/{diff.after.summary.total_rules}"
    )

    # New failures, one section per severity
    blocking, warning, info = _split_by_severity(diff.new_failures)
    _append_new_failure_lines(lines, "❌ New Blocking Failures", blocking)
    _append_new_failure_lines(lines, "⚠️ New Warnings", warning)
    _append_new_failure_lines(lines, "ℹ️ New Info Issues", info)

    # Regressions, one section per severity
    blocking, warning, info = _split_by_severity(diff.regressions)
    _append_regression_lines(lines, "❌ Blocking Regressions", blocking)
    _append_regression_lines(lines, "⚠️ Warning Regressions", warning)
    _append_regression_lines(lines, "ℹ️ Info Regressions", info)

    # Resolved (failing before, passing now)
    if diff.resolved:
        lines.append(f"\n✅ Resolved ({len(diff.resolved)})")
        for rd in diff.resolved:
            lines.append(f"  - {rd.rule_id}")

    # Improvements (still failing, but fewer violations; delta keeps its sign)
    if diff.improvements:
        lines.append(f"\n📈 Improvements ({len(diff.improvements)})")
        for rd in diff.improvements:
            lines.append(
                f"  - {rd.rule_id}: {rd.before_count:,} → {rd.after_count:,} ({rd.delta:,})"
            )

    # No changes
    if (
        not diff.new_failures
        and not diff.regressions
        and not diff.resolved
        and not diff.improvements
    ):
        lines.append("\n✓ No changes detected")

    return "\n".join(lines)
227
+
228
+
229
def render_profile_diff_rich(diff) -> str:
    """Render profile diff in human-readable format.

    Builds a multi-line report: header, row/column counts, schema changes,
    type changes, null-rate movements, and cardinality changes. Each list
    section is capped at 10 entries.
    """
    out = []
    add = out.append

    # Header (timestamps are ISO strings; keep only 'YYYY-MM-DDTHH:MM')
    add(f"Profile Diff: {diff.after.source_uri}")
    add(f"Comparing: {diff.before.profiled_at[:16]} → {diff.after.profiled_at[:16]}")
    add("=" * 50)

    # Row count
    if diff.row_count_delta != 0:
        prefix = "+" if diff.row_count_delta > 0 else ""
        add(
            f"\nRows: {diff.row_count_before:,} → {diff.row_count_after:,} "
            f"({prefix}{diff.row_count_delta:,}, {diff.row_count_pct_change:+.1f}%)"
        )
    else:
        add(f"\nRows: {diff.row_count_after:,} (unchanged)")

    # Column count
    if diff.column_count_before != diff.column_count_after:
        add(f"Columns: {diff.column_count_before} → {diff.column_count_after}")

    # Schema changes
    added_cols = diff.columns_added
    if added_cols:
        add(f"\n➕ Columns Added ({len(added_cols)})")
        for name in added_cols[:10]:
            add(f"  - {name}")
        hidden = len(added_cols) - 10
        if hidden > 0:
            add(f"  ... and {hidden} more")

    removed_cols = diff.columns_removed
    if removed_cols:
        add(f"\n➖ Columns Removed ({len(removed_cols)})")
        for name in removed_cols[:10]:
            add(f"  - {name}")

    # Type changes
    if diff.dtype_changes:
        add(f"\n🔄 Type Changes ({len(diff.dtype_changes)})")
        for change in diff.dtype_changes[:10]:
            add(f"  - {change.column_name}: {change.dtype_before} → {change.dtype_after}")

    # Null rate increases (potential data quality issues)
    if diff.null_rate_increases:
        add(f"\n⚠️ Null Rate Increases ({len(diff.null_rate_increases)})")
        for change in diff.null_rate_increases[:10]:
            add(
                f"  - {change.column_name}: "
                f"{change.null_rate_before:.1%} → {change.null_rate_after:.1%}"
            )

    # Null rate decreases (improvements)
    if diff.null_rate_decreases:
        add(f"\n✅ Null Rate Decreases ({len(diff.null_rate_decreases)})")
        for change in diff.null_rate_decreases[:10]:
            add(
                f"  - {change.column_name}: "
                f"{change.null_rate_before:.1%} → {change.null_rate_after:.1%}"
            )

    # Cardinality changes
    if diff.cardinality_changes:
        add(f"\n📊 Cardinality Changes ({len(diff.cardinality_changes)})")
        for change in diff.cardinality_changes[:10]:
            prefix = "+" if change.distinct_count_delta > 0 else ""
            add(
                f"  - {change.column_name}: {change.distinct_count_before:,} → "
                f"{change.distinct_count_after:,} ({prefix}{change.distinct_count_delta:,})"
            )

    if not diff.has_changes:
        add("\n✓ No significant changes detected")

    return "\n".join(out)
kontra/cli/utils.py ADDED
@@ -0,0 +1,28 @@
1
+ """CLI utility functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+
8
def parse_duration(duration_str: str) -> int:
    """Parse a duration string like '7d', '24h', '30m' into seconds.

    Supported suffixes (case-insensitive):
        - d: days
        - h: hours
        - m: minutes
        - s: seconds

    Raises:
        ValueError: if the string is not ``<integer><unit>``.
    """
    seconds_per_unit = {"d": 86400, "h": 3600, "m": 60, "s": 1}

    m = re.match(r"^(\d+)([dhms])$", duration_str.lower())
    if m is None:
        raise ValueError(
            f"Invalid duration format: {duration_str}. Use '7d', '24h', '30m', or '60s'."
        )

    amount, unit = m.groups()
    return int(amount) * seconds_per_unit[unit]
@@ -0,0 +1,34 @@
1
# src/kontra/config/__init__.py
"""
Kontra configuration module - Contract and settings handling.

Public API:
    - Contract, RuleSpec: Data models for contracts
    - ContractLoader: Loads contracts from files or S3
    - KontraConfig, EffectiveConfig: Configuration models
    - load_config_file, find_config_file, resolve_effective_config:
      Locate, load, and resolve project configuration
"""

from kontra.config.models import Contract, RuleSpec
from kontra.config.loader import ContractLoader
from kontra.config.settings import (
    KontraConfig,
    EffectiveConfig,
    load_config_file,
    resolve_effective_config,
    find_config_file,
)

# Names re-exported as this package's public API
# (controls `from kontra.config import *`).
__all__ = [
    # Contract models
    "Contract",
    "RuleSpec",
    # Loader
    "ContractLoader",
    # Config
    "KontraConfig",
    "EffectiveConfig",
    "load_config_file",
    "find_config_file",
    "resolve_effective_config",
]
@@ -0,0 +1,127 @@
1
+ from __future__ import annotations
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List, Union
4
+ import os
5
+ import yaml
6
+
7
+ from kontra.config.models import Contract, RuleSpec
8
+
9
+
10
class ContractLoader:
    """Static helpers to load a Contract from different sources.

    The class is a stateless namespace of static methods. Supported sources:
    local filesystem paths and ``s3://`` URIs (AWS S3 or MinIO-compatible
    endpoints, via fsspec/s3fs).
    """

    @staticmethod
    def from_uri(uri: Union[str, Path]) -> Contract:
        """Dispatch to the S3 or local-path loader based on the URI scheme."""
        uri_str = str(uri)
        if uri_str.lower().startswith("s3://"):
            return ContractLoader.from_s3(uri_str)
        return ContractLoader.from_path(uri_str)

    @staticmethod
    def from_path(path: Union[str, Path]) -> Contract:
        """Load and validate a contract from a local YAML file.

        Raises:
            FileNotFoundError: if the file does not exist.
            ValueError: if the YAML is not a valid contract mapping.
        """
        p = Path(path)
        if not p.exists():
            raise FileNotFoundError(f"Contract file not found: {p}")
        # Explicit encoding: contract files may contain non-ASCII text and
        # the platform default encoding is not reliable across systems.
        with p.open("r", encoding="utf-8") as f:
            raw = yaml.safe_load(f)
        return ContractLoader._parse_and_validate(raw, source=str(p))

    # ---------- S3 loader ----------
    @staticmethod
    def _s3_storage_options() -> Dict[str, Any]:
        """
        Build fsspec/s3fs storage_options from env. Works with AWS S3 and MinIO.

        Recognized environment variables:
            - AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (used only when both set)
            - AWS_ENDPOINT_URL (custom/MinIO endpoint; implies path-style
              addressing, and SSL only for https endpoints)
            - AWS_REGION (fills client_kwargs.region_name if not already set)
        """
        opts: Dict[str, Any] = {"anon": False}

        key = os.getenv("AWS_ACCESS_KEY_ID")
        secret = os.getenv("AWS_SECRET_ACCESS_KEY")
        if key and secret:
            opts["key"] = key
            opts["secret"] = secret

        endpoint = os.getenv("AWS_ENDPOINT_URL")
        if endpoint:
            # MinIO/custom endpoints
            opts["client_kwargs"] = {"endpoint_url": endpoint}
            # Path-style is typical for MinIO
            opts["config_kwargs"] = {"s3": {"addressing_style": "path"}}
            # Use SSL only if endpoint is https
            opts["use_ssl"] = endpoint.startswith("https")

        region = os.getenv("AWS_REGION")
        if region:
            # Don't clobber an endpoint's client_kwargs; only fill region_name.
            opts.setdefault("client_kwargs", {})
            opts["client_kwargs"].setdefault("region_name", region)

        return opts

    @staticmethod
    def from_s3(uri: str) -> Contract:
        """
        Load contract YAML from S3/MinIO using s3fs via fsspec with storage_options.
        Requires: pip install s3fs

        Raises:
            RuntimeError: if s3fs is missing, or the object cannot be read.
            FileNotFoundError: if the object does not exist.
            ValueError: if the YAML is not a valid contract mapping.
        """
        try:
            import fsspec  # s3fs discovered by fsspec
        except ImportError as e:
            raise RuntimeError(
                "Reading contracts from S3 requires 's3fs'. Install with: pip install s3fs"
            ) from e

        storage_options = ContractLoader._s3_storage_options()

        try:
            fs = fsspec.filesystem("s3", **storage_options)
            with fs.open(uri, mode="r") as f:
                raw = yaml.safe_load(f)
        except FileNotFoundError as e:
            # Re-raise with a clearer message, preserving the original
            # exception as __cause__ for debugging.
            raise FileNotFoundError(f"Contract file not found on S3: {uri}") from e
        except PermissionError as e:
            raise RuntimeError(f"Failed to read contract from S3 '{uri}': Permission denied") from e
        except Exception as e:
            raise RuntimeError(f"Failed to read contract from S3 '{uri}': {e}") from e

        return ContractLoader._parse_and_validate(raw, source=uri)

    # ----------------- parsing -----------------
    @staticmethod
    def _parse_and_validate(raw: Any, source: str) -> Contract:
        """Validate the parsed YAML structure and build a Contract.

        Args:
            raw: The object produced by yaml.safe_load.
            source: Human-readable origin (path or URI) for error messages.

        Raises:
            ValueError: on any structural problem (non-mapping root,
                non-list rules, malformed rule entries).
        """
        if not isinstance(raw, dict):
            raise ValueError(
                f"Invalid or empty contract YAML at {source}. "
                "Expected a mapping with keys like 'datasource' and 'rules'."
            )
        # datasource is optional - defaults to "inline" when data is passed directly
        rules_raw = raw.get("rules", []) or []
        if not isinstance(rules_raw, list):
            raise ValueError("Contract 'rules' must be a list.")

        rules: List[RuleSpec] = []
        for i, r in enumerate(rules_raw):
            if not isinstance(r, dict):
                raise ValueError(f"Rule at index {i} is not a mapping.")
            if "name" not in r:
                raise ValueError(f"Rule at index {i} missing required key: 'name'.")
            params = r.get("params", {}) or {}
            if not isinstance(params, dict):
                raise ValueError(f"Rule at index {i} has non-dict 'params'.")
            context = r.get("context", {}) or {}
            if not isinstance(context, dict):
                raise ValueError(f"Rule at index {i} has non-dict 'context'.")
            rules.append(RuleSpec(
                name=r["name"],
                id=r.get("id"),
                params=params,
                severity=r.get("severity", "blocking"),
                context=context,
            ))

        # Use 'datasource' if present, otherwise fall back to 'dataset' for backwards compat
        # If neither is present, default to "inline" (handled by Contract model)
        datasource_value = raw.get("datasource") or raw.get("dataset") or "inline"
        return Contract(
            name=raw.get("name"),
            datasource=str(datasource_value),
            rules=rules,
        )
@@ -0,0 +1,49 @@
1
+ # src/kontra/config/models.py
2
+ from pydantic import BaseModel, Field, model_validator
3
+ from typing import Dict, Any, List, Literal, Optional
4
+
5
class RuleSpec(BaseModel):
    """
    Declarative specification for a rule from contract.yml

    The `context` field is for consumer-defined metadata that Kontra stores
    but does not use for validation. Consumers/agents can read context for
    routing, explanations, fix hints, etc.
    """
    # 'name' is the only required field; all others have defaults.
    name: str = Field(..., description="The rule name (e.g., not_null, unique).")
    id: Optional[str] = Field(default=None, description="Explicit rule ID (optional, auto-generated if not provided).")
    params: Dict[str, Any] = Field(default_factory=dict, description="Parameters passed to the rule.")
    # Severity is a closed set; anything else fails pydantic validation.
    severity: Literal["blocking", "warning", "info"] = Field(
        default="blocking",
        description="Rule severity: blocking (fails pipeline), warning (warns but continues), info (logs only)."
    )
    context: Dict[str, Any] = Field(
        default_factory=dict,
        description="Consumer-defined context (owner, tags, fix_hint, etc.). Stored but not used by Kontra."
    )
+ )
24
+
25
class Contract(BaseModel):
    """
    Data contract specification.

    The `datasource` field can be:
    - A named datasource from config: "prod_db.users"
    - A file path: "./data/users.parquet"
    - A URI: "s3://bucket/users.parquet", "postgres:///public.users"
    - Omitted when data is passed directly to validate()
    """
    name: Optional[str] = Field(default=None, description="Contract name (optional, used for identification).")
    datasource: str = Field(default="inline", description="Data source: named datasource, path, or URI. Defaults to 'inline' when data is passed directly.")
    rules: List[RuleSpec] = Field(default_factory=list)

    # Backwards compatibility: accept 'dataset' as alias for 'datasource'
    @model_validator(mode="before")
    @classmethod
    def handle_dataset_alias(cls, data: Any) -> Any:
        """Accept 'dataset' as deprecated alias for 'datasource'.

        Runs before field validation. Only rewrites when 'datasource' is
        absent, so an explicit 'datasource' always wins. Note: `pop`
        mutates the caller's dict in place when the alias is used.
        """
        if isinstance(data, dict):
            if "dataset" in data and "datasource" not in data:
                data["datasource"] = data.pop("dataset")
        return data
+ return data
48
+
49
+