PyPI - odibi - Versions diffs - 2.5.0__py3-none-any.whl - Mend

odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

odibi/__init__.py +32 -0
odibi/__main__.py +8 -0
odibi/catalog.py +3011 -0
odibi/cli/__init__.py +11 -0
odibi/cli/__main__.py +6 -0
odibi/cli/catalog.py +553 -0
odibi/cli/deploy.py +69 -0
odibi/cli/doctor.py +161 -0
odibi/cli/export.py +66 -0
odibi/cli/graph.py +150 -0
odibi/cli/init_pipeline.py +242 -0
odibi/cli/lineage.py +259 -0
odibi/cli/main.py +215 -0
odibi/cli/run.py +98 -0
odibi/cli/schema.py +208 -0
odibi/cli/secrets.py +232 -0
odibi/cli/story.py +379 -0
odibi/cli/system.py +132 -0
odibi/cli/test.py +286 -0
odibi/cli/ui.py +31 -0
odibi/cli/validate.py +39 -0
odibi/config.py +3541 -0
odibi/connections/__init__.py +9 -0
odibi/connections/azure_adls.py +499 -0
odibi/connections/azure_sql.py +709 -0
odibi/connections/base.py +28 -0
odibi/connections/factory.py +322 -0
odibi/connections/http.py +78 -0
odibi/connections/local.py +119 -0
odibi/connections/local_dbfs.py +61 -0
odibi/constants.py +17 -0
odibi/context.py +528 -0
odibi/diagnostics/__init__.py +12 -0
odibi/diagnostics/delta.py +520 -0
odibi/diagnostics/diff.py +169 -0
odibi/diagnostics/manager.py +171 -0
odibi/engine/__init__.py +20 -0
odibi/engine/base.py +334 -0
odibi/engine/pandas_engine.py +2178 -0
odibi/engine/polars_engine.py +1114 -0
odibi/engine/registry.py +54 -0
odibi/engine/spark_engine.py +2362 -0
odibi/enums.py +7 -0
odibi/exceptions.py +297 -0
odibi/graph.py +426 -0
odibi/introspect.py +1214 -0
odibi/lineage.py +511 -0
odibi/node.py +3341 -0
odibi/orchestration/__init__.py +0 -0
odibi/orchestration/airflow.py +90 -0
odibi/orchestration/dagster.py +77 -0
odibi/patterns/__init__.py +24 -0
odibi/patterns/aggregation.py +599 -0
odibi/patterns/base.py +94 -0
odibi/patterns/date_dimension.py +423 -0
odibi/patterns/dimension.py +696 -0
odibi/patterns/fact.py +748 -0
odibi/patterns/merge.py +128 -0
odibi/patterns/scd2.py +148 -0
odibi/pipeline.py +2382 -0
odibi/plugins.py +80 -0
odibi/project.py +581 -0
odibi/references.py +151 -0
odibi/registry.py +246 -0
odibi/semantics/__init__.py +71 -0
odibi/semantics/materialize.py +392 -0
odibi/semantics/metrics.py +361 -0
odibi/semantics/query.py +743 -0
odibi/semantics/runner.py +430 -0
odibi/semantics/story.py +507 -0
odibi/semantics/views.py +432 -0
odibi/state/__init__.py +1203 -0
odibi/story/__init__.py +55 -0
odibi/story/doc_story.py +554 -0
odibi/story/generator.py +1431 -0
odibi/story/lineage.py +1043 -0
odibi/story/lineage_utils.py +324 -0
odibi/story/metadata.py +608 -0
odibi/story/renderers.py +453 -0
odibi/story/templates/run_story.html +2520 -0
odibi/story/themes.py +216 -0
odibi/testing/__init__.py +13 -0
odibi/testing/assertions.py +75 -0
odibi/testing/fixtures.py +85 -0
odibi/testing/source_pool.py +277 -0
odibi/transformers/__init__.py +122 -0
odibi/transformers/advanced.py +1472 -0
odibi/transformers/delete_detection.py +610 -0
odibi/transformers/manufacturing.py +1029 -0
odibi/transformers/merge_transformer.py +778 -0
odibi/transformers/relational.py +675 -0
odibi/transformers/scd.py +579 -0
odibi/transformers/sql_core.py +1356 -0
odibi/transformers/validation.py +165 -0
odibi/ui/__init__.py +0 -0
odibi/ui/app.py +195 -0
odibi/utils/__init__.py +66 -0
odibi/utils/alerting.py +667 -0
odibi/utils/config_loader.py +343 -0
odibi/utils/console.py +231 -0
odibi/utils/content_hash.py +202 -0
odibi/utils/duration.py +43 -0
odibi/utils/encoding.py +102 -0
odibi/utils/extensions.py +28 -0
odibi/utils/hashing.py +61 -0
odibi/utils/logging.py +203 -0
odibi/utils/logging_context.py +740 -0
odibi/utils/progress.py +429 -0
odibi/utils/setup_helpers.py +302 -0
odibi/utils/telemetry.py +140 -0
odibi/validation/__init__.py +62 -0
odibi/validation/engine.py +765 -0
odibi/validation/explanation_linter.py +155 -0
odibi/validation/fk.py +547 -0
odibi/validation/gate.py +252 -0
odibi/validation/quarantine.py +605 -0
odibi/writers/__init__.py +15 -0
odibi/writers/sql_server_writer.py +2081 -0
odibi-2.5.0.dist-info/METADATA +255 -0
odibi-2.5.0.dist-info/RECORD +124 -0
odibi-2.5.0.dist-info/WHEEL +5 -0
odibi-2.5.0.dist-info/entry_points.txt +2 -0
odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
odibi-2.5.0.dist-info/top_level.txt +1 -0

odibi/transformers/validation.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""Validation transformers."""
+import time
+from typing import Any, List
+from pydantic import BaseModel, Field
+from odibi.context import EngineContext
+from odibi.exceptions import ValidationError
+from odibi.registry import transform
+from odibi.utils.logging_context import get_logging_context
+class CrossCheckParams(BaseModel):
+    """
+    Configuration for cross-node validation checks.
+    Example (Row Count Mismatch):
+    ```yaml
+    transformer: "cross_check"
+    params:
+      type: "row_count_diff"
+      inputs: ["node_a", "node_b"]
+      threshold: 0.05  # Allow 5% difference
+    ```
+    Example (Schema Match):
+    ```yaml
+    transformer: "cross_check"
+    params:
+      type: "schema_match"
+      inputs: ["staging_orders", "prod_orders"]
+    ```
+    """
+    type: str = Field(description="Check type: 'row_count_diff', 'schema_match'")
+    inputs: List[str] = Field(description="List of node names to compare")
+    threshold: float = Field(default=0.0, description="Threshold for diff (0.0-1.0)")
+@transform("cross_check", param_model=CrossCheckParams)
+def cross_check(context: EngineContext, params: CrossCheckParams) -> Any:
+    """
+    Perform cross-node validation checks.
+    Does not return a DataFrame (returns None).
+    Raises ValidationError on failure.
+    """
+    ctx = get_logging_context()
+    start_time = time.time()
+    ctx.debug(
+        "CrossCheck starting",
+        check_type=params.type,
+        inputs=params.inputs,
+        threshold=params.threshold,
+    )
+    if len(params.inputs) < 2:
+        ctx.error(
+            "CrossCheck failed: insufficient inputs",
+            inputs_count=len(params.inputs),
+        )
+        raise ValueError(
+            f"Cross-check requires at least 2 inputs to compare, but got {len(params.inputs)}. "
+            f"Inputs provided: {params.inputs!r}. "
+            "Add another input dataset to the 'inputs' list."
+        )
+    dfs = {}
+    for name in params.inputs:
+        df = context.context.get(name)
+        if df is None:
+            ctx.error(
+                "CrossCheck failed: input not found",
+                missing_input=name,
+                available_inputs=(
+                    list(context.context._data.keys())
+                    if hasattr(context.context, "_data")
+                    else None
+                ),
+            )
+            raise ValueError(
+                f"Cross-check input '{name}' not found in context. "
+                f"Available inputs: {list(context.context._data.keys()) if hasattr(context.context, '_data') else 'unknown'}. "
+                f"Ensure '{name}' is listed in 'depends_on' for this node."
+            )
+        dfs[name] = df
+    if params.type == "row_count_diff":
+        counts = {name: context.engine.count_rows(df) for name, df in dfs.items()}
+        base_name = params.inputs[0]
+        base_count = counts[base_name]
+        ctx.debug(
+            "CrossCheck row counts",
+            counts=counts,
+        )
+        failures = []
+        for name, count in counts.items():
+            if name == base_name:
+                continue
+            if base_count == 0:
+                if count > 0:
+                    diff = 1.0
+                else:
+                    diff = 0.0
+            else:
+                diff = abs(count - base_count) / base_count
+            if diff > params.threshold:
+                failures.append(
+                    f"Row count mismatch: {name} ({count}) vs {base_name} ({base_count}). "
+                    f"Diff {diff:.1%} > {params.threshold:.1%}"
+                )
+        if failures:
+            ctx.warning(
+                "CrossCheck validation failed",
+                failures=failures,
+            )
+            raise ValidationError("cross_check", failures)
+    elif params.type == "schema_match":
+        base_name = params.inputs[0]
+        base_schema = context.engine.get_schema(dfs[base_name])
+        failures = []
+        for name, df in dfs.items():
+            if name == base_name:
+                continue
+            schema = context.engine.get_schema(df)
+            if base_schema != schema:
+                set_base = set(base_schema.items())
+                set_curr = set(schema.items())
+                missing = set_base - set_curr
+                extra = set_curr - set_base
+                msg = f"Schema mismatch: {name} vs {base_name}."
+                if missing:
+                    msg += f" Missing/Changed: {missing}"
+                if extra:
+                    msg += f" Extra/Changed: {extra}"
+                failures.append(msg)
+        if failures:
+            ctx.warning(
+                "CrossCheck validation failed",
+                failures=failures,
+            )
+            raise ValidationError("cross_check", failures)
+    elapsed_ms = (time.time() - start_time) * 1000
+    ctx.debug(
+        "CrossCheck completed",
+        check_type=params.type,
+        passed=True,
+        elapsed_ms=round(elapsed_ms, 2),
+    )
+    return None

odibi/ui/__init__.py ADDED Viewed

File without changes

odibi/ui/app.py ADDED Viewed

@@ -0,0 +1,195 @@
+import os
+from pathlib import Path
+from fastapi import FastAPI, Request
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from odibi.state import StateManager
+# ASGI application object; the route handlers below register themselves on it.
+app = FastAPI(title="Odibi UI")
+# Resolve paths
+# Templates are looked up in a "templates" folder next to this module.
+BASE_DIR = Path(__file__).parent
+TEMPLATES_DIR = BASE_DIR / "templates"
+templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
+@app.get("/", response_class=HTMLResponse)
+async def dashboard(request: Request):
+    # Load config to get state backend
+    config_path = os.getenv("ODIBI_CONFIG")
+    if not config_path:
+        # Fallback to defaults
+        for p in ["odibi.yaml", "odibi.yml", "project.yaml"]:
+            if os.path.exists(p):
+                config_path = p
+                break
+    if config_path:
+        from odibi.config import load_config_from_file
+        from odibi.state import create_state_backend
+        try:
+            config = load_config_from_file(config_path)
+            # Create backend connected to System Catalog
+            backend = create_state_backend(config, project_root=os.path.dirname(config_path))
+            state_mgr = StateManager(backend=backend)
+            state = state_mgr.backend.load_state()
+        except Exception as e:
+            print(f"Failed to load state backend: {e}")
+            state = {}
+    else:
+        state = {}
+    pipelines = []
+    if state and "pipelines" in state:
+        for name, p_data in state["pipelines"].items():
+            nodes = p_data.get("nodes", {})
+            total = len(nodes)
+            # Determine pipeline status based on nodes
+            # This is a heuristic as we don't store pipeline-level success explicitly in simple backend
+            # usually pipeline_data has it?
+            # LocalFileStateBackend saves: {"last_run": ..., "nodes": ...}
+            # We don't check individual nodes for pipeline level status in backend save.
+            # But we can infer.
+            failed_nodes = [n for n in nodes.values() if not n.get("success")]
+            status = "FAILED" if failed_nodes else "SUCCESS"
+            if total == 0:
+                status = "UNKNOWN"
+            pipelines.append(
+                {
+                    "name": name,
+                    "last_run": p_data.get("last_run"),
+                    "status": status,
+                    "nodes_total": total,
+                    "nodes_success": total - len(failed_nodes),
+                }
+            )
+    return templates.TemplateResponse(
+        "index.html", {"request": request, "pipelines": pipelines, "project": "Odibi Project"}
+    )
+@app.get("/stories", response_class=HTMLResponse)
+async def stories(request: Request):
+    # Determine stories root from config if available
+    stories_root = Path("stories")
+    config_path = os.getenv("ODIBI_CONFIG")
+    if config_path:
+        from odibi.config import load_config_from_file
+        try:
+            config = load_config_from_file(config_path)
+            # Resolve story path
+            # Connection: system -> base_path + config.story.path
+            conn_name = config.story.connection
+            conn_config = config.connections.get(conn_name)
+            if conn_config and conn_config.type == "local":
+                base = conn_config.base_path
+                if not os.path.isabs(base):
+                    base = os.path.join(os.path.dirname(config_path), base)
+                stories_root = Path(base) / config.story.path
+        except Exception:
+            pass
+    runs = []
+    if stories_root.exists():
+        # Traverse: pipeline/date/run.html
+        for p_dir in stories_root.iterdir():
+            if p_dir.is_dir():
+                for d_dir in p_dir.iterdir():
+                    if d_dir.is_dir():
+                        for f in d_dir.glob("*.html"):
+                            runs.append(
+                                {
+                                    "pipeline": p_dir.name,
+                                    "date": d_dir.name,
+                                    "name": f.name,
+                                    "path": f"/stories_static/{p_dir.name}/{d_dir.name}/{f.name}",
+                                }
+                            )
+    # Sort
+    runs.sort(key=lambda x: (x["date"], x["name"]), reverse=True)
+    return templates.TemplateResponse("stories.html", {"request": request, "runs": runs})
+@app.get("/config", response_class=HTMLResponse)
+async def config_view(request: Request):
+    config_path = os.getenv("ODIBI_CONFIG")
+    content = ""
+    error = None
+    if config_path:
+        try:
+            with open(config_path, "r") as f:
+                content = f.read()
+        except Exception as e:
+            error = str(e)
+    else:
+        # Try default locations
+        for p in ["odibi.yaml", "odibi.yml", "project.yaml"]:
+            if os.path.exists(p):
+                config_path = p
+                with open(p, "r") as f:
+                    content = f.read()
+                break
+        if not content:
+            error = "No configuration file found. Run with 'odibi ui config.yaml'"
+    return templates.TemplateResponse(
+        "config.html",
+        {"request": request, "config_path": config_path, "content": content, "error": error},
+    )
+# Mount static files for stories
+# This runs once, at import time. The directory is taken from the config named by
+# ODIBI_CONFIG when it can be resolved, otherwise ./stories is used.
+# The original author notes that the CLI sets the env var before startup.
+# NOTE(review): the print() calls below emit DEBUG output on every import of this
+# module; consider routing them through logging.
+config_path_env = os.getenv("ODIBI_CONFIG")
+print(f"DEBUG: ODIBI_CONFIG Env Var: {config_path_env}")
+# Default location, relative to the working directory.
+static_stories_dir = Path("stories")
+if config_path_env and os.path.exists(config_path_env):
+    # Resolve absolute path to avoid ambiguity
+    abs_config_path = Path(config_path_env).resolve()
+    from odibi.config import load_config_from_file
+    try:
+        # Use the official loader to get Pydantic defaults/validation
+        config = load_config_from_file(str(abs_config_path))
+        s_conn = config.story.connection
+        s_path = config.story.path
+        print(f"DEBUG: Story Conn: {s_conn}, Path: {s_path}")
+        print(f"DEBUG: Available Connections: {list(config.connections.keys())}")
+        # Only a "local" story connection overrides the default directory.
+        if s_conn in config.connections:
+            c_conf = config.connections[s_conn]
+            if c_conf.type == "local":
+                base = c_conf.base_path
+                # A relative base_path is resolved against the config file's directory.
+                if not os.path.isabs(base):
+                    base = os.path.join(abs_config_path.parent, base)
+                static_stories_dir = Path(base) / s_path
+                print(f"DEBUG: Config Path: {abs_config_path}")
+                print(f"DEBUG: Calculated Base: {base}")
+                print(f"DEBUG: Calculated Stories Dir: {static_stories_dir}")
+                print(f"DEBUG: Exists? {static_stories_dir.exists()}")
+    except Exception as e:
+        # Best effort: on any failure the default ./stories directory is kept.
+        print(f"DEBUG: Failed to resolve story path: {e}")
+# The mount is skipped when the directory does not exist at import time.
+if static_stories_dir.exists():
+    print(f"DEBUG: Mounting stories from {static_stories_dir}")
+    app.mount(
+        "/stories_static", StaticFiles(directory=str(static_stories_dir)), name="stories_static"
+    )

odibi/utils/__init__.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""Utilities for ODIBI setup and configuration.
+Includes:
+- Configuration loading with env var substitution
+- Structured logging and context-aware logging
+- Key Vault and connection helpers
+- Rich console output utilities
+- Pipeline progress visualization
+"""
+from .config_loader import load_yaml_with_env
+from .console import (
+    error,
+    get_console,
+    info,
+    is_rich_available,
+    print_panel,
+    print_rule,
+    print_table,
+    success,
+    warning,
+)
+from .logging import StructuredLogger, configure_logging, logger
+from .logging_context import (
+    LoggingContext,
+    OperationMetrics,
+    OperationType,
+    create_logging_context,
+    get_logging_context,
+    set_logging_context,
+)
+from .progress import NodeStatus, PipelineProgress
+from .setup_helpers import (
+    configure_connections_parallel,
+    fetch_keyvault_secrets_parallel,
+    validate_databricks_environment,
+)
+# Public API of odibi.utils; every name is re-exported from a submodule imported above.
+__all__ = [
+    # Setup helpers (from .setup_helpers)
+    "fetch_keyvault_secrets_parallel",
+    "configure_connections_parallel",
+    "validate_databricks_environment",
+    # Configuration loading (from .config_loader)
+    "load_yaml_with_env",
+    # Logging (from .logging)
+    "StructuredLogger",
+    "configure_logging",
+    "logger",
+    # Logging context (from .logging_context)
+    "LoggingContext",
+    "OperationMetrics",
+    "OperationType",
+    "create_logging_context",
+    "get_logging_context",
+    "set_logging_context",
+    # Console utilities
+    "is_rich_available",
+    "get_console",
+    "success",
+    "error",
+    "warning",
+    "info",
+    "print_table",
+    "print_panel",
+    "print_rule",
+    # Progress utilities
+    "NodeStatus",
+    "PipelineProgress",
+]