PyPI - odibi - Versions diffs - 2.5.0__py3-none-any.whl - Mend

odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124)

odibi/__init__.py +32 -0
odibi/__main__.py +8 -0
odibi/catalog.py +3011 -0
odibi/cli/__init__.py +11 -0
odibi/cli/__main__.py +6 -0
odibi/cli/catalog.py +553 -0
odibi/cli/deploy.py +69 -0
odibi/cli/doctor.py +161 -0
odibi/cli/export.py +66 -0
odibi/cli/graph.py +150 -0
odibi/cli/init_pipeline.py +242 -0
odibi/cli/lineage.py +259 -0
odibi/cli/main.py +215 -0
odibi/cli/run.py +98 -0
odibi/cli/schema.py +208 -0
odibi/cli/secrets.py +232 -0
odibi/cli/story.py +379 -0
odibi/cli/system.py +132 -0
odibi/cli/test.py +286 -0
odibi/cli/ui.py +31 -0
odibi/cli/validate.py +39 -0
odibi/config.py +3541 -0
odibi/connections/__init__.py +9 -0
odibi/connections/azure_adls.py +499 -0
odibi/connections/azure_sql.py +709 -0
odibi/connections/base.py +28 -0
odibi/connections/factory.py +322 -0
odibi/connections/http.py +78 -0
odibi/connections/local.py +119 -0
odibi/connections/local_dbfs.py +61 -0
odibi/constants.py +17 -0
odibi/context.py +528 -0
odibi/diagnostics/__init__.py +12 -0
odibi/diagnostics/delta.py +520 -0
odibi/diagnostics/diff.py +169 -0
odibi/diagnostics/manager.py +171 -0
odibi/engine/__init__.py +20 -0
odibi/engine/base.py +334 -0
odibi/engine/pandas_engine.py +2178 -0
odibi/engine/polars_engine.py +1114 -0
odibi/engine/registry.py +54 -0
odibi/engine/spark_engine.py +2362 -0
odibi/enums.py +7 -0
odibi/exceptions.py +297 -0
odibi/graph.py +426 -0
odibi/introspect.py +1214 -0
odibi/lineage.py +511 -0
odibi/node.py +3341 -0
odibi/orchestration/__init__.py +0 -0
odibi/orchestration/airflow.py +90 -0
odibi/orchestration/dagster.py +77 -0
odibi/patterns/__init__.py +24 -0
odibi/patterns/aggregation.py +599 -0
odibi/patterns/base.py +94 -0
odibi/patterns/date_dimension.py +423 -0
odibi/patterns/dimension.py +696 -0
odibi/patterns/fact.py +748 -0
odibi/patterns/merge.py +128 -0
odibi/patterns/scd2.py +148 -0
odibi/pipeline.py +2382 -0
odibi/plugins.py +80 -0
odibi/project.py +581 -0
odibi/references.py +151 -0
odibi/registry.py +246 -0
odibi/semantics/__init__.py +71 -0
odibi/semantics/materialize.py +392 -0
odibi/semantics/metrics.py +361 -0
odibi/semantics/query.py +743 -0
odibi/semantics/runner.py +430 -0
odibi/semantics/story.py +507 -0
odibi/semantics/views.py +432 -0
odibi/state/__init__.py +1203 -0
odibi/story/__init__.py +55 -0
odibi/story/doc_story.py +554 -0
odibi/story/generator.py +1431 -0
odibi/story/lineage.py +1043 -0
odibi/story/lineage_utils.py +324 -0
odibi/story/metadata.py +608 -0
odibi/story/renderers.py +453 -0
odibi/story/templates/run_story.html +2520 -0
odibi/story/themes.py +216 -0
odibi/testing/__init__.py +13 -0
odibi/testing/assertions.py +75 -0
odibi/testing/fixtures.py +85 -0
odibi/testing/source_pool.py +277 -0
odibi/transformers/__init__.py +122 -0
odibi/transformers/advanced.py +1472 -0
odibi/transformers/delete_detection.py +610 -0
odibi/transformers/manufacturing.py +1029 -0
odibi/transformers/merge_transformer.py +778 -0
odibi/transformers/relational.py +675 -0
odibi/transformers/scd.py +579 -0
odibi/transformers/sql_core.py +1356 -0
odibi/transformers/validation.py +165 -0
odibi/ui/__init__.py +0 -0
odibi/ui/app.py +195 -0
odibi/utils/__init__.py +66 -0
odibi/utils/alerting.py +667 -0
odibi/utils/config_loader.py +343 -0
odibi/utils/console.py +231 -0
odibi/utils/content_hash.py +202 -0
odibi/utils/duration.py +43 -0
odibi/utils/encoding.py +102 -0
odibi/utils/extensions.py +28 -0
odibi/utils/hashing.py +61 -0
odibi/utils/logging.py +203 -0
odibi/utils/logging_context.py +740 -0
odibi/utils/progress.py +429 -0
odibi/utils/setup_helpers.py +302 -0
odibi/utils/telemetry.py +140 -0
odibi/validation/__init__.py +62 -0
odibi/validation/engine.py +765 -0
odibi/validation/explanation_linter.py +155 -0
odibi/validation/fk.py +547 -0
odibi/validation/gate.py +252 -0
odibi/validation/quarantine.py +605 -0
odibi/writers/__init__.py +15 -0
odibi/writers/sql_server_writer.py +2081 -0
odibi-2.5.0.dist-info/METADATA +255 -0
odibi-2.5.0.dist-info/RECORD +124 -0
odibi-2.5.0.dist-info/WHEEL +5 -0
odibi-2.5.0.dist-info/entry_points.txt +2 -0
odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
odibi-2.5.0.dist-info/top_level.txt +1 -0

odibi/utils/progress.py (added)

@@ -0,0 +1,429 @@
+"""Pipeline progress tracking with Rich visualization.
+This module provides progress visualization for pipeline execution with
+auto-detection of environment (CLI vs notebook) and graceful fallback
+when Rich is not available.
+"""
+from typing import Any, Dict, List, Optional
+from odibi.utils.console import is_rich_available, get_console, _is_notebook_environment
+class NodeStatus:
+    """Status constants for node execution."""
+    PENDING = "pending"
+    RUNNING = "running"
+    SUCCESS = "success"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+class PipelineProgress:
+    """Progress tracker for pipeline execution.
+    Provides visual feedback during pipeline runs with auto-detection
+    of environment (CLI/notebook) and Rich availability.
+    Example:
+        >>> progress = PipelineProgress("my_pipeline", ["node1", "node2"])
+        >>> progress.start()
+        >>> progress.update_node("node1", NodeStatus.SUCCESS, duration=1.5, rows=1000)
+        >>> progress.update_node("node2", NodeStatus.FAILED, duration=0.5)
+        >>> progress.finish()
+    """
+    def __init__(
+        self,
+        pipeline_name: str,
+        node_names: List[str],
+        engine: str = "pandas",
+    ) -> None:
+        """Initialize progress tracker.
+        Args:
+            pipeline_name: Name of the pipeline being executed.
+            node_names: List of node names in execution order.
+            engine: Engine type (pandas/spark).
+        """
+        self.pipeline_name = pipeline_name
+        self.node_names = node_names
+        self.engine = engine
+        self.is_notebook = _is_notebook_environment()
+        self.use_rich = is_rich_available()
+        self._node_statuses: Dict[str, Dict[str, Any]] = {
+            name: {"status": NodeStatus.PENDING, "duration": None, "rows": None}
+            for name in node_names
+        }
+        self._live: Optional[Any] = None
+        self._table: Optional[Any] = None
+        self._start_time: Optional[float] = None
+    def start(self) -> None:
+        """Start progress display."""
+        import time
+        self._start_time = time.time()
+        if self.use_rich:
+            self._start_rich()
+        else:
+            self._start_plain()
+    def _start_rich(self) -> None:
+        """Start Rich live display."""
+        from rich.live import Live
+        console = get_console()
+        header = self._create_header_panel()
+        console.print(header)
+        if not self.is_notebook:
+            self._table = self._create_progress_table()
+            self._live = Live(
+                self._table,
+                console=console,
+                refresh_per_second=4,
+                transient=True,
+            )
+            self._live.start()
+        else:
+            console.print(f"[dim]Executing {len(self.node_names)} nodes...[/dim]\n")
+    def _start_plain(self) -> None:
+        """Start plain text display."""
+        print(f"\n{'=' * 60}")
+        print(f"  Pipeline: {self.pipeline_name}")
+        print(f"  Engine: {self.engine}")
+        print(f"  Nodes: {len(self.node_names)}")
+        print(f"{'=' * 60}\n")
+    def _create_header_panel(self) -> Any:
+        """Create the header panel."""
+        from rich.panel import Panel
+        from rich.text import Text
+        header_text = Text()
+        header_text.append("Pipeline: ", style="dim")
+        header_text.append(f"{self.pipeline_name}\n", style="bold cyan")
+        header_text.append("Engine: ", style="dim")
+        header_text.append(f"{self.engine}  ", style="green")
+        header_text.append("Nodes: ", style="dim")
+        header_text.append(f"{len(self.node_names)}", style="yellow")
+        return Panel(
+            header_text,
+            title="[bold]Odibi Pipeline[/bold]",
+            border_style="blue",
+            padding=(0, 2),
+        )
+    def _create_progress_table(self) -> Any:
+        """Create the progress table."""
+        from rich.table import Table
+        table = Table(
+            show_header=True,
+            header_style="bold",
+            box=None,
+            padding=(0, 1),
+        )
+        table.add_column("Node", style="cyan", min_width=30)
+        table.add_column("Status", justify="center", min_width=10)
+        table.add_column("Duration", justify="right", min_width=10)
+        table.add_column("Rows", justify="right", min_width=12)
+        for name in self.node_names:
+            info = self._node_statuses[name]
+            status_str = self._format_status(info["status"])
+            duration_str = self._format_duration(info["duration"])
+            rows_str = self._format_rows(info["rows"])
+            table.add_row(name, status_str, duration_str, rows_str)
+        return table
+    def _format_status(self, status: str) -> str:
+        """Format status with Rich markup."""
+        status_map = {
+            NodeStatus.PENDING: "[dim]○ pending[/dim]",
+            NodeStatus.RUNNING: "[yellow]◉ running[/yellow]",
+            NodeStatus.SUCCESS: "[green]✓ success[/green]",
+            NodeStatus.FAILED: "[red]✗ failed[/red]",
+            NodeStatus.SKIPPED: "[dim]⏭ skipped[/dim]",
+        }
+        return status_map.get(status, status)
+    def _format_status_plain(self, status: str) -> str:
+        """Format status for plain text."""
+        status_map = {
+            NodeStatus.PENDING: "○ pending",
+            NodeStatus.RUNNING: "◉ running",
+            NodeStatus.SUCCESS: "✓ success",
+            NodeStatus.FAILED: "✗ failed",
+            NodeStatus.SKIPPED: "⏭ skipped",
+        }
+        return status_map.get(status, status)
+    def _format_duration(self, duration: Optional[float]) -> str:
+        """Format duration value."""
+        if duration is None:
+            return "-"
+        if duration < 1:
+            return f"{duration * 1000:.0f}ms"
+        return f"{duration:.2f}s"
+    def _format_rows(self, rows: Optional[int]) -> str:
+        """Format row count."""
+        if rows is None:
+            return "-"
+        if rows >= 1_000_000:
+            return f"{rows / 1_000_000:.1f}M"
+        if rows >= 1_000:
+            return f"{rows / 1_000:.1f}K"
+        return str(rows)
+    def update_node(
+        self,
+        name: str,
+        status: str,
+        duration: Optional[float] = None,
+        rows: Optional[int] = None,
+        phase_timings: Optional[Dict[str, float]] = None,
+    ) -> None:
+        """Update node status.
+        Args:
+            name: Node name.
+            status: Status from NodeStatus constants.
+            duration: Execution duration in seconds.
+            rows: Number of rows processed.
+            phase_timings: Optional dict of phase name -> duration in ms.
+        """
+        if name not in self._node_statuses:
+            return
+        self._node_statuses[name] = {
+            "status": status,
+            "duration": duration,
+            "rows": rows,
+            "phase_timings": phase_timings,
+        }
+        if self.use_rich:
+            self._update_rich(name, status, duration, rows)
+        else:
+            self._update_plain(name, status, duration, rows)
+    def _update_rich(
+        self,
+        name: str,
+        status: str,
+        duration: Optional[float],
+        rows: Optional[int],
+    ) -> None:
+        """Update Rich display."""
+        if self._live and not self.is_notebook:
+            self._table = self._create_progress_table()
+            self._live.update(self._table)
+        elif self.is_notebook:
+            console = get_console()
+            status_str = self._format_status(status)
+            duration_str = self._format_duration(duration)
+            rows_str = self._format_rows(rows)
+            console.print(f"  {name}: {status_str} ({duration_str}, {rows_str} rows)")
+    def _update_plain(
+        self,
+        name: str,
+        status: str,
+        duration: Optional[float],
+        rows: Optional[int],
+    ) -> None:
+        """Update plain text display."""
+        status_str = self._format_status_plain(status)
+        duration_str = self._format_duration(duration)
+        rows_str = self._format_rows(rows)
+        print(f"  {name}: {status_str} ({duration_str}, {rows_str} rows)")
+    def finish(
+        self,
+        completed: int = 0,
+        failed: int = 0,
+        skipped: int = 0,
+        duration: Optional[float] = None,
+    ) -> None:
+        """Finish progress display and show summary.
+        Args:
+            completed: Number of completed nodes.
+            failed: Number of failed nodes.
+            skipped: Number of skipped nodes.
+            duration: Total pipeline duration in seconds.
+        """
+        if self._live:
+            self._live.stop()
+            self._live = None
+        import time
+        total_duration = duration or ((time.time() - self._start_time) if self._start_time else 0)
+        if self.use_rich:
+            self._finish_rich(completed, failed, skipped, total_duration)
+        else:
+            self._finish_plain(completed, failed, skipped, total_duration)
+    def _finish_rich(
+        self,
+        completed: int,
+        failed: int,
+        skipped: int,
+        duration: float,
+    ) -> None:
+        """Finish with Rich summary."""
+        from rich.panel import Panel
+        from rich.text import Text
+        console = get_console()
+        final_table = self._create_progress_table()
+        console.print(final_table)
+        console.print()
+        status = "[green]SUCCESS[/green]" if failed == 0 else "[red]FAILED[/red]"
+        summary = Text()
+        summary.append("Status: ")
+        summary.append_text(Text.from_markup(status))
+        summary.append("\n")
+        summary.append("Duration: ", style="dim")
+        summary.append(f"{duration:.2f}s\n")
+        summary.append("Completed: ", style="dim")
+        summary.append(f"{completed}", style="green")
+        if failed > 0:
+            summary.append("  Failed: ", style="dim")
+            summary.append(f"{failed}", style="red")
+        if skipped > 0:
+            summary.append("  Skipped: ", style="dim")
+            summary.append(f"{skipped}", style="yellow")
+        panel_style = "green" if failed == 0 else "red"
+        panel = Panel(
+            summary,
+            title="[bold]Pipeline Complete[/bold]",
+            border_style=panel_style,
+            padding=(0, 2),
+        )
+        console.print(panel)
+    def _finish_plain(
+        self,
+        completed: int,
+        failed: int,
+        skipped: int,
+        duration: float,
+    ) -> None:
+        """Finish with plain text summary."""
+        status = "SUCCESS" if failed == 0 else "FAILED"
+        print(f"\n{'=' * 60}")
+        print(f"  Pipeline: {status}")
+        print(f"  Duration: {duration:.2f}s")
+        print(f"  Completed: {completed}, Failed: {failed}, Skipped: {skipped}")
+        print(f"{'=' * 60}\n")
+    def get_phase_timing_summary(self) -> Dict[str, Dict[str, float]]:
+        """Get phase timing breakdown for all nodes.
+        Returns:
+            Dict mapping node names to their phase timings (in ms).
+        """
+        return {
+            name: info.get("phase_timings", {})
+            for name, info in self._node_statuses.items()
+            if info.get("phase_timings")
+        }
+    def get_aggregate_phase_timings(self) -> Dict[str, float]:
+        """Get max phase timings across all nodes (bottleneck per phase).
+        Returns:
+            Dict mapping phase names to max time spent by any node (in ms).
+        """
+        max_timings: Dict[str, float] = {}
+        for info in self._node_statuses.values():
+            phase_timings = info.get("phase_timings") or {}
+            for phase, duration_ms in phase_timings.items():
+                max_timings[phase] = max(max_timings.get(phase, 0), duration_ms)
+        return {k: round(v, 2) for k, v in max_timings.items()}
+    def print_phase_timing_report(self, pipeline_duration_s: Optional[float] = None) -> None:
+        """Print a detailed phase timing report.
+        Args:
+            pipeline_duration_s: Actual pipeline wall-clock duration in seconds.
+                Used for percentage calculations. Falls back to sum of max phases.
+        """
+        aggregate = self.get_aggregate_phase_timings()
+        if not aggregate:
+            return
+        # Use actual pipeline duration for percentage, or fall back to sum of max phases
+        if pipeline_duration_s is not None:
+            total_ms = pipeline_duration_s * 1000
+        else:
+            total_ms = sum(aggregate.values())
+        if self.use_rich:
+            self._print_phase_timing_rich(aggregate, total_ms)
+        else:
+            self._print_phase_timing_plain(aggregate, total_ms)
+    def _print_phase_timing_rich(self, aggregate: Dict[str, float], total_ms: float) -> None:
+        """Print phase timing report with Rich."""
+        from rich.panel import Panel
+        from rich.table import Table
+        console = get_console()
+        table = Table(
+            show_header=True,
+            header_style="bold",
+            box=None,
+            padding=(0, 1),
+        )
+        table.add_column("Phase", style="cyan")
+        table.add_column("Slowest", justify="right")
+        table.add_column("% of Pipeline", justify="right")
+        # Sort by time descending
+        sorted_phases = sorted(aggregate.items(), key=lambda x: x[1], reverse=True)
+        for phase, duration_ms in sorted_phases:
+            pct = (duration_ms / total_ms * 100) if total_ms > 0 else 0
+            duration_str = (
+                f"{duration_ms:.0f}ms" if duration_ms < 1000 else f"{duration_ms / 1000:.2f}s"
+            )
+            table.add_row(phase, duration_str, f"{pct:.1f}%")
+        panel = Panel(
+            table,
+            title="[bold]Phase Bottlenecks (slowest node per phase)[/bold]",
+            border_style="dim",
+            padding=(0, 1),
+        )
+        console.print(panel)
+    def _print_phase_timing_plain(self, aggregate: Dict[str, float], total_ms: float) -> None:
+        """Print phase timing report in plain text."""
+        print("\n--- Phase Bottlenecks (slowest node per phase) ---")
+        sorted_phases = sorted(aggregate.items(), key=lambda x: x[1], reverse=True)
+        for phase, duration_ms in sorted_phases:
+            pct = (duration_ms / total_ms * 100) if total_ms > 0 else 0
+            duration_str = (
+                f"{duration_ms:.0f}ms" if duration_ms < 1000 else f"{duration_ms / 1000:.2f}s"
+            )
+            print(f"  {phase}: {duration_str} ({pct:.1f}% of pipeline)")
+        print("-" * 48 + "\n")