PyPI - odibi - Versions diffs - 2.5.0__py3-none-any.whl - Mend

odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

odibi/__init__.py +32 -0
odibi/__main__.py +8 -0
odibi/catalog.py +3011 -0
odibi/cli/__init__.py +11 -0
odibi/cli/__main__.py +6 -0
odibi/cli/catalog.py +553 -0
odibi/cli/deploy.py +69 -0
odibi/cli/doctor.py +161 -0
odibi/cli/export.py +66 -0
odibi/cli/graph.py +150 -0
odibi/cli/init_pipeline.py +242 -0
odibi/cli/lineage.py +259 -0
odibi/cli/main.py +215 -0
odibi/cli/run.py +98 -0
odibi/cli/schema.py +208 -0
odibi/cli/secrets.py +232 -0
odibi/cli/story.py +379 -0
odibi/cli/system.py +132 -0
odibi/cli/test.py +286 -0
odibi/cli/ui.py +31 -0
odibi/cli/validate.py +39 -0
odibi/config.py +3541 -0
odibi/connections/__init__.py +9 -0
odibi/connections/azure_adls.py +499 -0
odibi/connections/azure_sql.py +709 -0
odibi/connections/base.py +28 -0
odibi/connections/factory.py +322 -0
odibi/connections/http.py +78 -0
odibi/connections/local.py +119 -0
odibi/connections/local_dbfs.py +61 -0
odibi/constants.py +17 -0
odibi/context.py +528 -0
odibi/diagnostics/__init__.py +12 -0
odibi/diagnostics/delta.py +520 -0
odibi/diagnostics/diff.py +169 -0
odibi/diagnostics/manager.py +171 -0
odibi/engine/__init__.py +20 -0
odibi/engine/base.py +334 -0
odibi/engine/pandas_engine.py +2178 -0
odibi/engine/polars_engine.py +1114 -0
odibi/engine/registry.py +54 -0
odibi/engine/spark_engine.py +2362 -0
odibi/enums.py +7 -0
odibi/exceptions.py +297 -0
odibi/graph.py +426 -0
odibi/introspect.py +1214 -0
odibi/lineage.py +511 -0
odibi/node.py +3341 -0
odibi/orchestration/__init__.py +0 -0
odibi/orchestration/airflow.py +90 -0
odibi/orchestration/dagster.py +77 -0
odibi/patterns/__init__.py +24 -0
odibi/patterns/aggregation.py +599 -0
odibi/patterns/base.py +94 -0
odibi/patterns/date_dimension.py +423 -0
odibi/patterns/dimension.py +696 -0
odibi/patterns/fact.py +748 -0
odibi/patterns/merge.py +128 -0
odibi/patterns/scd2.py +148 -0
odibi/pipeline.py +2382 -0
odibi/plugins.py +80 -0
odibi/project.py +581 -0
odibi/references.py +151 -0
odibi/registry.py +246 -0
odibi/semantics/__init__.py +71 -0
odibi/semantics/materialize.py +392 -0
odibi/semantics/metrics.py +361 -0
odibi/semantics/query.py +743 -0
odibi/semantics/runner.py +430 -0
odibi/semantics/story.py +507 -0
odibi/semantics/views.py +432 -0
odibi/state/__init__.py +1203 -0
odibi/story/__init__.py +55 -0
odibi/story/doc_story.py +554 -0
odibi/story/generator.py +1431 -0
odibi/story/lineage.py +1043 -0
odibi/story/lineage_utils.py +324 -0
odibi/story/metadata.py +608 -0
odibi/story/renderers.py +453 -0
odibi/story/templates/run_story.html +2520 -0
odibi/story/themes.py +216 -0
odibi/testing/__init__.py +13 -0
odibi/testing/assertions.py +75 -0
odibi/testing/fixtures.py +85 -0
odibi/testing/source_pool.py +277 -0
odibi/transformers/__init__.py +122 -0
odibi/transformers/advanced.py +1472 -0
odibi/transformers/delete_detection.py +610 -0
odibi/transformers/manufacturing.py +1029 -0
odibi/transformers/merge_transformer.py +778 -0
odibi/transformers/relational.py +675 -0
odibi/transformers/scd.py +579 -0
odibi/transformers/sql_core.py +1356 -0
odibi/transformers/validation.py +165 -0
odibi/ui/__init__.py +0 -0
odibi/ui/app.py +195 -0
odibi/utils/__init__.py +66 -0
odibi/utils/alerting.py +667 -0
odibi/utils/config_loader.py +343 -0
odibi/utils/console.py +231 -0
odibi/utils/content_hash.py +202 -0
odibi/utils/duration.py +43 -0
odibi/utils/encoding.py +102 -0
odibi/utils/extensions.py +28 -0
odibi/utils/hashing.py +61 -0
odibi/utils/logging.py +203 -0
odibi/utils/logging_context.py +740 -0
odibi/utils/progress.py +429 -0
odibi/utils/setup_helpers.py +302 -0
odibi/utils/telemetry.py +140 -0
odibi/validation/__init__.py +62 -0
odibi/validation/engine.py +765 -0
odibi/validation/explanation_linter.py +155 -0
odibi/validation/fk.py +547 -0
odibi/validation/gate.py +252 -0
odibi/validation/quarantine.py +605 -0
odibi/writers/__init__.py +15 -0
odibi/writers/sql_server_writer.py +2081 -0
odibi-2.5.0.dist-info/METADATA +255 -0
odibi-2.5.0.dist-info/RECORD +124 -0
odibi-2.5.0.dist-info/WHEEL +5 -0
odibi-2.5.0.dist-info/entry_points.txt +2 -0
odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
odibi-2.5.0.dist-info/top_level.txt +1 -0

odibi/cli/story.py ADDED Viewed

@@ -0,0 +1,379 @@
+"""
+Story CLI Commands
+==================
+Commands for generating and managing pipeline documentation stories.
+"""
+from pathlib import Path
+import yaml
+from odibi.config import ProjectConfig
+from odibi.story import DocStoryGenerator
+def story_command(args):
+    """
+    Dispatch a ``story`` subcommand to its handler.
+    Args:
+        args: Parsed command-line arguments; ``args.story_command`` names the
+            subcommand ("generate", "diff" or "list")
+    Returns:
+        Exit code (0 for success, 1 for error)
+    """
+    command = getattr(args, "story_command", None)
+    # "odibi story" without a subcommand leaves the dest as None; print usage
+    # (as the system command does) rather than "Unknown story command: None".
+    if command is None:
+        print("Usage: odibi story <command>")
+        print("\nAvailable commands:")
+        print("  generate   Generate documentation story from pipeline config")
+        print("  diff       Compare two pipeline run stories")
+        print("  list       List available story files")
+        return 1
+    if command == "generate":
+        return generate_command(args)
+    if command == "diff":
+        return diff_command(args)
+    if command == "list":
+        return list_command(args)
+    print(f"Unknown story command: {command}")
+    return 1
+def generate_command(args):
+    """
+    Generate documentation story from pipeline config.
+    The first pipeline in the project config is documented. When no output
+    path is given, one is derived from the pipeline name and the format.
+    Args:
+        args: Parsed arguments with config, output, format, theme,
+            no_validate, no_diagram and verbose
+    Returns:
+        Exit code (0 for success, 1 for error)
+    """
+    try:
+        # Load configuration
+        print(f"📖 Loading configuration from {args.config}...")
+        # Decode explicitly as UTF-8 instead of the platform's locale encoding.
+        with open(args.config, "r", encoding="utf-8") as f:
+            config_data = yaml.safe_load(f)
+        # An empty file loads as None and a top-level list cannot be unpacked
+        # with "**"; surface both through the ValueError handler below.
+        if not isinstance(config_data, dict):
+            raise ValueError("configuration file must contain a YAML mapping")
+        config = ProjectConfig(**config_data)
+        # Get the pipeline config (assume first pipeline if not specified)
+        if not config.pipelines:
+            print("❌ No pipelines found in configuration")
+            return 1
+        pipeline_config = config.pipelines[0]
+        # Create doc story generator
+        print("📝 Generating documentation story...")
+        generator = DocStoryGenerator(
+            pipeline_config=pipeline_config,
+            project_config=config if hasattr(config, "project") else None,
+        )
+        output_format = args.format.lower()
+        # Determine output path
+        if args.output:
+            output_path = args.output
+        else:
+            # Auto-generate output filename. "md" is an accepted --format
+            # choice, so it needs its own entry; without it the lookup fell
+            # through to the ".html" default.
+            format_ext = {
+                "html": ".html",
+                "markdown": ".md",
+                "md": ".md",
+                "json": ".json",
+            }.get(output_format, ".html")
+            output_path = f"docs/{pipeline_config.pipeline}_documentation{format_ext}"
+        # Load theme if HTML format
+        theme = None
+        if output_format == "html" and args.theme:
+            from odibi.story.themes import get_theme
+            try:
+                theme = get_theme(args.theme)
+                print(f"🎨 Using theme: {theme.name}")
+            except ValueError as e:
+                # A bad theme name is not fatal; generation continues unthemed.
+                print(f"⚠️  Theme warning: {e}, using default theme")
+        # Generate story
+        result_path = generator.generate(
+            output_path=output_path,
+            format=args.format,
+            validate=not args.no_validate,
+            include_flow_diagram=not args.no_diagram,
+            theme=theme,
+        )
+        print(f"✅ Documentation generated: {result_path}")
+        print(f"📄 Format: {args.format.upper()}")
+        if output_format == "html":
+            print(f"🌐 Open in browser: file://{Path(result_path).absolute()}")
+        return 0
+    except FileNotFoundError as e:
+        print(f"❌ Configuration file not found: {e}")
+        return 1
+    except ValueError as e:
+        print(f"❌ Validation error: {e}")
+        return 1
+    except Exception as e:
+        print(f"❌ Error generating documentation: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        return 1
+def diff_command(args):
+    """
+    Compare two pipeline run stories.
+    Prints run-level duration, success rate and row counts for both stories
+    and, with ``args.detailed``, a per-node comparison keyed by "node_name".
+    Args:
+        args: Parsed arguments with story1, story2 paths, detailed and verbose
+    Returns:
+        Exit code (0 for success, 1 for error)
+    """
+    # Imported before the try block so the except clause below can always
+    # resolve json.JSONDecodeError.
+    import json
+    try:
+        print("📊 Comparing stories...")
+        print(f"  Story 1: {args.story1}")
+        print(f"  Story 2: {args.story2}")
+        # Load story metadata from JSON files. JSON is UTF-8, so do not rely
+        # on the platform's locale encoding.
+        with open(args.story1, "r", encoding="utf-8") as f:
+            story1_data = json.load(f)
+        with open(args.story2, "r", encoding="utf-8") as f:
+            story2_data = json.load(f)
+        # Compare basic metrics
+        print("\n📈 Comparison Results:")
+        print("=" * 60)
+        # Pipeline info
+        print(f"\nPipeline: {story1_data.get('pipeline_name', 'Unknown')}")
+        # Execution times
+        duration1 = _story_metric(story1_data, "duration")
+        duration2 = _story_metric(story2_data, "duration")
+        print("\n⏱️  Execution Time:")
+        print(f"  Story 1: {duration1:.2f}s")
+        print(f"  Story 2: {duration2:.2f}s")
+        time_diff = duration2 - duration1
+        if time_diff > 0:
+            print(f"  Difference: +{time_diff:.2f}s (slower)")
+        elif time_diff < 0:
+            print(f"  Difference: {time_diff:.2f}s (faster)")
+        else:
+            print("  Difference: No change")
+        # Success rate
+        print("\n✅ Success Rate:")
+        print(f"  Story 1: {_story_metric(story1_data, 'success_rate'):.1f}%")
+        print(f"  Story 2: {_story_metric(story2_data, 'success_rate'):.1f}%")
+        # Row counts
+        rows_total1 = _story_metric(story1_data, "total_rows_processed")
+        rows_total2 = _story_metric(story2_data, "total_rows_processed")
+        print("\n📊 Rows Processed:")
+        print(f"  Story 1: {rows_total1:,}")
+        print(f"  Story 2: {rows_total2:,}")
+        row_diff = rows_total2 - rows_total1
+        if row_diff != 0:
+            print(f"  Difference: {row_diff:+,} rows")
+        # Node-level differences
+        if args.detailed:
+            print("\n🔍 Node-Level Details:")
+            print("-" * 60)
+            story1_nodes = {n["node_name"]: n for n in story1_data.get("nodes", [])}
+            story2_nodes = {n["node_name"]: n for n in story2_data.get("nodes", [])}
+            all_nodes = set(story1_nodes) | set(story2_nodes)
+            for node_name in sorted(all_nodes):
+                node1 = story1_nodes.get(node_name, {})
+                node2 = story2_nodes.get(node_name, {})
+                print(f"\n  {node_name}:")
+                if node1 and node2:
+                    # Compare durations
+                    dur1 = _story_metric(node1, "duration")
+                    dur2 = _story_metric(node2, "duration")
+                    dur_diff = dur2 - dur1
+                    print(f"    Duration: {dur1:.3f}s → {dur2:.3f}s ({dur_diff:+.3f}s)")
+                    # Compare row counts
+                    rows1 = _story_metric(node1, "rows_out")
+                    rows2 = _story_metric(node2, "rows_out")
+                    if rows1 or rows2:
+                        node_row_diff = rows2 - rows1
+                        print(f"    Rows: {rows1:,} → {rows2:,} ({node_row_diff:+,})")
+                    # Status changes
+                    status1 = node1.get("status", "unknown")
+                    status2 = node2.get("status", "unknown")
+                    if status1 != status2:
+                        print(f"    ⚠️  Status changed: {status1} → {status2}")
+                elif node1:
+                    print("    ❌ Removed in Story 2")
+                elif node2:
+                    print("    ➕ Added in Story 2")
+        print("\n" + "=" * 60)
+        return 0
+    except FileNotFoundError as e:
+        print(f"❌ Story file not found: {e}")
+        return 1
+    except json.JSONDecodeError as e:
+        print(f"❌ Invalid JSON in story file: {e}")
+        return 1
+    except Exception as e:
+        print(f"❌ Error comparing stories: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        return 1
+def _story_metric(record, key):
+    """Return ``record[key]``, treating a missing or null value as 0."""
+    # A JSON null would otherwise reach a numeric format spec and raise.
+    return record.get(key) or 0
+def list_command(args):
+    """
+    Print the story files found in a directory, newest first.
+    Only ``*.json``, ``*.html`` and ``*.md`` files directly inside the
+    directory are considered; at most ``args.limit`` of them are shown.
+    Args:
+        args: Parsed arguments with directory path and limit
+    Returns:
+        Exit code (1 when the directory is missing or listing fails, else 0)
+    """
+    try:
+        from datetime import datetime
+        story_dir = Path(args.directory)
+        if not story_dir.exists():
+            print(f"❌ Directory not found: {story_dir}")
+            return 1
+        # Collect candidates pattern by pattern (JSON, HTML, MD)
+        patterns = ("*.json", "*.html", "*.md")
+        found = [path for pattern in patterns for path in story_dir.glob(pattern)]
+        if not found:
+            print(f"ℹ️  No story files found in {story_dir}")
+            return 0
+        # Most recently modified first
+        newest_first = sorted(found, key=lambda path: path.stat().st_mtime, reverse=True)
+        print(f"\n📚 Stories in {story_dir}:")
+        print("=" * 80)
+        for entry in newest_first[: args.limit]:
+            info = entry.stat()
+            n_bytes = info.st_size
+            modified = datetime.fromtimestamp(info.st_mtime)
+            # Human-readable size, largest unit first
+            if n_bytes >= 1024 * 1024:
+                size_str = f"{n_bytes / 1024 / 1024:.1f}MB"
+            elif n_bytes >= 1024:
+                size_str = f"{n_bytes / 1024:.1f}KB"
+            else:
+                size_str = f"{n_bytes}B"
+            print(f"\n  📄 {entry.name}")
+            print(f"     Modified: {modified.strftime('%Y-%m-%d %H:%M:%S')}")
+            print(f"     Size: {size_str}")
+            print(f"     Path: {entry}")
+        hidden = len(newest_first) - args.limit
+        if hidden > 0:
+            print(f"\n  ... and {hidden} more")
+            print("  (Use --limit to show more)")
+        print()
+        return 0
+    except Exception as e:
+        print(f"❌ Error listing stories: {e}")
+        return 1
+def add_story_parser(subparsers):
+    """
+    Register the ``story`` command with its generate, diff and list subcommands.
+    Args:
+        subparsers: Argparse subparsers object to attach the command to
+    Returns:
+        Story parser
+    """
+    story_parser = subparsers.add_parser(
+        "story", help="Generate and manage pipeline documentation stories"
+    )
+    commands = story_parser.add_subparsers(dest="story_command", help="Story commands")
+    # Each entry: (subcommand name, help text, [(flags, add_argument options), ...])
+    command_specs = [
+        # odibi story generate
+        (
+            "generate",
+            "Generate documentation story from pipeline config",
+            [
+                (("config",), {"help": "Path to pipeline YAML config file"}),
+                (
+                    ("-o", "--output"),
+                    {"help": "Output file path (auto-generated if not specified)"},
+                ),
+                (
+                    ("-f", "--format"),
+                    {
+                        "choices": ["html", "markdown", "md", "json"],
+                        "default": "html",
+                        "help": "Output format (default: html)",
+                    },
+                ),
+                (
+                    ("--no-validate",),
+                    {"action": "store_true", "help": "Skip explanation quality validation"},
+                ),
+                (
+                    ("--no-diagram",),
+                    {"action": "store_true", "help": "Exclude flow diagram from documentation"},
+                ),
+                (
+                    ("-t", "--theme"),
+                    {
+                        "default": "default",
+                        "help": "Theme name or path to custom theme YAML (default: default, options: corporate, dark, minimal)",
+                    },
+                ),
+                (
+                    ("-v", "--verbose"),
+                    {"action": "store_true", "help": "Verbose output with stack traces"},
+                ),
+            ],
+        ),
+        # odibi story diff
+        (
+            "diff",
+            "Compare two pipeline run stories",
+            [
+                (("story1",), {"help": "Path to first story JSON file"}),
+                (("story2",), {"help": "Path to second story JSON file"}),
+                (
+                    ("-d", "--detailed"),
+                    {"action": "store_true", "help": "Show detailed node-level comparison"},
+                ),
+                (("-v", "--verbose"), {"action": "store_true", "help": "Verbose output"}),
+            ],
+        ),
+        # odibi story list
+        (
+            "list",
+            "List available story files",
+            [
+                (
+                    ("-d", "--directory"),
+                    {
+                        "default": "stories/runs",
+                        "help": "Directory to search for stories (default: stories/runs)",
+                    },
+                ),
+                (
+                    ("-l", "--limit"),
+                    {
+                        "type": int,
+                        "default": 10,
+                        "help": "Maximum number of stories to show (default: 10)",
+                    },
+                ),
+            ],
+        ),
+    ]
+    # Registration order matches the table above, so help output is unchanged.
+    for name, summary, arguments in command_specs:
+        command_parser = commands.add_parser(name, help=summary)
+        for flags, options in arguments:
+            command_parser.add_argument(*flags, **options)
+    return story_parser

odibi/cli/system.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""System CLI command for managing system catalog operations."""
+from pathlib import Path
+from odibi.pipeline import PipelineManager
+from odibi.state import create_state_backend, create_sync_source_backend, sync_system_data
+from odibi.utils.extensions import load_extensions
+from odibi.utils.logging import logger
+def add_system_parser(subparsers):
+    """Register the ``system`` command and its ``sync`` subcommand.
+    Args:
+        subparsers: Argparse subparsers object to attach the command to
+    Returns:
+        The ``system`` parser
+    """
+    system_parser = subparsers.add_parser(
+        "system",
+        help="Manage System Catalog operations",
+        description="Commands for syncing and managing system catalog data",
+    )
+    commands = system_parser.add_subparsers(dest="system_command", help="System commands")
+    # odibi system sync
+    sync = commands.add_parser("sync", help="Sync system data from source to target backend")
+    # (flags, add_argument options), registered in this order
+    sync_arguments = [
+        (("config",), {"help": "Path to YAML config file"}),
+        (
+            ("--env",),
+            {"default": None, "help": "Environment to apply overrides (e.g., dev, qat, prod)"},
+        ),
+        (
+            ("--tables",),
+            {
+                "nargs": "+",
+                "choices": ["runs", "state"],
+                "default": None,
+                "help": "Tables to sync (default: all)",
+            },
+        ),
+        (
+            ("--dry-run",),
+            {
+                "action": "store_true",
+                "help": "Show what would be synced without making changes",
+            },
+        ),
+    ]
+    for flags, options in sync_arguments:
+        sync.add_argument(*flags, **options)
+    return system_parser
+def system_command(args):
+    """Route a parsed ``system`` invocation to its subcommand handler.
+    Prints usage and returns 1 when no subcommand was given; prints an error
+    and returns 1 for a subcommand without a handler. Otherwise returns the
+    handler's exit code.
+    """
+    requested = getattr(args, "system_command", None)
+    if requested is None:
+        usage_lines = (
+            "Usage: odibi system <command>",
+            "\nAvailable commands:",
+            "  sync       Sync system data from source to target backend",
+        )
+        for line in usage_lines:
+            print(line)
+        return 1
+    # "sync" is the only subcommand with a handler.
+    if requested == "sync":
+        return _sync_command(args)
+    print(f"Unknown system command: {requested}")
+    return 1
+def _sync_command(args) -> int:
+    """Sync system catalog data from the configured ``sync_from`` source to the target backend.
+    Args:
+        args: Parsed arguments with config, tables and dry_run (env is optional)
+    Returns:
+        Exit code (0 for success or dry run, 1 for error)
+    """
+    try:
+        config_file = Path(args.config).resolve()
+        config_dir = config_file.parent
+        # Extensions are loaded from the config directory, then its parent
+        # (when that is a different directory), then the working directory
+        # (when it is not the config directory).
+        load_extensions(config_dir)
+        if config_dir.parent != config_dir:
+            load_extensions(config_dir.parent)
+        if config_dir != Path.cwd():
+            load_extensions(Path.cwd())
+        environment = getattr(args, "env", None)
+        project_config = PipelineManager.from_yaml(args.config, environment=environment).config
+        if not project_config.system:
+            logger.error("System Catalog not configured. Add 'system' section to config.")
+            return 1
+        sync_from = project_config.system.sync_from
+        if not sync_from:
+            logger.error(
+                "No sync_from configured in system config. "
+                "Add 'sync_from' section with connection and path."
+            )
+            return 1
+        project_root = str(config_dir)
+        # Both backends are built before the dry-run check, as in the original flow.
+        source_backend = create_sync_source_backend(
+            sync_from_config=sync_from,
+            connections=project_config.connections,
+            project_root=project_root,
+        )
+        target_backend = create_state_backend(
+            config=project_config,
+            project_root=project_root,
+        )
+        source_conn = sync_from.connection
+        target_conn = project_config.system.connection
+        tables = args.tables if args.tables else ["runs", "state"]
+        if args.dry_run:
+            print("[DRY RUN] Would sync system data:")
+            print(f"  Source: {source_conn}")
+            print(f"  Target: {target_conn}")
+            print(f"  Tables: {', '.join(tables)}")
+            return 0
+        print(f"Syncing system data from '{source_conn}' to '{target_conn}'...")
+        summary = sync_system_data(
+            source_backend=source_backend,
+            target_backend=target_backend,
+            tables=tables,
+        )
+        print("\nSync complete!")
+        print(f"  Runs synced:  {summary['runs']}")
+        print(f"  State synced: {summary['state']}")
+        return 0
+    except Exception as e:
+        logger.error(f"Sync failed: {e}")
+        return 1