odibi-2.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odibi/__init__.py +32 -0
- odibi/__main__.py +8 -0
- odibi/catalog.py +3011 -0
- odibi/cli/__init__.py +11 -0
- odibi/cli/__main__.py +6 -0
- odibi/cli/catalog.py +553 -0
- odibi/cli/deploy.py +69 -0
- odibi/cli/doctor.py +161 -0
- odibi/cli/export.py +66 -0
- odibi/cli/graph.py +150 -0
- odibi/cli/init_pipeline.py +242 -0
- odibi/cli/lineage.py +259 -0
- odibi/cli/main.py +215 -0
- odibi/cli/run.py +98 -0
- odibi/cli/schema.py +208 -0
- odibi/cli/secrets.py +232 -0
- odibi/cli/story.py +379 -0
- odibi/cli/system.py +132 -0
- odibi/cli/test.py +286 -0
- odibi/cli/ui.py +31 -0
- odibi/cli/validate.py +39 -0
- odibi/config.py +3541 -0
- odibi/connections/__init__.py +9 -0
- odibi/connections/azure_adls.py +499 -0
- odibi/connections/azure_sql.py +709 -0
- odibi/connections/base.py +28 -0
- odibi/connections/factory.py +322 -0
- odibi/connections/http.py +78 -0
- odibi/connections/local.py +119 -0
- odibi/connections/local_dbfs.py +61 -0
- odibi/constants.py +17 -0
- odibi/context.py +528 -0
- odibi/diagnostics/__init__.py +12 -0
- odibi/diagnostics/delta.py +520 -0
- odibi/diagnostics/diff.py +169 -0
- odibi/diagnostics/manager.py +171 -0
- odibi/engine/__init__.py +20 -0
- odibi/engine/base.py +334 -0
- odibi/engine/pandas_engine.py +2178 -0
- odibi/engine/polars_engine.py +1114 -0
- odibi/engine/registry.py +54 -0
- odibi/engine/spark_engine.py +2362 -0
- odibi/enums.py +7 -0
- odibi/exceptions.py +297 -0
- odibi/graph.py +426 -0
- odibi/introspect.py +1214 -0
- odibi/lineage.py +511 -0
- odibi/node.py +3341 -0
- odibi/orchestration/__init__.py +0 -0
- odibi/orchestration/airflow.py +90 -0
- odibi/orchestration/dagster.py +77 -0
- odibi/patterns/__init__.py +24 -0
- odibi/patterns/aggregation.py +599 -0
- odibi/patterns/base.py +94 -0
- odibi/patterns/date_dimension.py +423 -0
- odibi/patterns/dimension.py +696 -0
- odibi/patterns/fact.py +748 -0
- odibi/patterns/merge.py +128 -0
- odibi/patterns/scd2.py +148 -0
- odibi/pipeline.py +2382 -0
- odibi/plugins.py +80 -0
- odibi/project.py +581 -0
- odibi/references.py +151 -0
- odibi/registry.py +246 -0
- odibi/semantics/__init__.py +71 -0
- odibi/semantics/materialize.py +392 -0
- odibi/semantics/metrics.py +361 -0
- odibi/semantics/query.py +743 -0
- odibi/semantics/runner.py +430 -0
- odibi/semantics/story.py +507 -0
- odibi/semantics/views.py +432 -0
- odibi/state/__init__.py +1203 -0
- odibi/story/__init__.py +55 -0
- odibi/story/doc_story.py +554 -0
- odibi/story/generator.py +1431 -0
- odibi/story/lineage.py +1043 -0
- odibi/story/lineage_utils.py +324 -0
- odibi/story/metadata.py +608 -0
- odibi/story/renderers.py +453 -0
- odibi/story/templates/run_story.html +2520 -0
- odibi/story/themes.py +216 -0
- odibi/testing/__init__.py +13 -0
- odibi/testing/assertions.py +75 -0
- odibi/testing/fixtures.py +85 -0
- odibi/testing/source_pool.py +277 -0
- odibi/transformers/__init__.py +122 -0
- odibi/transformers/advanced.py +1472 -0
- odibi/transformers/delete_detection.py +610 -0
- odibi/transformers/manufacturing.py +1029 -0
- odibi/transformers/merge_transformer.py +778 -0
- odibi/transformers/relational.py +675 -0
- odibi/transformers/scd.py +579 -0
- odibi/transformers/sql_core.py +1356 -0
- odibi/transformers/validation.py +165 -0
- odibi/ui/__init__.py +0 -0
- odibi/ui/app.py +195 -0
- odibi/utils/__init__.py +66 -0
- odibi/utils/alerting.py +667 -0
- odibi/utils/config_loader.py +343 -0
- odibi/utils/console.py +231 -0
- odibi/utils/content_hash.py +202 -0
- odibi/utils/duration.py +43 -0
- odibi/utils/encoding.py +102 -0
- odibi/utils/extensions.py +28 -0
- odibi/utils/hashing.py +61 -0
- odibi/utils/logging.py +203 -0
- odibi/utils/logging_context.py +740 -0
- odibi/utils/progress.py +429 -0
- odibi/utils/setup_helpers.py +302 -0
- odibi/utils/telemetry.py +140 -0
- odibi/validation/__init__.py +62 -0
- odibi/validation/engine.py +765 -0
- odibi/validation/explanation_linter.py +155 -0
- odibi/validation/fk.py +547 -0
- odibi/validation/gate.py +252 -0
- odibi/validation/quarantine.py +605 -0
- odibi/writers/__init__.py +15 -0
- odibi/writers/sql_server_writer.py +2081 -0
- odibi-2.5.0.dist-info/METADATA +255 -0
- odibi-2.5.0.dist-info/RECORD +124 -0
- odibi-2.5.0.dist-info/WHEEL +5 -0
- odibi-2.5.0.dist-info/entry_points.txt +2 -0
- odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
- odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/cli/lineage.py
ADDED
@@ -0,0 +1,259 @@
"""CLI commands for cross-pipeline lineage tracking."""

import json
from typing import Dict, List, Optional

from odibi.config import load_config_from_file


def add_lineage_parser(subparsers) -> None:
    """Add lineage-related subcommands to the CLI."""
    lineage_parser = subparsers.add_parser("lineage", help="Cross-pipeline lineage commands")
    lineage_subparsers = lineage_parser.add_subparsers(dest="lineage_command")

    # odibi lineage upstream <table>
    upstream_parser = lineage_subparsers.add_parser(
        "upstream", help="Trace upstream sources of a table"
    )
    upstream_parser.add_argument("table", help="Table path (e.g., gold/customer_360)")
    upstream_parser.add_argument("--config", help="Path to YAML config file")
    upstream_parser.add_argument(
        "--depth", type=int, default=3, help="Maximum depth to traverse (default: 3)"
    )
    upstream_parser.add_argument(
        "--format",
        choices=["tree", "json"],
        default="tree",
        help="Output format (default: tree)",
    )

    # odibi lineage downstream <table>
    downstream_parser = lineage_subparsers.add_parser(
        "downstream", help="Trace downstream consumers of a table"
    )
    downstream_parser.add_argument("table", help="Table path (e.g., bronze/customers_raw)")
    downstream_parser.add_argument("--config", help="Path to YAML config file")
    downstream_parser.add_argument(
        "--depth", type=int, default=3, help="Maximum depth to traverse (default: 3)"
    )
    downstream_parser.add_argument(
        "--format",
        choices=["tree", "json"],
        default="tree",
        help="Output format (default: tree)",
    )

    # odibi lineage impact <table>
    impact_parser = lineage_subparsers.add_parser(
        "impact", help="Impact analysis for schema changes"
    )
    impact_parser.add_argument("table", help="Table path to analyze impact for")
    impact_parser.add_argument("--config", help="Path to YAML config file")
    impact_parser.add_argument(
        "--depth", type=int, default=3, help="Maximum depth to traverse (default: 3)"
    )


def lineage_command(args) -> int:
    """Execute lineage commands."""
    if not hasattr(args, "lineage_command") or not args.lineage_command:
        print("Usage: odibi lineage <command> [options]")
        print("Commands: upstream, downstream, impact")
        return 1

    if args.lineage_command == "upstream":
        return _lineage_upstream(args)
    elif args.lineage_command == "downstream":
        return _lineage_downstream(args)
    elif args.lineage_command == "impact":
        return _lineage_impact(args)
    else:
        print(f"Unknown lineage command: {args.lineage_command}")
        return 1


def _get_catalog_manager(config_path: Optional[str]):
    """Get CatalogManager instance from config."""
    if not config_path:
        print("Error: --config is required")
        return None

    try:
        project_config = load_config_from_file(config_path)

        from odibi.catalog import CatalogManager
        from odibi.engine import get_engine

        engine = get_engine(project_config.engine)
        system_conn = project_config.connections.get(project_config.system.connection)

        if hasattr(system_conn, "base_path"):
            base_path = f"{system_conn.base_path.rstrip('/')}/{project_config.system.path}"
        else:
            base_path = project_config.system.path

        catalog = CatalogManager(
            spark=None,
            config=project_config.system,
            base_path=base_path,
            engine=engine,
            connection=system_conn,
        )

        return catalog

    except FileNotFoundError:
        print(f"Error: Config file not found: {config_path}")
        return None
    except Exception as e:
        print(f"Error loading config: {e}")
        return None


def _build_tree(records: List[Dict], root: str, direction: str = "upstream") -> Dict:
    """Build a tree structure from lineage records."""
    tree = {"name": root, "children": []}

    by_depth = {}
    for record in records:
        depth = record.get("depth", 0)
        if depth not in by_depth:
            by_depth[depth] = []
        by_depth[depth].append(record)

    if direction == "upstream":
        depth_0_records = by_depth.get(0, [])
        for record in depth_0_records:
            source = record.get("source_table")
            node_info = ""
            if record.get("source_pipeline") and record.get("source_node"):
                node_info = f" ({record['source_pipeline']}.{record['source_node']})"
            child = {"name": f"{source}{node_info}", "children": []}
            tree["children"].append(child)
    else:
        depth_0_records = by_depth.get(0, [])
        for record in depth_0_records:
            target = record.get("target_table")
            node_info = ""
            if record.get("target_pipeline") and record.get("target_node"):
                node_info = f" ({record['target_pipeline']}.{record['target_node']})"
            child = {"name": f"{target}{node_info}", "children": []}
            tree["children"].append(child)

    return tree


def _print_tree(node: Dict, prefix: str = "", is_last: bool = True, depth: int = 0) -> None:
    """Print a tree structure in ASCII format."""
    connector = "└── " if is_last else "├── "
    if depth == 0:
        print(node["name"])
    else:
        print(f"{prefix}{connector}{node['name']}")

    children = node.get("children", [])
    child_prefix = prefix + ("    " if is_last else "│   ")
    for i, child in enumerate(children):
        is_child_last = i == len(children) - 1
        _print_tree(child, child_prefix, is_child_last, depth + 1)


def _lineage_upstream(args) -> int:
    """Trace upstream lineage for a table."""
    catalog = _get_catalog_manager(args.config)
    if not catalog:
        return 1

    upstream = catalog.get_upstream(args.table, depth=args.depth)

    if not upstream:
        print(f"No upstream lineage found for: {args.table}")
        return 0

    if args.format == "json":
        print(json.dumps(upstream, indent=2, default=str))
        return 0

    print(f"\nUpstream Lineage: {args.table}")
    print("=" * 60)

    tree = _build_tree(upstream, args.table, direction="upstream")
    _print_tree(tree)

    print()
    return 0


def _lineage_downstream(args) -> int:
    """Trace downstream lineage for a table."""
    catalog = _get_catalog_manager(args.config)
    if not catalog:
        return 1

    downstream = catalog.get_downstream(args.table, depth=args.depth)

    if not downstream:
        print(f"No downstream lineage found for: {args.table}")
        return 0

    if args.format == "json":
        print(json.dumps(downstream, indent=2, default=str))
        return 0

    print(f"\nDownstream Lineage: {args.table}")
    print("=" * 60)

    tree = _build_tree(downstream, args.table, direction="downstream")
    _print_tree(tree)

    print()
    return 0


def _lineage_impact(args) -> int:
    """Perform impact analysis for a table."""
    catalog = _get_catalog_manager(args.config)
    if not catalog:
        return 1

    downstream = catalog.get_downstream(args.table, depth=args.depth)

    if not downstream:
        print(f"No downstream dependencies found for: {args.table}")
        return 0

    affected_tables = set()
    affected_pipelines = set()

    for record in downstream:
        target = record.get("target_table")
        if target:
            affected_tables.add(target)
        pipeline = record.get("target_pipeline")
        if pipeline:
            affected_pipelines.add(pipeline)

    print(f"\n⚠️  Impact Analysis: {args.table}")
    print("=" * 60)
    print(f"\nChanges to {args.table} would affect:")
    print()

    if affected_tables:
        print("  Affected Tables:")
        for table in sorted(affected_tables):
            pipeline_info = ""
            for record in downstream:
                if record.get("target_table") == table:
                    if record.get("target_pipeline"):
                        pipeline_info = f" (pipeline: {record['target_pipeline']})"
                    break
            print(f"    - {table}{pipeline_info}")

    print()
    print("  Summary:")
    print(
        f"    Total: {len(affected_tables)} downstream table(s) in {len(affected_pipelines)} pipeline(s)"
    )
    print()

    return 0
odibi/cli/main.py
ADDED
@@ -0,0 +1,215 @@
"""Main CLI entry point."""

import argparse
import sys

from odibi.cli.catalog import add_catalog_parser, catalog_command
from odibi.cli.doctor import add_doctor_parser, doctor_command
from odibi.cli.export import add_export_parser, export_command
from odibi.cli.graph import graph_command
from odibi.cli.init_pipeline import add_init_parser, init_pipeline_command
from odibi.cli.lineage import add_lineage_parser, lineage_command
from odibi.cli.run import run_command
from odibi.cli.schema import add_schema_parser, schema_command
from odibi.cli.secrets import add_secrets_parser, secrets_command
from odibi.cli.story import add_story_parser, story_command
from odibi.cli.system import add_system_parser, system_command
from odibi.cli.test import test_command
from odibi.cli.ui import add_ui_parser, ui_command
from odibi.cli.validate import validate_command
from odibi.introspect import generate_docs
from odibi.utils.telemetry import setup_telemetry


def main():
    """Main CLI entry point."""
    # Configure telemetry early
    setup_telemetry()

    parser = argparse.ArgumentParser(
        description="Odibi Data Pipeline Framework",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  odibi run config.yaml                 Run a pipeline
  odibi validate config.yaml            Validate configuration
  odibi graph config.yaml               Visualize dependencies
  odibi story generate config.yaml      Generate documentation
  odibi story diff run1.json run2.json  Compare two runs
  odibi story list                      List story files
""",
    )

    # Global arguments
    parser.add_argument(
        "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help="Set logging verbosity (default: INFO)",
    )

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # odibi run
    run_parser = subparsers.add_parser("run", help="Execute pipeline")
    run_parser.add_argument("config", help="Path to YAML config file")
    run_parser.add_argument(
        "--env", default=None, help="Environment to apply overrides (e.g., dev, qat, prod)"
    )
    run_parser.add_argument(
        "--dry-run", action="store_true", help="Simulate execution without running operations"
    )
    run_parser.add_argument(
        "--resume", action="store_true", help="Resume from last failure (skip successful nodes)"
    )
    run_parser.add_argument(
        "--parallel", action="store_true", help="Run independent nodes in parallel"
    )
    run_parser.add_argument(
        "--workers",
        type=int,
        default=4,
        help="Number of worker threads for parallel execution (default: 4)",
    )
    run_parser.add_argument(
        "--on-error",
        choices=["fail_fast", "fail_later", "ignore"],
        help="Override error handling strategy",
    )
    run_parser.add_argument(
        "--tag",
        help="Filter nodes by tag (e.g., --tag daily)",
    )
    run_parser.add_argument(
        "--pipeline",
        dest="pipeline_name",
        help="Run specific pipeline by name",
    )
    run_parser.add_argument(
        "--node",
        dest="node_name",
        help="Run specific node by name",
    )

    # odibi deploy
    deploy_parser = subparsers.add_parser("deploy", help="Deploy definitions to System Catalog")
    deploy_parser.add_argument("config", help="Path to YAML config file")
    deploy_parser.add_argument(
        "--env", default=None, help="Environment to apply overrides (e.g., dev, qat, prod)"
    )

    # odibi validate
    validate_parser = subparsers.add_parser("validate", help="Validate config")
    validate_parser.add_argument("config", help="Path to YAML config file")
    validate_parser.add_argument(
        "--env", default=None, help="Environment to apply overrides (e.g., dev, qat, prod)"
    )

    # odibi test
    test_parser = subparsers.add_parser("test", help="Run unit tests for transformations")
    test_parser.add_argument(
        "path", nargs="?", default="tests", help="Path to tests directory or file (default: tests)"
    )
    test_parser.add_argument("--snapshot", action="store_true", help="Update snapshots for tests")

    # odibi docs
    subparsers.add_parser("docs", help="Generate API documentation")

    # odibi graph
    graph_parser = subparsers.add_parser("graph", help="Visualize dependency graph")
    graph_parser.add_argument("config", help="Path to YAML config file")
    graph_parser.add_argument("--pipeline", help="Pipeline name (optional)")
    graph_parser.add_argument(
        "--env", default=None, help="Environment to apply overrides (e.g., dev, qat, prod)"
    )
    graph_parser.add_argument(
        "--format",
        choices=["ascii", "dot", "mermaid"],
        default="ascii",
        help="Output format (default: ascii)",
    )
    graph_parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")

    # odibi story
    add_story_parser(subparsers)

    # odibi secrets
    add_secrets_parser(subparsers)

    # odibi init-pipeline (create/init)
    add_init_parser(subparsers)

    # odibi doctor
    add_doctor_parser(subparsers)

    # odibi ui
    add_ui_parser(subparsers)

    # odibi export
    add_export_parser(subparsers)

    # odibi catalog
    add_catalog_parser(subparsers)

    # odibi schema
    add_schema_parser(subparsers)

    # odibi lineage
    add_lineage_parser(subparsers)

    # odibi system
    add_system_parser(subparsers)

    args = parser.parse_args()

    # Configure logging
    import logging

    logging.basicConfig(
        level=getattr(logging, args.log_level),
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    if args.command == "run":
        return run_command(args)
    elif args.command == "deploy":
        from odibi.cli.deploy import deploy_command

        return deploy_command(args)
    elif args.command == "docs":
        generate_docs()
        return 0
    elif args.command == "validate":
        return validate_command(args)
    elif args.command == "test":
        return test_command(args)
    elif args.command == "graph":
        return graph_command(args)
    elif args.command == "story":
        return story_command(args)
    elif args.command == "secrets":
        return secrets_command(args)
    elif args.command in ["init-pipeline", "create", "init", "generate-project"]:
        return init_pipeline_command(args)
    elif args.command == "doctor":
        return doctor_command(args)
    elif args.command == "ui":
        return ui_command(args)
    elif args.command == "export":
        return export_command(args)
    elif args.command == "catalog":
        return catalog_command(args)
    elif args.command == "schema":
        return schema_command(args)
    elif args.command == "lineage":
        return lineage_command(args)
    elif args.command == "system":
        return system_command(args)
    else:
        parser.print_help()
        return 1


if __name__ == "__main__":
    sys.exit(main())
odibi/cli/run.py
ADDED
@@ -0,0 +1,98 @@
"""Run command implementation."""

from pathlib import Path

from odibi.pipeline import PipelineManager
from odibi.utils.extensions import load_extensions
from odibi.utils.logging import logger


def run_command(args):
    """Execute pipeline from config file."""
    try:
        config_path = Path(args.config).resolve()
        project_root = config_path.parent

        # Change CWD to config directory to resolve relative paths consistently
        import os

        original_cwd = os.getcwd()
        os.chdir(project_root)
        logger.debug(f"Changed working directory to: {project_root}")

        try:
            # Load extensions from config dir (which is now CWD)
            load_extensions(project_root)

            manager = PipelineManager.from_yaml(config_path.name, env=args.env)
            results = manager.run(
                pipelines=getattr(args, "pipeline_name", None),
                dry_run=args.dry_run,
                resume_from_failure=args.resume,
                parallel=args.parallel,
                max_workers=args.workers,
                on_error=args.on_error,
                tag=getattr(args, "tag", None),
                node=getattr(args, "node_name", None),
            )
        finally:
            # Restore CWD
            os.chdir(original_cwd)

        # Check results for failures
        failed = False
        if isinstance(results, dict):
            # Multiple pipelines
            for result in results.values():
                if result.failed:
                    failed = True
                    logger.error(f"Pipeline '{result.pipeline_name}' failed")
                    for node_name in result.failed:
                        node_res = result.node_results.get(node_name)
                        if node_res and node_res.error:
                            logger.error(f"Node '{node_name}' error: {node_res.error}")

                            # Unbury Suggestions
                            error_obj = node_res.error
                            suggestions = getattr(error_obj, "suggestions", [])

                            if not suggestions and hasattr(error_obj, "original_error"):
                                suggestions = getattr(error_obj.original_error, "suggestions", [])

                            if suggestions:
                                logger.info("💡 Suggestions:")
                                for suggestion in suggestions:
                                    logger.info(f"  - {suggestion}")
                            break
        else:
            # Single pipeline
            if results.failed:
                failed = True
                logger.error(f"Pipeline '{results.pipeline_name}' failed")
                for node_name in results.failed:
                    node_res = results.node_results.get(node_name)
                    if node_res and node_res.error:
                        logger.error(f"Node '{node_name}' error: {node_res.error}")

                        # Unbury Suggestions
                        error_obj = node_res.error
                        suggestions = getattr(error_obj, "suggestions", [])

                        if not suggestions and hasattr(error_obj, "original_error"):
                            suggestions = getattr(error_obj.original_error, "suggestions", [])

                        if suggestions:
                            logger.info("Suggestions:")
                            for suggestion in suggestions:
                                logger.info(f"  - {suggestion}")

        if failed:
            logger.error("Pipeline execution failed")
            return 1
        else:
            logger.info("Pipeline completed successfully")
            return 0

    except Exception as e:
        logger.error(f"Pipeline failed: {e}")
        return 1