odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,165 @@
1
+ """Validation transformers."""
2
+
3
+ import time
4
+ from typing import Any, List
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+ from odibi.context import EngineContext
9
+ from odibi.exceptions import ValidationError
10
+ from odibi.registry import transform
11
+ from odibi.utils.logging_context import get_logging_context
12
+
13
+
14
class CrossCheckParams(BaseModel):
    """
    Parameters accepted by the ``cross_check`` transformer.

    A cross-check compares several upstream nodes against each other
    instead of transforming a single dataset.

    Example (Row Count Mismatch):
    ```yaml
    transformer: "cross_check"
    params:
      type: "row_count_diff"
      inputs: ["node_a", "node_b"]
      threshold: 0.05  # Allow 5% difference
    ```

    Example (Schema Match):
    ```yaml
    transformer: "cross_check"
    params:
      type: "schema_match"
      inputs: ["staging_orders", "prod_orders"]
    ```
    """

    # Which comparison to run.
    type: str = Field(description="Check type: 'row_count_diff', 'schema_match'")
    # Names of the nodes whose datasets are compared.
    inputs: List[str] = Field(description="List of node names to compare")
    # Tolerated relative difference; defaults to 0.0.
    threshold: float = Field(default=0.0, description="Threshold for diff (0.0-1.0)")
39
+
40
+
41
# Check types implemented by cross_check; anything else is a configuration error.
_SUPPORTED_CHECK_TYPES = ("row_count_diff", "schema_match")


def _available_inputs(context: EngineContext) -> Any:
    """Return the dataset names registered in the context, or None if they cannot be listed."""
    store = getattr(context.context, "_data", None)
    return list(store.keys()) if store is not None else None


def _row_count_failures(counts: dict, base_name: str, threshold: float) -> List[str]:
    """Compare every row count against the baseline and describe each breach of the threshold."""
    base_count = counts[base_name]
    failures = []
    for name, count in counts.items():
        if name == base_name:
            continue

        if base_count == 0:
            # A relative diff is undefined for an empty baseline: treat any
            # rows on the other side as a 100% difference.
            diff = 1.0 if count > 0 else 0.0
        else:
            diff = abs(count - base_count) / base_count

        if diff > threshold:
            failures.append(
                f"Row count mismatch: {name} ({count}) vs {base_name} ({base_count}). "
                f"Diff {diff:.1%} > {threshold:.1%}"
            )
    return failures


def _schema_failures(context: EngineContext, dfs: dict, base_name: str) -> List[str]:
    """Compare every dataset's schema against the baseline schema and describe each mismatch."""
    base_schema = context.engine.get_schema(dfs[base_name])
    failures = []
    for name, df in dfs.items():
        if name == base_name:
            continue

        schema = context.engine.get_schema(df)
        if base_schema == schema:
            continue

        base_items = set(base_schema.items())
        current_items = set(schema.items())
        missing = base_items - current_items
        extra = current_items - base_items

        msg = f"Schema mismatch: {name} vs {base_name}."
        if missing:
            msg += f" Missing/Changed: {missing}"
        if extra:
            msg += f" Extra/Changed: {extra}"
        failures.append(msg)
    return failures


@transform("cross_check", param_model=CrossCheckParams)
def cross_check(context: EngineContext, params: CrossCheckParams) -> Any:
    """
    Perform cross-node validation checks.

    The first entry of ``params.inputs`` is the baseline; every other input
    is compared against it.

    Args:
        context: Engine context used to look up the input datasets and to
            reach the engine's ``count_rows`` / ``get_schema`` operations.
        params: Check type, input node names and tolerated threshold.

    Returns:
        None. This transformer does not produce a DataFrame.

    Raises:
        ValueError: If fewer than two inputs are given, the check type is not
            supported, or an input is missing from the context.
        ValidationError: If the selected check fails for any input.
    """
    ctx = get_logging_context()
    # perf_counter is monotonic, so the duration cannot be skewed by clock changes.
    started = time.perf_counter()

    ctx.debug(
        "CrossCheck starting",
        check_type=params.type,
        inputs=params.inputs,
        threshold=params.threshold,
    )

    if len(params.inputs) < 2:
        ctx.error(
            "CrossCheck failed: insufficient inputs",
            inputs_count=len(params.inputs),
        )
        raise ValueError(
            f"Cross-check requires at least 2 inputs to compare, but got {len(params.inputs)}. "
            f"Inputs provided: {params.inputs!r}. "
            "Add another input dataset to the 'inputs' list."
        )

    # Without this guard an unknown (e.g. misspelled) type would skip every
    # check and be reported as passed.
    if params.type not in _SUPPORTED_CHECK_TYPES:
        ctx.error(
            "CrossCheck failed: unsupported check type",
            check_type=params.type,
        )
        raise ValueError(
            f"Unsupported cross-check type {params.type!r}. "
            "Supported types: 'row_count_diff', 'schema_match'."
        )

    dfs = {}
    for name in params.inputs:
        df = context.context.get(name)
        if df is None:
            available = _available_inputs(context)
            ctx.error(
                "CrossCheck failed: input not found",
                missing_input=name,
                available_inputs=available,
            )
            raise ValueError(
                f"Cross-check input '{name}' not found in context. "
                f"Available inputs: {available if available is not None else 'unknown'}. "
                f"Ensure '{name}' is listed in 'depends_on' for this node."
            )
        dfs[name] = df

    base_name = params.inputs[0]
    if params.type == "row_count_diff":
        counts = {name: context.engine.count_rows(df) for name, df in dfs.items()}
        ctx.debug(
            "CrossCheck row counts",
            counts=counts,
        )
        failures = _row_count_failures(counts, base_name, params.threshold)
    else:  # "schema_match" (the type was validated above)
        failures = _schema_failures(context, dfs, base_name)

    if failures:
        ctx.warning(
            "CrossCheck validation failed",
            failures=failures,
        )
        raise ValidationError("cross_check", failures)

    elapsed_ms = (time.perf_counter() - started) * 1000
    ctx.debug(
        "CrossCheck completed",
        check_type=params.type,
        passed=True,
        elapsed_ms=round(elapsed_ms, 2),
    )

    return None
odibi/ui/__init__.py ADDED
File without changes
odibi/ui/app.py ADDED
@@ -0,0 +1,195 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+ from fastapi import FastAPI, Request
5
+ from fastapi.responses import HTMLResponse
6
+ from fastapi.staticfiles import StaticFiles
7
+ from fastapi.templating import Jinja2Templates
8
+
9
+ from odibi.state import StateManager
10
+
11
+ app = FastAPI(title="Odibi UI")
12
+
13
+ # Resolve paths
14
+ BASE_DIR = Path(__file__).parent
15
+ TEMPLATES_DIR = BASE_DIR / "templates"
16
+
17
+ templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
18
+
19
+
20
@app.get("/", response_class=HTMLResponse)
async def dashboard(request: Request):
    """Render the landing page with one summary row per pipeline found in the stored state."""
    config_path = os.getenv("ODIBI_CONFIG")
    if not config_path:
        # No explicit config: probe the conventional file names in the
        # current working directory and take the first one that exists.
        config_path = next(
            (
                candidate
                for candidate in ("odibi.yaml", "odibi.yml", "project.yaml")
                if os.path.exists(candidate)
            ),
            config_path,
        )

    state = {}
    if config_path:
        from odibi.config import load_config_from_file
        from odibi.state import create_state_backend

        try:
            config = load_config_from_file(config_path)
            # Create backend connected to System Catalog
            backend = create_state_backend(config, project_root=os.path.dirname(config_path))
            manager = StateManager(backend=backend)
            state = manager.backend.load_state()
        except Exception as e:
            # Best effort: an unreadable state still renders an empty dashboard.
            print(f"Failed to load state backend: {e}")
            state = {}

    pipeline_states = state["pipelines"] if state and "pipelines" in state else {}

    pipelines = []
    for name, p_data in pipeline_states.items():
        nodes = p_data.get("nodes", {})
        node_count = len(nodes)
        failed_count = sum(1 for node in nodes.values() if not node.get("success"))

        # The pipeline status is inferred from its nodes: no nodes means we
        # cannot tell, any unsuccessful node marks the pipeline as failed.
        if node_count == 0:
            status = "UNKNOWN"
        elif failed_count:
            status = "FAILED"
        else:
            status = "SUCCESS"

        pipelines.append(
            {
                "name": name,
                "last_run": p_data.get("last_run"),
                "status": status,
                "nodes_total": node_count,
                "nodes_success": node_count - failed_count,
            }
        )

    return templates.TemplateResponse(
        "index.html", {"request": request, "pipelines": pipelines, "project": "Odibi Project"}
    )
77
+
78
+
79
@app.get("/stories", response_class=HTMLResponse)
async def stories(request: Request):
    """
    List the generated story reports, newest first.

    The stories root defaults to ``./stories``. When ``ODIBI_CONFIG`` points
    to a config whose story connection is of type ``local``, the root is
    ``<connection base_path>/<story path>`` instead (a relative base path is
    resolved against the config file's directory).

    Only ``ODIBI_CONFIG`` is consulted here, matching the module-level static
    mount that serves the linked files under ``/stories_static``.
    """
    import logging
    from urllib.parse import quote

    stories_root = Path("stories")
    config_path = os.getenv("ODIBI_CONFIG")
    if config_path:
        from odibi.config import load_config_from_file

        try:
            config = load_config_from_file(config_path)
            conn_name = config.story.connection
            conn_config = config.connections.get(conn_name)
            if conn_config and conn_config.type == "local":
                base = conn_config.base_path
                if not os.path.isabs(base):
                    base = os.path.join(os.path.dirname(config_path), base)
                stories_root = Path(base) / config.story.path
        except Exception as exc:
            # Best effort: keep the default root, but leave a trace instead of
            # hiding the reason the configured location was ignored.
            logging.getLogger(__name__).warning(
                "Could not resolve stories path from %s: %s", config_path, exc
            )

    runs = []

    # is_dir() rather than exists(): iterdir() raises on a regular file.
    if stories_root.is_dir():
        # Expected layout: <pipeline>/<date>/<run>.html
        for pipeline_dir in stories_root.iterdir():
            if not pipeline_dir.is_dir():
                continue
            for date_dir in pipeline_dir.iterdir():
                if not date_dir.is_dir():
                    continue
                for report in date_dir.glob("*.html"):
                    # Percent-encode each segment so names containing spaces,
                    # '#', '?' and similar characters still yield a valid link.
                    url_path = "/".join(
                        quote(segment)
                        for segment in (pipeline_dir.name, date_dir.name, report.name)
                    )
                    runs.append(
                        {
                            "pipeline": pipeline_dir.name,
                            "date": date_dir.name,
                            "name": report.name,
                            "path": f"/stories_static/{url_path}",
                        }
                    )

    # Newest date first, then by file name descending.
    runs.sort(key=lambda run: (run["date"], run["name"]), reverse=True)

    return templates.TemplateResponse("stories.html", {"request": request, "runs": runs})
123
+
124
+
125
@app.get("/config", response_class=HTMLResponse)
async def config_view(request: Request):
    """
    Show the raw text of the active configuration file.

    The file is taken from ``ODIBI_CONFIG``; if that is unset, the first
    existing file among ``odibi.yaml``, ``odibi.yml`` and ``project.yaml`` in
    the working directory is used. Read failures are reported in the page
    (``error``) rather than raised.
    """
    # NOTE(review): the file content is rendered verbatim; if configs can hold
    # inline credentials this page exposes them - confirm it is acceptable.
    config_path = os.getenv("ODIBI_CONFIG")
    content = ""
    error = None

    if not config_path:
        # Try default locations
        config_path = next(
            (
                candidate
                for candidate in ("odibi.yaml", "odibi.yml", "project.yaml")
                if os.path.exists(candidate)
            ),
            config_path,
        )

    if config_path:
        # Both the explicit and the discovered path go through the same guarded
        # read, so an unreadable file is reported instead of causing a 500.
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            error = str(e)
    else:
        # Decided by whether a file was found, not by whether it has content:
        # an existing but empty config is not "missing".
        error = "No configuration file found. Run with 'odibi ui config.yaml'"

    return templates.TemplateResponse(
        "config.html",
        {"request": request, "config_path": config_path, "content": content, "error": error},
    )
152
+
153
+
154
# Mount static files for stories.
# The mount is created once, when this module is imported, so the stories
# directory has to be known at that point. The original note here states that
# the CLI sets ODIBI_CONFIG before startup; the path is resolved from that
# config when possible and falls back to ./stories otherwise.
import logging

_logger = logging.getLogger(__name__)

config_path_env = os.getenv("ODIBI_CONFIG")
_logger.debug("ODIBI_CONFIG env var: %s", config_path_env)
static_stories_dir = Path("stories")

if config_path_env and os.path.exists(config_path_env):
    # Resolve absolute path to avoid ambiguity
    abs_config_path = Path(config_path_env).resolve()

    from odibi.config import load_config_from_file

    try:
        # Use the official loader to get Pydantic defaults/validation
        config = load_config_from_file(str(abs_config_path))

        s_conn = config.story.connection
        s_path = config.story.path
        _logger.debug("Story connection: %s, path: %s", s_conn, s_path)
        _logger.debug("Available connections: %s", list(config.connections.keys()))

        if s_conn in config.connections:
            c_conf = config.connections[s_conn]
            # Only local connections map to a directory that can be served.
            if c_conf.type == "local":
                base = c_conf.base_path
                if not os.path.isabs(base):
                    # Relative base paths are relative to the config file.
                    base = os.path.join(abs_config_path.parent, base)
                static_stories_dir = Path(base) / s_path
                _logger.debug(
                    "Config path: %s, base: %s, stories dir: %s (is dir: %s)",
                    abs_config_path,
                    base,
                    static_stories_dir,
                    static_stories_dir.is_dir(),
                )
    except Exception as e:
        # Best effort: fall back to the default directory, but say why.
        _logger.warning("Failed to resolve story path from %s: %s", abs_config_path, e)

# is_dir() rather than exists(): StaticFiles rejects a path that is not a
# directory, which would make importing this module fail.
if static_stories_dir.is_dir():
    _logger.debug("Mounting stories from %s", static_stories_dir)
    app.mount(
        "/stories_static", StaticFiles(directory=str(static_stories_dir)), name="stories_static"
    )
@@ -0,0 +1,66 @@
1
+ """Utilities for ODIBI setup and configuration.
2
+
3
+ Includes:
4
+ - Configuration loading with env var substitution
5
+ - Structured logging and context-aware logging
6
+ - Key Vault and connection helpers
7
+ - Rich console output utilities
8
+ - Pipeline progress visualization
9
+ """
10
+
11
+ from .config_loader import load_yaml_with_env
12
+ from .console import (
13
+ error,
14
+ get_console,
15
+ info,
16
+ is_rich_available,
17
+ print_panel,
18
+ print_rule,
19
+ print_table,
20
+ success,
21
+ warning,
22
+ )
23
+ from .logging import StructuredLogger, configure_logging, logger
24
+ from .logging_context import (
25
+ LoggingContext,
26
+ OperationMetrics,
27
+ OperationType,
28
+ create_logging_context,
29
+ get_logging_context,
30
+ set_logging_context,
31
+ )
32
+ from .progress import NodeStatus, PipelineProgress
33
+ from .setup_helpers import (
34
+ configure_connections_parallel,
35
+ fetch_keyvault_secrets_parallel,
36
+ validate_databricks_environment,
37
+ )
38
+
39
# Public API of the utils package; every name is imported above.
__all__ = [
    # Setup helpers
    "fetch_keyvault_secrets_parallel",
    "configure_connections_parallel",
    "validate_databricks_environment",
    # Configuration loading
    "load_yaml_with_env",
    # Structured logging
    "StructuredLogger",
    "configure_logging",
    "logger",
    # Context-aware logging
    "LoggingContext",
    "OperationMetrics",
    "OperationType",
    "create_logging_context",
    "get_logging_context",
    "set_logging_context",
    # Console utilities
    "is_rich_available",
    "get_console",
    "success",
    "error",
    "warning",
    "info",
    "print_table",
    "print_panel",
    "print_rule",
    # Progress utilities
    "NodeStatus",
    "PipelineProgress",
]