odibi 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odibi/__init__.py +32 -0
- odibi/__main__.py +8 -0
- odibi/catalog.py +3011 -0
- odibi/cli/__init__.py +11 -0
- odibi/cli/__main__.py +6 -0
- odibi/cli/catalog.py +553 -0
- odibi/cli/deploy.py +69 -0
- odibi/cli/doctor.py +161 -0
- odibi/cli/export.py +66 -0
- odibi/cli/graph.py +150 -0
- odibi/cli/init_pipeline.py +242 -0
- odibi/cli/lineage.py +259 -0
- odibi/cli/main.py +215 -0
- odibi/cli/run.py +98 -0
- odibi/cli/schema.py +208 -0
- odibi/cli/secrets.py +232 -0
- odibi/cli/story.py +379 -0
- odibi/cli/system.py +132 -0
- odibi/cli/test.py +286 -0
- odibi/cli/ui.py +31 -0
- odibi/cli/validate.py +39 -0
- odibi/config.py +3541 -0
- odibi/connections/__init__.py +9 -0
- odibi/connections/azure_adls.py +499 -0
- odibi/connections/azure_sql.py +709 -0
- odibi/connections/base.py +28 -0
- odibi/connections/factory.py +322 -0
- odibi/connections/http.py +78 -0
- odibi/connections/local.py +119 -0
- odibi/connections/local_dbfs.py +61 -0
- odibi/constants.py +17 -0
- odibi/context.py +528 -0
- odibi/diagnostics/__init__.py +12 -0
- odibi/diagnostics/delta.py +520 -0
- odibi/diagnostics/diff.py +169 -0
- odibi/diagnostics/manager.py +171 -0
- odibi/engine/__init__.py +20 -0
- odibi/engine/base.py +334 -0
- odibi/engine/pandas_engine.py +2178 -0
- odibi/engine/polars_engine.py +1114 -0
- odibi/engine/registry.py +54 -0
- odibi/engine/spark_engine.py +2362 -0
- odibi/enums.py +7 -0
- odibi/exceptions.py +297 -0
- odibi/graph.py +426 -0
- odibi/introspect.py +1214 -0
- odibi/lineage.py +511 -0
- odibi/node.py +3341 -0
- odibi/orchestration/__init__.py +0 -0
- odibi/orchestration/airflow.py +90 -0
- odibi/orchestration/dagster.py +77 -0
- odibi/patterns/__init__.py +24 -0
- odibi/patterns/aggregation.py +599 -0
- odibi/patterns/base.py +94 -0
- odibi/patterns/date_dimension.py +423 -0
- odibi/patterns/dimension.py +696 -0
- odibi/patterns/fact.py +748 -0
- odibi/patterns/merge.py +128 -0
- odibi/patterns/scd2.py +148 -0
- odibi/pipeline.py +2382 -0
- odibi/plugins.py +80 -0
- odibi/project.py +581 -0
- odibi/references.py +151 -0
- odibi/registry.py +246 -0
- odibi/semantics/__init__.py +71 -0
- odibi/semantics/materialize.py +392 -0
- odibi/semantics/metrics.py +361 -0
- odibi/semantics/query.py +743 -0
- odibi/semantics/runner.py +430 -0
- odibi/semantics/story.py +507 -0
- odibi/semantics/views.py +432 -0
- odibi/state/__init__.py +1203 -0
- odibi/story/__init__.py +55 -0
- odibi/story/doc_story.py +554 -0
- odibi/story/generator.py +1431 -0
- odibi/story/lineage.py +1043 -0
- odibi/story/lineage_utils.py +324 -0
- odibi/story/metadata.py +608 -0
- odibi/story/renderers.py +453 -0
- odibi/story/templates/run_story.html +2520 -0
- odibi/story/themes.py +216 -0
- odibi/testing/__init__.py +13 -0
- odibi/testing/assertions.py +75 -0
- odibi/testing/fixtures.py +85 -0
- odibi/testing/source_pool.py +277 -0
- odibi/transformers/__init__.py +122 -0
- odibi/transformers/advanced.py +1472 -0
- odibi/transformers/delete_detection.py +610 -0
- odibi/transformers/manufacturing.py +1029 -0
- odibi/transformers/merge_transformer.py +778 -0
- odibi/transformers/relational.py +675 -0
- odibi/transformers/scd.py +579 -0
- odibi/transformers/sql_core.py +1356 -0
- odibi/transformers/validation.py +165 -0
- odibi/ui/__init__.py +0 -0
- odibi/ui/app.py +195 -0
- odibi/utils/__init__.py +66 -0
- odibi/utils/alerting.py +667 -0
- odibi/utils/config_loader.py +343 -0
- odibi/utils/console.py +231 -0
- odibi/utils/content_hash.py +202 -0
- odibi/utils/duration.py +43 -0
- odibi/utils/encoding.py +102 -0
- odibi/utils/extensions.py +28 -0
- odibi/utils/hashing.py +61 -0
- odibi/utils/logging.py +203 -0
- odibi/utils/logging_context.py +740 -0
- odibi/utils/progress.py +429 -0
- odibi/utils/setup_helpers.py +302 -0
- odibi/utils/telemetry.py +140 -0
- odibi/validation/__init__.py +62 -0
- odibi/validation/engine.py +765 -0
- odibi/validation/explanation_linter.py +155 -0
- odibi/validation/fk.py +547 -0
- odibi/validation/gate.py +252 -0
- odibi/validation/quarantine.py +605 -0
- odibi/writers/__init__.py +15 -0
- odibi/writers/sql_server_writer.py +2081 -0
- odibi-2.5.0.dist-info/METADATA +255 -0
- odibi-2.5.0.dist-info/RECORD +124 -0
- odibi-2.5.0.dist-info/WHEEL +5 -0
- odibi-2.5.0.dist-info/entry_points.txt +2 -0
- odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
- odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/enums.py
ADDED
odibi/exceptions.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""Custom exceptions for ODIBI framework."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class OdibiException(Exception):
    """Common base class for ODIBI errors.

    The other exception classes in this module derive from it, so a single
    ``except OdibiException`` clause catches any of them.
    """
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ConfigValidationError(OdibiException):
    """Raised when a configuration fails validation.

    Attributes:
        message: Description of the validation problem.
        file: Path of the offending configuration file, or ``None`` if unknown.
        line: Line number of the problem, or ``None`` if unknown.
    """

    def __init__(self, message: str, file: Optional[str] = None, line: Optional[int] = None):
        """Store the error details and build the formatted exception message.

        Args:
            message: Description of the validation problem.
            file: Optional path of the configuration file.
            line: Optional line number within the file.
        """
        self.message = message
        self.file = file
        self.line = line
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the multi-line message, including location info when present."""
        parts = ["Configuration validation error"]
        if self.file:
            parts.append(f"\n File: {self.file}")
        # ``None`` is the "unknown" sentinel; test for it explicitly so a
        # legitimate line number of 0 (zero-based positions) is still shown.
        if self.line is not None:
            parts.append(f"\n Line: {self.line}")
        parts.append(f"\n Error: {self.message}")
        return "".join(parts)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ConnectionError(OdibiException):
    """Raised when a connection fails validation or cannot be used.

    NOTE: this class has the same name as Python's built-in
    ``ConnectionError``; importing it by name shadows the built-in in the
    importing module.

    Attributes:
        connection_name: Name of the connection that failed.
        reason: Explanation of the failure.
        suggestions: Remediation hints (empty list when none were given).
    """

    def __init__(self, connection_name: str, reason: str, suggestions: Optional[List[str]] = None):
        """Record the failure details and build the formatted message.

        Args:
            connection_name: Name of the connection that failed.
            reason: Explanation of the failure.
            suggestions: Optional remediation hints.
        """
        self.connection_name = connection_name
        self.reason = reason
        self.suggestions = suggestions if suggestions else []
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the message: header, reason, then a numbered suggestion list."""
        header = f"[X] Connection validation failed: {self.connection_name}"
        detail = f"\n Reason: {self.reason}"
        if not self.suggestions:
            return header + detail

        # Suggestions are numbered starting from 1.
        numbered = "".join(
            f"\n {idx}. {hint}" for idx, hint in enumerate(self.suggestions, 1)
        )
        return f"{header}{detail}\n\n Suggestions:{numbered}"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class DependencyError(OdibiException):
    """Raised for dependency-graph problems such as cycles or missing nodes.

    Attributes:
        message: Description of the problem.
        cycle: Ordered node names forming a detected cycle, or ``None``.
    """

    def __init__(self, message: str, cycle: Optional[List[str]] = None):
        """Record the problem and build the formatted message.

        Args:
            message: Description of the problem.
            cycle: Optional list of node names forming a cycle.
        """
        self.message = message
        self.cycle = cycle
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the message, appending the cycle path when one was supplied."""
        text = f"[X] Dependency error: {self.message}"
        if self.cycle:
            path = " -> ".join(self.cycle)
            text += f"\n Cycle detected: {path}"
        return text
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ExecutionContext:
    """Plain container of runtime details used when reporting errors.

    Attributes:
        node_name: Name of the node being executed.
        config_file: Configuration file path, if known.
        config_line: Line within ``config_file``, if known.
        step_index: Index of the current step, if known.
        total_steps: Total number of steps, if known.
        input_schema: Input column names (empty list when not given).
        input_shape: Shape of the input, if known.
        previous_steps: Descriptions of earlier steps (empty list when not given).
    """

    def __init__(
        self,
        node_name: str,
        config_file: Optional[str] = None,
        config_line: Optional[int] = None,
        step_index: Optional[int] = None,
        total_steps: Optional[int] = None,
        input_schema: Optional[List[str]] = None,
        input_shape: Optional[tuple] = None,
        previous_steps: Optional[List[str]] = None,
    ):
        """Store the supplied values; falsy list arguments become fresh lists."""
        # Identity and location of the node.
        self.node_name = node_name
        self.config_file = config_file
        self.config_line = config_line

        # Position within the node's steps.
        self.step_index = step_index
        self.total_steps = total_steps

        # Description of the input data and of what ran before.
        self.input_schema = input_schema if input_schema else []
        self.input_shape = input_shape
        self.previous_steps = previous_steps if previous_steps else []
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class NodeExecutionError(OdibiException):
    """Raised when a node fails during execution.

    The formatted message combines the node name, optional config location,
    step position, the (cleaned) underlying error, input details, previous
    steps, suggestions and a story path.

    Attributes:
        message: Description of the failure.
        context: ``ExecutionContext`` describing where the failure happened.
        original_error: The underlying exception, if any.
        suggestions: Remediation hints (empty list when none were given).
        story_path: Path reported under "Story:" in the message, if any.
    """

    # (error type, regex) pairs tried in order against a Py4J error message.
    # A ``None`` type marks the catch-all pattern, whose first group captures
    # the exception class name and whose second group captures the message.
    _SPARK_ERROR_PATTERNS = (
        ("AnalysisException", r"org\.apache\.spark\.sql\.AnalysisException: (.*)"),
        (
            "ParseException",
            r"org\.apache\.spark\.sql\.catalyst\.parser\.ParseException: (.*)",
        ),
        ("FileNotFoundException", r"java\.io\.FileNotFoundException: (.*)"),
        ("IllegalArgumentException", r"java\.lang\.IllegalArgumentException: (.*)"),
        (
            "InvalidInputException",
            r"org\.apache\.hadoop\.mapred\.InvalidInputException: (.*)",
        ),
        (None, r"([a-zA-Z0-9]+Exception): (.*)"),
    )

    def __init__(
        self,
        message: str,
        context: ExecutionContext,
        original_error: Optional[Exception] = None,
        suggestions: Optional[List[str]] = None,
        story_path: Optional[str] = None,
    ):
        """Record the failure details and build the formatted message.

        Args:
            message: Description of the failure.
            context: Runtime context of the failing node.
            original_error: The underlying exception, if any.
            suggestions: Optional remediation hints.
            story_path: Optional path shown under "Story:".
        """
        self.message = message
        self.context = context
        self.original_error = original_error
        self.suggestions = suggestions or []
        self.story_path = story_path
        super().__init__(self._format_error())

    def _clean_spark_error(self, error: Exception) -> Tuple[str, str]:
        """Extract a concise message and error type from an exception.

        Only exceptions whose class name contains "Py4JJavaError" are parsed;
        any other exception is returned as ``(str(error), class name)``.

        Args:
            error: Original exception.

        Returns:
            (Clean Message, Error Type)
        """
        import re

        error_type = type(error).__name__
        msg = str(error)

        if "Py4JJavaError" not in error_type:
            return msg, error_type

        # Look for a known Java exception inside the message. Every named
        # pattern reports its own type, so the result never falls back to the
        # generic "Py4JJavaError" label once a pattern has matched.
        for known_type, pattern in self._SPARK_ERROR_PATTERNS:
            match = re.search(pattern, msg)
            if not match:
                continue
            if known_type is None:
                # Catch-all: group 1 is the class name, group 2 the message.
                return match.group(2).strip(), match.group(1)
            return match.group(1).strip(), known_type

        # Fallback: Py4J messages usually look like
        # "An error occurred while calling o46.save.\n: java.lang.X: ...",
        # so find the line that starts with the Java class name.
        for line in msg.split("\n"):
            if line.strip().startswith((": java.", ": org.")):
                parts = line.split(":", 2)  # ['', ' java.lang.X', ' Message']
                if len(parts) >= 3:
                    error_type = parts[1].split(".")[-1].strip()
                    return parts[2].strip(), error_type

        return msg, error_type

    def _format_error(self) -> str:
        """Generate the rich, multi-line error message with context."""
        parts = [f"[X] Node execution failed: {self.context.node_name}"]

        # Location info
        if self.context.config_file:
            parts.append(f"\n Location: {self.context.config_file}")
            # ``None`` means "unknown"; a line number of 0 is still reported.
            if self.context.config_line is not None:
                parts.append(f":{self.context.config_line}")

        # Step info (step_index is zero-based, shown one-based)
        if self.context.step_index is not None and self.context.total_steps:
            parts.append(f"\n Step: {self.context.step_index + 1} of {self.context.total_steps}")

        # Error message
        if self.original_error:
            clean_msg, clean_type = self._clean_spark_error(self.original_error)
            # Use the cleaned text only if cleaning actually shortened it;
            # otherwise keep the caller-supplied message.
            if len(clean_msg) < len(str(self.original_error)):
                parts.append(f"\n\n Error: {clean_msg}")
                parts.append(f"\n Type: {clean_type}")
            else:
                parts.append(f"\n\n Error: {self.message}")
                parts.append(f"\n Type: {type(self.original_error).__name__}")
        else:
            parts.append(f"\n\n Error: {self.message}")

        # Context information
        if self.context.input_schema:
            parts.append(f"\n\n Available columns: {self.context.input_schema}")

        if self.context.input_shape:
            parts.append(f"\n Input shape: {self.context.input_shape}")

        if self.context.previous_steps:
            parts.append("\n\n Previous steps:")
            for step in self.context.previous_steps:
                parts.append(f"\n - {step}")

        # Suggestions
        if self.suggestions:
            parts.append("\n\n Suggestions:")
            for i, suggestion in enumerate(self.suggestions, 1):
                parts.append(f"\n {i}. {suggestion}")

        # Story reference
        if self.story_path:
            parts.append(f"\n\n Story: {self.story_path}")

        return "".join(parts)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class TransformError(OdibiException):
    """Raised when a transform step fails.

    Adds no behaviour of its own; it exists so callers can catch transform
    failures separately from other ODIBI errors.
    """
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class ValidationError(OdibiException):
    """Raised when data validation fails for a node.

    Attributes:
        node_name: Name of the node whose data failed validation.
        failures: Descriptions of the individual validation failures.
    """

    def __init__(self, node_name: str, failures: List[str]):
        """Record the failures and build the formatted message.

        Args:
            node_name: Name of the node whose data failed validation.
            failures: Descriptions of the individual failures.
        """
        self.node_name = node_name
        self.failures = failures
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the message: a header followed by one bullet per failure."""
        bullets = "".join(f"\n * {item}" for item in self.failures)
        return f"[X] Validation failed for node: {self.node_name}\n\n Failures:{bullets}"
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class GateFailedError(OdibiException):
    """Raised when a quality gate check fails for a node.

    Attributes:
        node_name: Name of the node whose gate failed.
        pass_rate: Observed pass rate; rendered with ``.1%`` formatting, so
            presumably a fraction rather than a percentage (confirm with callers).
        required_rate: Required pass rate, rendered the same way.
        failed_rows: Number of rows that failed.
        total_rows: Total number of rows.
        failure_reasons: Reasons for the failure (empty list when not given).
    """

    def __init__(
        self,
        node_name: str,
        pass_rate: float,
        required_rate: float,
        failed_rows: int,
        total_rows: int,
        failure_reasons: Optional[List[str]] = None,
    ):
        """Record the gate statistics and build the formatted message."""
        self.node_name = node_name

        # Rates and row counts reported in the message.
        self.pass_rate = pass_rate
        self.required_rate = required_rate
        self.failed_rows = failed_rows
        self.total_rows = total_rows

        self.failure_reasons = failure_reasons if failure_reasons else []
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the message: header, rates, row counts, then optional reasons."""
        lines = [
            f"[X] Quality gate failed for node: {self.node_name}",
            f"\n\n Pass rate: {self.pass_rate:.1%} (required: {self.required_rate:.1%})",
            f"\n Failed rows: {self.failed_rows:,} / {self.total_rows:,}",
        ]
        if self.failure_reasons:
            lines.append("\n\n Reasons:")
            lines.extend(f"\n * {why}" for why in self.failure_reasons)
        return "".join(lines)
|