odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/enums.py ADDED
@@ -0,0 +1,7 @@
1
+ from enum import Enum
2
+
3
+
4
class EngineType(str, Enum):
    """Engine identifiers.

    Members mix in ``str``, so each one compares equal to its plain
    string value (e.g. ``EngineType.PANDAS == "pandas"``).
    """

    PANDAS = "pandas"
    SPARK = "spark"
    POLARS = "polars"
odibi/exceptions.py ADDED
@@ -0,0 +1,297 @@
1
+ """Custom exceptions for ODIBI framework."""
2
+
3
+ from typing import List, Optional, Tuple
4
+
5
+
6
class OdibiException(Exception):
    """Common base class for the ODIBI error types defined in this module.

    Catching this type catches every exception class below that derives from it.
    """
10
+
11
+
12
class ConfigValidationError(OdibiException):
    """Configuration validation failed.

    Attributes:
        message: Description of the validation problem.
        file: Optional file the problem was found in.
        line: Optional line number within ``file``.
    """

    def __init__(self, message: str, file: Optional[str] = None, line: Optional[int] = None):
        # Attributes must be stored first: _format_error() reads them.
        self.message = message
        self.file = file
        self.line = line
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the header, any known location details, then the error text."""
        location = ""
        # Both checks are truthiness tests, so an empty file name or a
        # line value of 0 is treated the same as "not provided".
        if self.file:
            location += f"\n File: {self.file}"
        if self.line:
            location += f"\n Line: {self.line}"
        return f"Configuration validation error{location}\n Error: {self.message}"
30
+
31
+
32
class ConnectionError(OdibiException):
    """Connection failed or invalid.

    NOTE(review): this name shadows Python's built-in ``ConnectionError``
    inside any module that imports it; the name is kept for compatibility.

    Attributes:
        connection_name: Name of the connection that failed validation.
        reason: Why validation failed.
        suggestions: Remediation hints; an empty list when none were given.
    """

    def __init__(self, connection_name: str, reason: str, suggestions: Optional[List[str]] = None):
        # Attributes must be stored first: _format_error() reads them.
        self.connection_name = connection_name
        self.reason = reason
        self.suggestions = suggestions or []
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the failure header followed by a numbered suggestion list, if any."""
        header = (
            f"[X] Connection validation failed: {self.connection_name}"
            f"\n Reason: {self.reason}"
        )
        if not self.suggestions:
            return header

        # Suggestions are numbered starting at 1.
        numbered = "".join(
            f"\n {number}. {hint}" for number, hint in enumerate(self.suggestions, 1)
        )
        return f"{header}\n\n Suggestions:{numbered}"
54
+
55
+
56
class DependencyError(OdibiException):
    """Dependency graph error (cycles, missing nodes, etc.).

    Attributes:
        message: Description of the problem.
        cycle: Optional sequence of names forming a cycle; rendered joined
            with `` -> `` when non-empty.
    """

    def __init__(self, message: str, cycle: Optional[List[str]] = None):
        # Attributes must be stored first: _format_error() reads them.
        self.message = message
        self.cycle = cycle
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the error line, plus the cycle path when one was supplied."""
        text = f"[X] Dependency error: {self.message}"
        if self.cycle:
            text += "\n Cycle detected: " + " -> ".join(self.cycle)
        return text
72
+
73
+
74
class ExecutionContext:
    """Plain container of runtime details attached to error reports.

    Every constructor argument is stored unchanged on the instance, except
    that a missing or empty ``input_schema`` / ``previous_steps`` becomes a
    new empty list.
    """

    def __init__(
        self,
        node_name: str,
        config_file: Optional[str] = None,
        config_line: Optional[int] = None,
        step_index: Optional[int] = None,
        total_steps: Optional[int] = None,
        input_schema: Optional[List[str]] = None,
        input_shape: Optional[tuple] = None,
        previous_steps: Optional[List[str]] = None,
    ):
        # Which node, and where it is configured.
        self.node_name = node_name
        self.config_file = config_file
        self.config_line = config_line

        # Position within the node's steps.
        # NOTE(review): step_index appears to be zero-based, since
        # NodeExecutionError displays it as step_index + 1 - confirm with callers.
        self.step_index = step_index
        self.total_steps = total_steps

        # Description of the input data; falsy schema collapses to [].
        self.input_schema = input_schema if input_schema else []
        self.input_shape = input_shape

        # Steps that ran earlier; falsy value collapses to [].
        self.previous_steps = previous_steps if previous_steps else []
96
+
97
+
98
class NodeExecutionError(OdibiException):
    """Node execution failed.

    The exception text is a multi-section report: failing node, config
    location, step position, error text and type, available columns, input
    shape, previous steps, numbered suggestions and a story path. Sections
    whose data is missing are omitted.

    Attributes:
        message: Error text used when the original error cannot be shortened.
        context: ExecutionContext describing where the failure happened.
        original_error: Underlying exception, if any.
        suggestions: Remediation hints; an empty list when none were given.
        story_path: Optional path included at the end of the report.
    """

    def __init__(
        self,
        message: str,
        context: ExecutionContext,
        original_error: Optional[Exception] = None,
        suggestions: Optional[List[str]] = None,
        story_path: Optional[str] = None,
    ):
        # Attributes must be stored first: _format_error() reads them.
        self.message = message
        self.context = context
        self.original_error = original_error
        self.suggestions = suggestions or []
        self.story_path = story_path
        super().__init__(self._format_error())

    def _clean_spark_error(self, error: Exception) -> Tuple[str, str]:
        """Extract clean error message from Spark exception.

        Only exceptions whose class name contains ``Py4JJavaError`` are
        rewritten; any other exception is returned as ``(str(error), class name)``.

        Args:
            error: Original exception

        Returns:
            (Clean Message, Error Type)
        """
        error_type = type(error).__name__
        msg = str(error)

        if "Py4JJavaError" not in error_type:
            return msg, error_type

        # Kept as a function-scope import, as in the original block.
        import re

        # (regex, reported type) pairs, tried in order. Every known pattern
        # names its type explicitly so that the wrapper name "Py4JJavaError"
        # is never reported for a recognised Java exception. A type of None
        # marks the catch-all, whose first group captures the class name.
        patterns = [
            (r"org\.apache\.spark\.sql\.AnalysisException: (.*)", "AnalysisException"),
            (
                r"org\.apache\.spark\.sql\.catalyst\.parser\.ParseException: (.*)",
                "ParseException",
            ),
            (r"java\.io\.FileNotFoundException: (.*)", "FileNotFoundException"),
            (r"java\.lang\.IllegalArgumentException: (.*)", "IllegalArgumentException"),
            (
                r"org\.apache\.hadoop\.mapred\.InvalidInputException: (.*)",
                "InvalidInputException",
            ),
            # Catch-all for simple class names
            (r"([a-zA-Z0-9]+Exception): (.*)", None),
        ]

        for pattern, java_type in patterns:
            match = re.search(pattern, msg)
            if not match:
                continue
            if java_type is None:
                # Catch-all: group 1 is the exception class, group 2 the text.
                return match.group(2).strip(), match.group(1)
            return match.group(1).strip(), java_type

        # Fallback: look for the line that carries the Java exception, e.g.
        # "An error occurred while calling o46.save.\n: java.lang.Foo: ..."
        for line in msg.split("\n"):
            if line.strip().startswith((": java.", ": org.")):
                # ": java.lang.Foo: Message" -> ["", " java.lang.Foo", " Message"]
                parts = line.split(":", 2)
                if len(parts) >= 3:
                    return parts[2].strip(), parts[1].split(".")[-1].strip()

        return msg, error_type

    def _format_error(self) -> str:
        """Generate rich error message with context."""
        ctx = self.context
        parts = [f"[X] Node execution failed: {ctx.node_name}"]

        # Location info; the line number is only shown alongside a file.
        if ctx.config_file:
            parts.append(f"\n Location: {ctx.config_file}")
            if ctx.config_line:
                parts.append(f":{ctx.config_line}")

        # Step info; step_index is displayed one-based.
        if ctx.step_index is not None and ctx.total_steps:
            parts.append(f"\n Step: {ctx.step_index + 1} of {ctx.total_steps}")

        # Error message
        if self.original_error:
            clean_msg, clean_type = self._clean_spark_error(self.original_error)
            # Cleaning counts as successful only when it shortened the text;
            # otherwise fall back to the caller-supplied message.
            if len(clean_msg) < len(str(self.original_error)):
                parts.append(f"\n\n Error: {clean_msg}")
                parts.append(f"\n Type: {clean_type}")
            else:
                parts.append(f"\n\n Error: {self.message}")
                parts.append(f"\n Type: {type(self.original_error).__name__}")
        else:
            parts.append(f"\n\n Error: {self.message}")

        # Context information
        if ctx.input_schema:
            parts.append(f"\n\n Available columns: {ctx.input_schema}")

        if ctx.input_shape:
            parts.append(f"\n Input shape: {ctx.input_shape}")

        if ctx.previous_steps:
            parts.append("\n\n Previous steps:")
            parts.extend(f"\n - {step}" for step in ctx.previous_steps)

        # Suggestions, numbered starting at 1
        if self.suggestions:
            parts.append("\n\n Suggestions:")
            parts.extend(
                f"\n {number}. {hint}" for number, hint in enumerate(self.suggestions, 1)
            )

        # Story reference
        if self.story_path:
            parts.append(f"\n\n Story: {self.story_path}")

        return "".join(parts)
241
+
242
+
243
class TransformError(OdibiException):
    """Transform step failed.

    Adds no behavior of its own; it exists as a distinct type to catch.
    """
247
+
248
+
249
class ValidationError(OdibiException):
    """Data validation failed.

    Attributes:
        node_name: Name of the node whose validation failed.
        failures: Failure descriptions, one bullet each in the message.
    """

    def __init__(self, node_name: str, failures: List[str]):
        # Attributes must be stored first: _format_error() reads them.
        self.node_name = node_name
        self.failures = failures
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the header followed by one ``*`` bullet per failure."""
        bullets = "".join(f"\n * {failure}" for failure in self.failures)
        return f"[X] Validation failed for node: {self.node_name}\n\n Failures:{bullets}"
264
+
265
+
266
class GateFailedError(OdibiException):
    """Quality gate check failed.

    Attributes:
        node_name: Name of the node whose gate failed.
        pass_rate: Observed pass rate; rendered as a percentage with one decimal.
        required_rate: Required pass rate; rendered the same way.
        failed_rows: Number of failing rows; rendered with thousands separators.
        total_rows: Total number of rows; rendered with thousands separators.
        failure_reasons: Reasons listed as bullets; an empty list when none were given.
    """

    def __init__(
        self,
        node_name: str,
        pass_rate: float,
        required_rate: float,
        failed_rows: int,
        total_rows: int,
        failure_reasons: Optional[List[str]] = None,
    ):
        # Attributes must be stored first: _format_error() reads them.
        self.node_name = node_name
        self.pass_rate = pass_rate
        self.required_rate = required_rate
        self.failed_rows = failed_rows
        self.total_rows = total_rows
        self.failure_reasons = failure_reasons or []
        super().__init__(self._format_error())

    def _format_error(self) -> str:
        """Return the gate summary, plus a bulleted reason list when reasons exist."""
        text = (
            f"[X] Quality gate failed for node: {self.node_name}"
            f"\n\n Pass rate: {self.pass_rate:.1%} (required: {self.required_rate:.1%})"
            f"\n Failed rows: {self.failed_rows:,} / {self.total_rows:,}"
        )
        if self.failure_reasons:
            text += "\n\n Reasons:" + "".join(
                f"\n * {reason}" for reason in self.failure_reasons
            )
        return text