odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124):
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,429 @@
1
+ """Pipeline progress tracking with Rich visualization.
2
+
3
+ This module provides progress visualization for pipeline execution with
4
+ auto-detection of environment (CLI vs notebook) and graceful fallback
5
+ when Rich is not available.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ from odibi.utils.console import is_rich_available, get_console, _is_notebook_environment
11
+
12
+
13
+ class NodeStatus:
14
+ """Status constants for node execution."""
15
+
16
+ PENDING = "pending"
17
+ RUNNING = "running"
18
+ SUCCESS = "success"
19
+ FAILED = "failed"
20
+ SKIPPED = "skipped"
21
+
22
+
23
+ class PipelineProgress:
24
+ """Progress tracker for pipeline execution.
25
+
26
+ Provides visual feedback during pipeline runs with auto-detection
27
+ of environment (CLI/notebook) and Rich availability.
28
+
29
+ Example:
30
+ >>> progress = PipelineProgress("my_pipeline", ["node1", "node2"])
31
+ >>> progress.start()
32
+ >>> progress.update_node("node1", NodeStatus.SUCCESS, duration=1.5, rows=1000)
33
+ >>> progress.update_node("node2", NodeStatus.FAILED, duration=0.5)
34
+ >>> progress.finish()
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ pipeline_name: str,
40
+ node_names: List[str],
41
+ engine: str = "pandas",
42
+ ) -> None:
43
+ """Initialize progress tracker.
44
+
45
+ Args:
46
+ pipeline_name: Name of the pipeline being executed.
47
+ node_names: List of node names in execution order.
48
+ engine: Engine type (pandas/spark).
49
+ """
50
+ self.pipeline_name = pipeline_name
51
+ self.node_names = node_names
52
+ self.engine = engine
53
+ self.is_notebook = _is_notebook_environment()
54
+ self.use_rich = is_rich_available()
55
+
56
+ self._node_statuses: Dict[str, Dict[str, Any]] = {
57
+ name: {"status": NodeStatus.PENDING, "duration": None, "rows": None}
58
+ for name in node_names
59
+ }
60
+ self._live: Optional[Any] = None
61
+ self._table: Optional[Any] = None
62
+ self._start_time: Optional[float] = None
63
+
64
+ def start(self) -> None:
65
+ """Start progress display."""
66
+ import time
67
+
68
+ self._start_time = time.time()
69
+
70
+ if self.use_rich:
71
+ self._start_rich()
72
+ else:
73
+ self._start_plain()
74
+
75
+ def _start_rich(self) -> None:
76
+ """Start Rich live display."""
77
+ from rich.live import Live
78
+
79
+ console = get_console()
80
+
81
+ header = self._create_header_panel()
82
+ console.print(header)
83
+
84
+ if not self.is_notebook:
85
+ self._table = self._create_progress_table()
86
+ self._live = Live(
87
+ self._table,
88
+ console=console,
89
+ refresh_per_second=4,
90
+ transient=True,
91
+ )
92
+ self._live.start()
93
+ else:
94
+ console.print(f"[dim]Executing {len(self.node_names)} nodes...[/dim]\n")
95
+
96
+ def _start_plain(self) -> None:
97
+ """Start plain text display."""
98
+ print(f"\n{'=' * 60}")
99
+ print(f" Pipeline: {self.pipeline_name}")
100
+ print(f" Engine: {self.engine}")
101
+ print(f" Nodes: {len(self.node_names)}")
102
+ print(f"{'=' * 60}\n")
103
+
104
+ def _create_header_panel(self) -> Any:
105
+ """Create the header panel."""
106
+ from rich.panel import Panel
107
+ from rich.text import Text
108
+
109
+ header_text = Text()
110
+ header_text.append("Pipeline: ", style="dim")
111
+ header_text.append(f"{self.pipeline_name}\n", style="bold cyan")
112
+ header_text.append("Engine: ", style="dim")
113
+ header_text.append(f"{self.engine} ", style="green")
114
+ header_text.append("Nodes: ", style="dim")
115
+ header_text.append(f"{len(self.node_names)}", style="yellow")
116
+
117
+ return Panel(
118
+ header_text,
119
+ title="[bold]Odibi Pipeline[/bold]",
120
+ border_style="blue",
121
+ padding=(0, 2),
122
+ )
123
+
124
+ def _create_progress_table(self) -> Any:
125
+ """Create the progress table."""
126
+ from rich.table import Table
127
+
128
+ table = Table(
129
+ show_header=True,
130
+ header_style="bold",
131
+ box=None,
132
+ padding=(0, 1),
133
+ )
134
+ table.add_column("Node", style="cyan", min_width=30)
135
+ table.add_column("Status", justify="center", min_width=10)
136
+ table.add_column("Duration", justify="right", min_width=10)
137
+ table.add_column("Rows", justify="right", min_width=12)
138
+
139
+ for name in self.node_names:
140
+ info = self._node_statuses[name]
141
+ status_str = self._format_status(info["status"])
142
+ duration_str = self._format_duration(info["duration"])
143
+ rows_str = self._format_rows(info["rows"])
144
+ table.add_row(name, status_str, duration_str, rows_str)
145
+
146
+ return table
147
+
148
+ def _format_status(self, status: str) -> str:
149
+ """Format status with Rich markup."""
150
+ status_map = {
151
+ NodeStatus.PENDING: "[dim]○ pending[/dim]",
152
+ NodeStatus.RUNNING: "[yellow]◉ running[/yellow]",
153
+ NodeStatus.SUCCESS: "[green]✓ success[/green]",
154
+ NodeStatus.FAILED: "[red]✗ failed[/red]",
155
+ NodeStatus.SKIPPED: "[dim]⏭ skipped[/dim]",
156
+ }
157
+ return status_map.get(status, status)
158
+
159
+ def _format_status_plain(self, status: str) -> str:
160
+ """Format status for plain text."""
161
+ status_map = {
162
+ NodeStatus.PENDING: "○ pending",
163
+ NodeStatus.RUNNING: "◉ running",
164
+ NodeStatus.SUCCESS: "✓ success",
165
+ NodeStatus.FAILED: "✗ failed",
166
+ NodeStatus.SKIPPED: "⏭ skipped",
167
+ }
168
+ return status_map.get(status, status)
169
+
170
+ def _format_duration(self, duration: Optional[float]) -> str:
171
+ """Format duration value."""
172
+ if duration is None:
173
+ return "-"
174
+ if duration < 1:
175
+ return f"{duration * 1000:.0f}ms"
176
+ return f"{duration:.2f}s"
177
+
178
+ def _format_rows(self, rows: Optional[int]) -> str:
179
+ """Format row count."""
180
+ if rows is None:
181
+ return "-"
182
+ if rows >= 1_000_000:
183
+ return f"{rows / 1_000_000:.1f}M"
184
+ if rows >= 1_000:
185
+ return f"{rows / 1_000:.1f}K"
186
+ return str(rows)
187
+
188
+ def update_node(
189
+ self,
190
+ name: str,
191
+ status: str,
192
+ duration: Optional[float] = None,
193
+ rows: Optional[int] = None,
194
+ phase_timings: Optional[Dict[str, float]] = None,
195
+ ) -> None:
196
+ """Update node status.
197
+
198
+ Args:
199
+ name: Node name.
200
+ status: Status from NodeStatus constants.
201
+ duration: Execution duration in seconds.
202
+ rows: Number of rows processed.
203
+ phase_timings: Optional dict of phase name -> duration in ms.
204
+ """
205
+ if name not in self._node_statuses:
206
+ return
207
+
208
+ self._node_statuses[name] = {
209
+ "status": status,
210
+ "duration": duration,
211
+ "rows": rows,
212
+ "phase_timings": phase_timings,
213
+ }
214
+
215
+ if self.use_rich:
216
+ self._update_rich(name, status, duration, rows)
217
+ else:
218
+ self._update_plain(name, status, duration, rows)
219
+
220
+ def _update_rich(
221
+ self,
222
+ name: str,
223
+ status: str,
224
+ duration: Optional[float],
225
+ rows: Optional[int],
226
+ ) -> None:
227
+ """Update Rich display."""
228
+ if self._live and not self.is_notebook:
229
+ self._table = self._create_progress_table()
230
+ self._live.update(self._table)
231
+ elif self.is_notebook:
232
+ console = get_console()
233
+ status_str = self._format_status(status)
234
+ duration_str = self._format_duration(duration)
235
+ rows_str = self._format_rows(rows)
236
+ console.print(f" {name}: {status_str} ({duration_str}, {rows_str} rows)")
237
+
238
+ def _update_plain(
239
+ self,
240
+ name: str,
241
+ status: str,
242
+ duration: Optional[float],
243
+ rows: Optional[int],
244
+ ) -> None:
245
+ """Update plain text display."""
246
+ status_str = self._format_status_plain(status)
247
+ duration_str = self._format_duration(duration)
248
+ rows_str = self._format_rows(rows)
249
+ print(f" {name}: {status_str} ({duration_str}, {rows_str} rows)")
250
+
251
+ def finish(
252
+ self,
253
+ completed: int = 0,
254
+ failed: int = 0,
255
+ skipped: int = 0,
256
+ duration: Optional[float] = None,
257
+ ) -> None:
258
+ """Finish progress display and show summary.
259
+
260
+ Args:
261
+ completed: Number of completed nodes.
262
+ failed: Number of failed nodes.
263
+ skipped: Number of skipped nodes.
264
+ duration: Total pipeline duration in seconds.
265
+ """
266
+ if self._live:
267
+ self._live.stop()
268
+ self._live = None
269
+
270
+ import time
271
+
272
+ total_duration = duration or ((time.time() - self._start_time) if self._start_time else 0)
273
+
274
+ if self.use_rich:
275
+ self._finish_rich(completed, failed, skipped, total_duration)
276
+ else:
277
+ self._finish_plain(completed, failed, skipped, total_duration)
278
+
279
+ def _finish_rich(
280
+ self,
281
+ completed: int,
282
+ failed: int,
283
+ skipped: int,
284
+ duration: float,
285
+ ) -> None:
286
+ """Finish with Rich summary."""
287
+ from rich.panel import Panel
288
+ from rich.text import Text
289
+
290
+ console = get_console()
291
+
292
+ final_table = self._create_progress_table()
293
+ console.print(final_table)
294
+ console.print()
295
+
296
+ status = "[green]SUCCESS[/green]" if failed == 0 else "[red]FAILED[/red]"
297
+ summary = Text()
298
+ summary.append("Status: ")
299
+ summary.append_text(Text.from_markup(status))
300
+ summary.append("\n")
301
+ summary.append("Duration: ", style="dim")
302
+ summary.append(f"{duration:.2f}s\n")
303
+ summary.append("Completed: ", style="dim")
304
+ summary.append(f"{completed}", style="green")
305
+ if failed > 0:
306
+ summary.append(" Failed: ", style="dim")
307
+ summary.append(f"{failed}", style="red")
308
+ if skipped > 0:
309
+ summary.append(" Skipped: ", style="dim")
310
+ summary.append(f"{skipped}", style="yellow")
311
+
312
+ panel_style = "green" if failed == 0 else "red"
313
+ panel = Panel(
314
+ summary,
315
+ title="[bold]Pipeline Complete[/bold]",
316
+ border_style=panel_style,
317
+ padding=(0, 2),
318
+ )
319
+ console.print(panel)
320
+
321
+ def _finish_plain(
322
+ self,
323
+ completed: int,
324
+ failed: int,
325
+ skipped: int,
326
+ duration: float,
327
+ ) -> None:
328
+ """Finish with plain text summary."""
329
+ status = "SUCCESS" if failed == 0 else "FAILED"
330
+ print(f"\n{'=' * 60}")
331
+ print(f" Pipeline: {status}")
332
+ print(f" Duration: {duration:.2f}s")
333
+ print(f" Completed: {completed}, Failed: {failed}, Skipped: {skipped}")
334
+ print(f"{'=' * 60}\n")
335
+
336
+ def get_phase_timing_summary(self) -> Dict[str, Dict[str, float]]:
337
+ """Get phase timing breakdown for all nodes.
338
+
339
+ Returns:
340
+ Dict mapping node names to their phase timings (in ms).
341
+ """
342
+ return {
343
+ name: info.get("phase_timings", {})
344
+ for name, info in self._node_statuses.items()
345
+ if info.get("phase_timings")
346
+ }
347
+
348
+ def get_aggregate_phase_timings(self) -> Dict[str, float]:
349
+ """Get max phase timings across all nodes (bottleneck per phase).
350
+
351
+ Returns:
352
+ Dict mapping phase names to max time spent by any node (in ms).
353
+ """
354
+ max_timings: Dict[str, float] = {}
355
+ for info in self._node_statuses.values():
356
+ phase_timings = info.get("phase_timings") or {}
357
+ for phase, duration_ms in phase_timings.items():
358
+ max_timings[phase] = max(max_timings.get(phase, 0), duration_ms)
359
+ return {k: round(v, 2) for k, v in max_timings.items()}
360
+
361
+ def print_phase_timing_report(self, pipeline_duration_s: Optional[float] = None) -> None:
362
+ """Print a detailed phase timing report.
363
+
364
+ Args:
365
+ pipeline_duration_s: Actual pipeline wall-clock duration in seconds.
366
+ Used for percentage calculations. Falls back to sum of max phases.
367
+ """
368
+ aggregate = self.get_aggregate_phase_timings()
369
+ if not aggregate:
370
+ return
371
+
372
+ # Use actual pipeline duration for percentage, or fall back to sum of max phases
373
+ if pipeline_duration_s is not None:
374
+ total_ms = pipeline_duration_s * 1000
375
+ else:
376
+ total_ms = sum(aggregate.values())
377
+
378
+ if self.use_rich:
379
+ self._print_phase_timing_rich(aggregate, total_ms)
380
+ else:
381
+ self._print_phase_timing_plain(aggregate, total_ms)
382
+
383
+ def _print_phase_timing_rich(self, aggregate: Dict[str, float], total_ms: float) -> None:
384
+ """Print phase timing report with Rich."""
385
+ from rich.panel import Panel
386
+ from rich.table import Table
387
+
388
+ console = get_console()
389
+
390
+ table = Table(
391
+ show_header=True,
392
+ header_style="bold",
393
+ box=None,
394
+ padding=(0, 1),
395
+ )
396
+ table.add_column("Phase", style="cyan")
397
+ table.add_column("Slowest", justify="right")
398
+ table.add_column("% of Pipeline", justify="right")
399
+
400
+ # Sort by time descending
401
+ sorted_phases = sorted(aggregate.items(), key=lambda x: x[1], reverse=True)
402
+
403
+ for phase, duration_ms in sorted_phases:
404
+ pct = (duration_ms / total_ms * 100) if total_ms > 0 else 0
405
+ duration_str = (
406
+ f"{duration_ms:.0f}ms" if duration_ms < 1000 else f"{duration_ms / 1000:.2f}s"
407
+ )
408
+ table.add_row(phase, duration_str, f"{pct:.1f}%")
409
+
410
+ panel = Panel(
411
+ table,
412
+ title="[bold]Phase Bottlenecks (slowest node per phase)[/bold]",
413
+ border_style="dim",
414
+ padding=(0, 1),
415
+ )
416
+ console.print(panel)
417
+
418
+ def _print_phase_timing_plain(self, aggregate: Dict[str, float], total_ms: float) -> None:
419
+ """Print phase timing report in plain text."""
420
+ print("\n--- Phase Bottlenecks (slowest node per phase) ---")
421
+ sorted_phases = sorted(aggregate.items(), key=lambda x: x[1], reverse=True)
422
+
423
+ for phase, duration_ms in sorted_phases:
424
+ pct = (duration_ms / total_ms * 100) if total_ms > 0 else 0
425
+ duration_str = (
426
+ f"{duration_ms:.0f}ms" if duration_ms < 1000 else f"{duration_ms / 1000:.2f}s"
427
+ )
428
+ print(f" {phase}: {duration_str} ({pct:.1f}% of pipeline)")
429
+ print("-" * 48 + "\n")