odibi-2.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/cli/__init__.py ADDED
@@ -0,0 +1,11 @@
+ """
+ Command-line interface for Odibi.
+
+ Available commands:
+ - run: Execute a pipeline from YAML config
+ - validate: Validate YAML config without execution
+ """
+
+ from odibi.cli.main import main
+
+ __all__ = ["main"]
odibi/cli/__main__.py ADDED
@@ -0,0 +1,6 @@
+ """Entry point for running the CLI as a module."""
+
+ from odibi.cli import main
+
+ if __name__ == "__main__":
+     main()
odibi/cli/catalog.py ADDED
@@ -0,0 +1,553 @@
+ """Catalog CLI command for querying the System Catalog."""
+
+ import json
+ from datetime import datetime, timedelta, timezone
+ from pathlib import Path
+
+ from odibi.pipeline import PipelineManager
+ from odibi.utils.extensions import load_extensions
+ from odibi.utils.logging import logger
+
+
+ def add_catalog_parser(subparsers):
+     """Add catalog subcommand parser."""
+     catalog_parser = subparsers.add_parser(
+         "catalog",
+         help="Query System Catalog metadata",
+         description="Query and explore the System Catalog (runs, pipelines, nodes, state, etc.)",
+     )
+
+     catalog_subparsers = catalog_parser.add_subparsers(
+         dest="catalog_command", help="Catalog commands"
+     )
+
+     # odibi catalog runs
+     runs_parser = catalog_subparsers.add_parser("runs", help="List execution runs from meta_runs")
+     runs_parser.add_argument("config", help="Path to YAML config file")
+     runs_parser.add_argument("--pipeline", "-p", help="Filter by pipeline name")
+     runs_parser.add_argument("--node", "-n", help="Filter by node name")
+     runs_parser.add_argument(
+         "--status", "-s", choices=["SUCCESS", "FAILED", "RUNNING"], help="Filter by status"
+     )
+     runs_parser.add_argument(
+         "--days", "-d", type=int, default=7, help="Show runs from last N days (default: 7)"
+     )
+     runs_parser.add_argument(
+         "--limit", "-l", type=int, default=20, help="Maximum number of runs to show (default: 20)"
+     )
+     runs_parser.add_argument(
+         "--format",
+         "-f",
+         choices=["table", "json"],
+         default="table",
+         help="Output format (default: table)",
+     )
+
+     # odibi catalog pipelines
+     pipelines_parser = catalog_subparsers.add_parser(
+         "pipelines", help="List registered pipelines from meta_pipelines"
+     )
+     pipelines_parser.add_argument("config", help="Path to YAML config file")
+     pipelines_parser.add_argument(
+         "--format",
+         "-f",
+         choices=["table", "json"],
+         default="table",
+         help="Output format (default: table)",
+     )
+
+     # odibi catalog nodes
+     nodes_parser = catalog_subparsers.add_parser(
+         "nodes", help="List registered nodes from meta_nodes"
+     )
+     nodes_parser.add_argument("config", help="Path to YAML config file")
+     nodes_parser.add_argument("--pipeline", "-p", help="Filter by pipeline name")
+     nodes_parser.add_argument(
+         "--format",
+         "-f",
+         choices=["table", "json"],
+         default="table",
+         help="Output format (default: table)",
+     )
+
+     # odibi catalog state
+     state_parser = catalog_subparsers.add_parser("state", help="List HWM state from meta_state")
+     state_parser.add_argument("config", help="Path to YAML config file")
+     state_parser.add_argument("--pipeline", "-p", help="Filter by pipeline name")
+     state_parser.add_argument(
+         "--format",
+         "-f",
+         choices=["table", "json"],
+         default="table",
+         help="Output format (default: table)",
+     )
+
+     # odibi catalog tables
+     tables_parser = catalog_subparsers.add_parser(
+         "tables", help="List registered assets from meta_tables"
+     )
+     tables_parser.add_argument("config", help="Path to YAML config file")
+     tables_parser.add_argument("--project", help="Filter by project name")
+     tables_parser.add_argument(
+         "--format",
+         "-f",
+         choices=["table", "json"],
+         default="table",
+         help="Output format (default: table)",
+     )
+
+     # odibi catalog metrics
+     metrics_parser = catalog_subparsers.add_parser(
+         "metrics", help="List metrics definitions from meta_metrics"
+     )
+     metrics_parser.add_argument("config", help="Path to YAML config file")
+     metrics_parser.add_argument(
+         "--format",
+         "-f",
+         choices=["table", "json"],
+         default="table",
+         help="Output format (default: table)",
+     )
+
+     # odibi catalog patterns
+     patterns_parser = catalog_subparsers.add_parser(
+         "patterns", help="List pattern compliance from meta_patterns"
+     )
+     patterns_parser.add_argument("config", help="Path to YAML config file")
+     patterns_parser.add_argument(
+         "--format",
+         "-f",
+         choices=["table", "json"],
+         default="table",
+         help="Output format (default: table)",
+     )
+
+     # odibi catalog stats
+     stats_parser = catalog_subparsers.add_parser("stats", help="Show execution statistics")
+     stats_parser.add_argument("config", help="Path to YAML config file")
+     stats_parser.add_argument("--pipeline", "-p", help="Filter by pipeline name")
+     stats_parser.add_argument(
+         "--days", "-d", type=int, default=7, help="Statistics over last N days (default: 7)"
+     )
+
+     return catalog_parser
+
+
+ def catalog_command(args):
+     """Execute catalog command."""
+     if not hasattr(args, "catalog_command") or args.catalog_command is None:
+         print("Usage: odibi catalog <command>")
+         print("\nAvailable commands:")
+         print("  runs       List execution runs")
+         print("  pipelines  List registered pipelines")
+         print("  nodes      List registered nodes")
+         print("  state      List HWM state checkpoints")
+         print("  tables     List registered assets")
+         print("  metrics    List metrics definitions")
+         print("  patterns   List pattern compliance")
+         print("  stats      Show execution statistics")
+         return 1
+
+     command_map = {
+         "runs": _runs_command,
+         "pipelines": _pipelines_command,
+         "nodes": _nodes_command,
+         "state": _state_command,
+         "tables": _tables_command,
+         "metrics": _metrics_command,
+         "patterns": _patterns_command,
+         "stats": _stats_command,
+     }
+
+     handler = command_map.get(args.catalog_command)
+     if handler:
+         return handler(args)
+     else:
+         print(f"Unknown catalog command: {args.catalog_command}")
+         return 1
+
+
+ def _get_catalog_manager(args):
+     """Load config and return catalog manager."""
+     try:
+         config_path = Path(args.config).resolve()
+
+         load_extensions(config_path.parent)
+         if config_path.parent.parent != config_path.parent:
+             load_extensions(config_path.parent.parent)
+         if config_path.parent != Path.cwd():
+             load_extensions(Path.cwd())
+
+         manager = PipelineManager.from_yaml(args.config)
+
+         if not manager.catalog_manager:
+             logger.error("System Catalog not configured. Add 'system' section to config.")
+             return None
+
+         return manager.catalog_manager
+
+     except Exception as e:
+         logger.error(f"Failed to load configuration: {e}")
+         return None
+
+
+ def _format_table(headers: list, rows: list, max_width: int = 40) -> str:
+     """Format data as ASCII table."""
+     if not rows:
+         return "No data found."
+
+     def truncate(val, width):
+         s = str(val) if val is not None else ""
+         if len(s) > width:
+             return s[: width - 3] + "..."
+         return s
+
+     col_widths = []
+     for i, header in enumerate(headers):
+         max_col = len(header)
+         for row in rows:
+             if i < len(row):
+                 max_col = max(max_col, min(len(str(row[i] or "")), max_width))
+         col_widths.append(min(max_col, max_width))
+
+     header_line = " | ".join(h.ljust(col_widths[i]) for i, h in enumerate(headers))
+     separator = "-+-".join("-" * w for w in col_widths)
+
+     lines = [header_line, separator]
+     for row in rows:
+         row_line = " | ".join(
+             truncate(row[i] if i < len(row) else "", col_widths[i]).ljust(col_widths[i])
+             for i in range(len(headers))
+         )
+         lines.append(row_line)
+
+     return "\n".join(lines)
+
+
+ def _format_output(headers: list, rows: list, output_format: str) -> str:
+     """Format output as table or JSON."""
+     if output_format == "json":
+         data = [dict(zip(headers, row)) for row in rows]
+         return json.dumps(data, indent=2, default=str)
+     else:
+         return _format_table(headers, rows)
+
+
+ def _runs_command(args) -> int:
+     """List execution runs."""
+     catalog = _get_catalog_manager(args)
+     if not catalog:
+         return 1
+
+     try:
+         df = catalog._read_local_table(catalog.tables["meta_runs"])
+
+         if df.empty:
+             print("No runs found in catalog.")
+             return 0
+
+         cutoff = datetime.now(timezone.utc) - timedelta(days=args.days)
+
+         if "timestamp" in df.columns:
+             import pandas as pd
+
+             if not pd.api.types.is_datetime64_any_dtype(df["timestamp"]):
+                 df["timestamp"] = pd.to_datetime(df["timestamp"])
+
+             if df["timestamp"].dt.tz is None:
+                 df["timestamp"] = df["timestamp"].dt.tz_localize("UTC")
+
+             df = df[df["timestamp"] >= cutoff]
+
+         if args.pipeline and "pipeline_name" in df.columns:
+             df = df[df["pipeline_name"] == args.pipeline]
+
+         if args.node and "node_name" in df.columns:
+             df = df[df["node_name"] == args.node]
+
+         if args.status and "status" in df.columns:
+             df = df[df["status"] == args.status]
+
+         if "timestamp" in df.columns:
+             df = df.sort_values("timestamp", ascending=False)
+
+         df = df.head(args.limit)
+
+         # Some catalogs store the row count as "rows"; normalize the column name.
+         if "rows" in df.columns and "rows_processed" not in df.columns:
+             df = df.rename(columns={"rows": "rows_processed"})
+
+         headers = [
+             "run_id",
+             "pipeline_name",
+             "node_name",
+             "status",
+             "rows_processed",
+             "duration_ms",
+             "timestamp",
+         ]
+         available_cols = [c for c in headers if c in df.columns]
+
+         rows = df[available_cols].values.tolist() if available_cols else []
+
+         print(_format_output(available_cols, rows, args.format))
+         print(f"\nShowing {len(rows)} runs from the last {args.days} days.")
+         return 0
+
+     except Exception as e:
+         logger.error(f"Failed to query runs: {e}")
+         return 1
+
+
+ def _pipelines_command(args) -> int:
+     """List registered pipelines."""
+     catalog = _get_catalog_manager(args)
+     if not catalog:
+         return 1
+
+     try:
+         df = catalog._read_local_table(catalog.tables["meta_pipelines"])
+
+         if df.empty:
+             print("No pipelines registered in catalog. Run 'odibi deploy' first.")
+             return 0
+
+         headers = ["pipeline_name", "layer", "description", "version_hash", "updated_at"]
+         available_cols = [c for c in headers if c in df.columns]
+
+         if "updated_at" in df.columns:
+             df = df.sort_values("updated_at", ascending=False)
+
+         rows = df[available_cols].values.tolist() if available_cols else []
+
+         print(_format_output(available_cols, rows, args.format))
+         print(f"\n{len(rows)} pipeline(s) registered.")
+         return 0
+
+     except Exception as e:
+         logger.error(f"Failed to query pipelines: {e}")
+         return 1
+
+
+ def _nodes_command(args) -> int:
+     """List registered nodes."""
+     catalog = _get_catalog_manager(args)
+     if not catalog:
+         return 1
+
+     try:
+         df = catalog._read_local_table(catalog.tables["meta_nodes"])
+
+         if df.empty:
+             print("No nodes registered in catalog. Run 'odibi deploy' first.")
+             return 0
+
+         if args.pipeline and "pipeline_name" in df.columns:
+             df = df[df["pipeline_name"] == args.pipeline]
+
+         headers = ["pipeline_name", "node_name", "type", "version_hash", "updated_at"]
+         available_cols = [c for c in headers if c in df.columns]
+
+         if "updated_at" in df.columns:
+             df = df.sort_values("updated_at", ascending=False)
+
+         rows = df[available_cols].values.tolist() if available_cols else []
+
+         print(_format_output(available_cols, rows, args.format))
+         print(f"\n{len(rows)} node(s) registered.")
+         return 0
+
+     except Exception as e:
+         logger.error(f"Failed to query nodes: {e}")
+         return 1
+
+
+ def _state_command(args) -> int:
+     """List HWM state checkpoints."""
+     catalog = _get_catalog_manager(args)
+     if not catalog:
+         return 1
+
+     try:
+         df = catalog._read_local_table(catalog.tables["meta_state"])
+
+         if df.empty:
+             print("No state checkpoints found in catalog.")
+             return 0
+
+         if args.pipeline and "key" in df.columns:
+             df = df[df["key"].str.contains(args.pipeline, na=False)]
+
+         headers = ["key", "value", "updated_at"]
+         available_cols = [c for c in headers if c in df.columns]
+
+         rows = df[available_cols].values.tolist() if available_cols else []
+
+         print(_format_output(available_cols, rows, args.format))
+         print(f"\n{len(rows)} state checkpoint(s) found.")
+         return 0
+
+     except Exception as e:
+         logger.error(f"Failed to query state: {e}")
+         return 1
+
+
+ def _tables_command(args) -> int:
+     """List registered assets."""
+     catalog = _get_catalog_manager(args)
+     if not catalog:
+         return 1
+
+     try:
+         df = catalog._read_local_table(catalog.tables["meta_tables"])
+
+         if df.empty:
+             print("No assets registered in catalog.")
+             return 0
+
+         if args.project and "project_name" in df.columns:
+             df = df[df["project_name"] == args.project]
+
+         headers = ["project_name", "table_name", "path", "format", "pattern_type", "updated_at"]
+         available_cols = [c for c in headers if c in df.columns]
+
+         rows = df[available_cols].values.tolist() if available_cols else []
+
+         print(_format_output(available_cols, rows, args.format))
+         print(f"\n{len(rows)} asset(s) registered.")
+         return 0
+
+     except Exception as e:
+         logger.error(f"Failed to query tables: {e}")
+         return 1
+
+
+ def _metrics_command(args) -> int:
+     """List metrics definitions."""
+     catalog = _get_catalog_manager(args)
+     if not catalog:
+         return 1
+
+     try:
+         df = catalog._read_local_table(catalog.tables["meta_metrics"])
+
+         if df.empty:
+             print("No metrics defined in catalog.")
+             return 0
+
+         headers = ["metric_name", "source_table", "definition_sql", "dimensions"]
+         available_cols = [c for c in headers if c in df.columns]
+
+         rows = df[available_cols].values.tolist() if available_cols else []
+
+         print(_format_output(available_cols, rows, args.format))
+         print(f"\n{len(rows)} metric(s) defined.")
+         return 0
+
+     except Exception as e:
+         logger.error(f"Failed to query metrics: {e}")
+         return 1
+
+
+ def _patterns_command(args) -> int:
+     """List pattern compliance."""
+     catalog = _get_catalog_manager(args)
+     if not catalog:
+         return 1
+
+     try:
+         df = catalog._read_local_table(catalog.tables["meta_patterns"])
+
+         if df.empty:
+             print("No pattern data in catalog.")
+             return 0
+
+         headers = ["table_name", "pattern_type", "compliance_score", "configuration"]
+         available_cols = [c for c in headers if c in df.columns]
+
+         rows = df[available_cols].values.tolist() if available_cols else []
+
+         print(_format_output(available_cols, rows, args.format))
+         print(f"\n{len(rows)} pattern record(s) found.")
+         return 0
+
+     except Exception as e:
+         logger.error(f"Failed to query patterns: {e}")
+         return 1
+
+
+ def _stats_command(args) -> int:
+     """Show execution statistics."""
+     catalog = _get_catalog_manager(args)
+     if not catalog:
+         return 1
+
+     try:
+         import pandas as pd
+
+         df = catalog._read_local_table(catalog.tables["meta_runs"])
+
+         if df.empty:
+             print("No runs found in catalog for statistics.")
+             return 0
+
+         cutoff = datetime.now(timezone.utc) - timedelta(days=args.days)
+
+         if "timestamp" in df.columns:
+             if not pd.api.types.is_datetime64_any_dtype(df["timestamp"]):
+                 df["timestamp"] = pd.to_datetime(df["timestamp"])
+
+             if df["timestamp"].dt.tz is None:
+                 df["timestamp"] = df["timestamp"].dt.tz_localize("UTC")
+
+             df = df[df["timestamp"] >= cutoff]
+
+         if args.pipeline and "pipeline_name" in df.columns:
+             df = df[df["pipeline_name"] == args.pipeline]
+
+         if df.empty:
+             print(f"No runs in the last {args.days} days.")
+             return 0
+
+         print(f"=== Execution Statistics (Last {args.days} Days) ===\n")
+
+         total_runs = len(df)
+         success_runs = len(df[df["status"] == "SUCCESS"]) if "status" in df.columns else 0
+         failed_runs = len(df[df["status"] == "FAILED"]) if "status" in df.columns else 0
+         success_rate = (success_runs / total_runs * 100) if total_runs > 0 else 0
+
+         print(f"Total Runs: {total_runs}")
+         print(f"Successful: {success_runs}")
+         print(f"Failed: {failed_runs}")
+         print(f"Success Rate: {success_rate:.1f}%")
+
+         if "rows_processed" in df.columns:
+             total_rows = df["rows_processed"].sum()
+             avg_rows = df["rows_processed"].mean()
+             print(f"\nTotal Rows: {int(total_rows):,}")
+             print(f"Avg Rows/Run: {int(avg_rows):,}")
+
+         if "duration_ms" in df.columns:
+             avg_duration_ms = df["duration_ms"].mean()
+             total_duration_ms = df["duration_ms"].sum()
+             print(f"\nAvg Duration: {avg_duration_ms / 1000:.2f}s")
+             print(f"Total Runtime: {total_duration_ms / 1000:.2f}s")
+
+         if "pipeline_name" in df.columns:
+             print("\n--- Runs by Pipeline ---")
+             pipeline_counts = df["pipeline_name"].value_counts()
+             for pipeline, count in pipeline_counts.items():
+                 print(f"  {pipeline}: {count}")
+
+         if "node_name" in df.columns and "status" in df.columns:
+             failed_nodes = df[df["status"] == "FAILED"]["node_name"].value_counts()
+             if not failed_nodes.empty:
+                 print("\n--- Most Failed Nodes ---")
+                 for node, count in failed_nodes.head(5).items():
+                     print(f"  {node}: {count} failures")
+
+         return 0
+
+     except Exception as e:
+         logger.error(f"Failed to compute statistics: {e}")
+         return 1
odibi/cli/deploy.py ADDED
@@ -0,0 +1,69 @@
+ """Deploy command implementation."""
+
+ from pathlib import Path
+
+ from odibi.pipeline import PipelineManager
+ from odibi.utils.extensions import load_extensions
+ from odibi.utils.logging import logger
+
+
+ def deploy_command(args):
+     """Deploy pipeline definitions to System Catalog."""
+     try:
+         config_path = Path(args.config).resolve()
+
+         logger.info(f"Deploying configuration from {config_path}...")
+
+         # Load extensions (same lookup order as the run command)
+         load_extensions(config_path.parent)
+         if config_path.parent.parent != config_path.parent:
+             load_extensions(config_path.parent.parent)
+         if config_path.parent != Path.cwd():
+             load_extensions(Path.cwd())
+
+         # Load the PipelineManager; this validates the config and initializes
+         # connections and the catalog. Deployment usually needs no environment
+         # overrides, but honor --env when the caller provides it (e.g. to
+         # deploy a prod config).
+         env = getattr(args, "env", None)
+         manager = PipelineManager.from_yaml(args.config, env=env)
+
+         if not manager.catalog_manager:
+             logger.error("System Catalog not configured. Cannot deploy.")
+             return 1
+
+         catalog = manager.catalog_manager
+
+         # Ensure catalog tables exist. PipelineManager.__init__ already calls
+         # bootstrap(), but the call is idempotent, so repeating it is safe.
+         catalog.bootstrap()
+
+         logger.info(f"Syncing to System Catalog at: {catalog.base_path}")
+
+         project_config = manager.project_config
+         pipelines = project_config.pipelines
+
+         total_pipelines = len(pipelines)
+         total_nodes = sum(len(p.nodes) for p in pipelines)
+
+         logger.info(f"Found {total_pipelines} pipelines, {total_nodes} nodes.")
+
+         for i, pipeline in enumerate(pipelines, 1):
+             logger.info(f"[{i}/{total_pipelines}] Syncing pipeline: {pipeline.pipeline}")
+
+             # Register the pipeline
+             catalog.register_pipeline(pipeline, project_config)
+
+             # Register its nodes
+             for node in pipeline.nodes:
+                 # logger.debug(f"  - Syncing node: {node.name}")
+                 catalog.register_node(pipeline.pipeline, node)
+
+         logger.info("Deployment complete! System Catalog is up to date.")
+         return 0
+
+     except Exception as e:
+         logger.error(f"Deployment failed: {e}")
+         # import traceback
+         # traceback.print_exc()
+         return 1