odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/cli/story.py ADDED
@@ -0,0 +1,379 @@
1
+ """
2
+ Story CLI Commands
3
+ ==================
4
+
5
+ Commands for generating and managing pipeline documentation stories.
6
+ """
7
+
8
+ from pathlib import Path
9
+
10
+ import yaml
11
+
12
+ from odibi.config import ProjectConfig
13
+ from odibi.story import DocStoryGenerator
14
+
15
+
16
def story_command(args):
    """
    Handle story subcommands.

    Dispatches to the handler matching ``args.story_command``. When no
    subcommand was given, a short usage summary is printed (mirroring the
    behavior of the ``system`` command) instead of the unhelpful
    "Unknown story command: None".

    Args:
        args: Parsed command-line arguments

    Returns:
        Exit code (0 for success, 1 for error)
    """
    subcommand = getattr(args, "story_command", None)

    # No subcommand at all: show what is available rather than "Unknown ... None".
    if subcommand is None:
        print("Usage: odibi story <command>")
        print("\nAvailable commands:")
        print("  generate  Generate documentation story from pipeline config")
        print("  diff      Compare two pipeline run stories")
        print("  list      List available story files")
        return 1

    if subcommand == "generate":
        return generate_command(args)
    if subcommand == "diff":
        return diff_command(args)
    if subcommand == "list":
        return list_command(args)

    print(f"Unknown story command: {subcommand}")
    return 1
35
+
36
+
37
def generate_command(args):
    """
    Generate documentation story from pipeline config.

    Loads the YAML file at ``args.config`` into a ``ProjectConfig``, builds a
    ``DocStoryGenerator`` for the first pipeline in it, and writes the story
    to ``args.output`` (or to an auto-generated path under ``docs/``).

    Args:
        args: Parsed arguments with config, output, format, theme,
            no_validate, no_diagram and verbose.

    Returns:
        Exit code (0 for success, 1 for error)
    """
    try:
        # Load configuration
        print(f"📖 Loading configuration from {args.config}...")

        # YAML is UTF-8 by specification; do not depend on the platform locale.
        with open(args.config, "r", encoding="utf-8") as f:
            config_data = yaml.safe_load(f)

        # An empty file loads as None and a scalar/list cannot be splatted into
        # ProjectConfig; report that clearly instead of an opaque TypeError.
        if not isinstance(config_data, dict):
            print("❌ Validation error: configuration file is empty or is not a YAML mapping")
            return 1

        config = ProjectConfig(**config_data)

        # Get the pipeline config (assume first pipeline if not specified)
        if not config.pipelines:
            print("❌ No pipelines found in configuration")
            return 1
        pipeline_config = config.pipelines[0]

        # Create doc story generator
        print("📝 Generating documentation story...")
        generator = DocStoryGenerator(
            pipeline_config=pipeline_config,
            project_config=config if hasattr(config, "project") else None,
        )

        output_format = args.format.lower()

        # Determine output path
        if args.output:
            output_path = args.output
        else:
            # Auto-generate output filename. "md" is an accepted --format choice
            # and must map to ".md" as well (it used to fall back to ".html").
            format_ext = {
                "html": ".html",
                "markdown": ".md",
                "md": ".md",
                "json": ".json",
            }.get(output_format, ".html")
            output_path = f"docs/{pipeline_config.pipeline}_documentation{format_ext}"

        # Load theme if HTML format
        theme = None
        if output_format == "html" and args.theme:
            from odibi.story.themes import get_theme

            try:
                theme = get_theme(args.theme)
                print(f"🎨 Using theme: {theme.name}")
            except ValueError as e:
                # Best effort: an unknown theme falls back to the default one.
                print(f"⚠️ Theme warning: {e}, using default theme")

        # Generate story
        result_path = generator.generate(
            output_path=output_path,
            format=args.format,
            validate=not args.no_validate,
            include_flow_diagram=not args.no_diagram,
            theme=theme,
        )

        print(f"✅ Documentation generated: {result_path}")
        print(f"📄 Format: {args.format.upper()}")

        if output_format == "html":
            # as_uri() yields a well-formed, percent-encoded file URL on every
            # platform (including Windows drive letters and paths with spaces).
            print(f"🌐 Open in browser: {Path(result_path).absolute().as_uri()}")

        return 0

    except FileNotFoundError as e:
        print(f"❌ Configuration file not found: {e}")
        return 1
    except ValueError as e:
        print(f"❌ Validation error: {e}")
        return 1
    except Exception as e:
        # Top-level CLI boundary: report, optionally with a traceback, and fail.
        print(f"❌ Error generating documentation: {e}")
        if args.verbose:
            import traceback

            traceback.print_exc()
        return 1
121
+
122
+
123
def _story_metric(data, key):
    """Return ``data[key]``, treating a missing key or a JSON ``null`` as 0."""
    value = data.get(key)
    return 0 if value is None else value


def diff_command(args):
    """
    Compare two pipeline run stories.

    Reads both story JSON files and prints a comparison of duration, success
    rate and rows processed; with ``args.detailed`` a per-node comparison is
    printed as well. Metrics that are missing or ``null`` in a story are
    treated as 0 rather than aborting the comparison.

    Args:
        args: Parsed arguments with story1, story2 paths, detailed and verbose

    Returns:
        Exit code (0 for success, 1 for error)
    """
    # Imported before the ``try`` so the name is always bound when the
    # ``except json.JSONDecodeError`` clause below is evaluated.
    import json

    try:
        print("📊 Comparing stories...")
        print(f" Story 1: {args.story1}")
        print(f" Story 2: {args.story2}")

        # Load story metadata from JSON files (JSON text is UTF-8)
        with open(args.story1, "r", encoding="utf-8") as f:
            story1_data = json.load(f)

        with open(args.story2, "r", encoding="utf-8") as f:
            story2_data = json.load(f)

        # Compare basic metrics
        print("\n📈 Comparison Results:")
        print("=" * 60)

        # Pipeline info
        print(f"\nPipeline: {story1_data.get('pipeline_name', 'Unknown')}")

        # Execution times
        duration1 = _story_metric(story1_data, "duration")
        duration2 = _story_metric(story2_data, "duration")
        print("\n⏱️ Execution Time:")
        print(f" Story 1: {duration1:.2f}s")
        print(f" Story 2: {duration2:.2f}s")

        time_diff = duration2 - duration1
        if time_diff > 0:
            print(f" Difference: +{time_diff:.2f}s (slower)")
        elif time_diff < 0:
            print(f" Difference: {time_diff:.2f}s (faster)")
        else:
            print(" Difference: No change")

        # Success rate
        print("\n✅ Success Rate:")
        print(f" Story 1: {_story_metric(story1_data, 'success_rate'):.1f}%")
        print(f" Story 2: {_story_metric(story2_data, 'success_rate'):.1f}%")

        # Row counts
        total_rows1 = _story_metric(story1_data, "total_rows_processed")
        total_rows2 = _story_metric(story2_data, "total_rows_processed")
        print("\n📊 Rows Processed:")
        print(f" Story 1: {total_rows1:,}")
        print(f" Story 2: {total_rows2:,}")

        row_diff = total_rows2 - total_rows1
        if row_diff != 0:
            print(f" Difference: {row_diff:+,} rows")

        # Node-level differences
        if args.detailed:
            print("\n🔍 Node-Level Details:")
            print("-" * 60)

            story1_nodes = {n["node_name"]: n for n in story1_data.get("nodes", [])}
            story2_nodes = {n["node_name"]: n for n in story2_data.get("nodes", [])}

            # Union of names so nodes present in only one story are reported too.
            all_nodes = set(story1_nodes) | set(story2_nodes)

            for node_name in sorted(all_nodes):
                node1 = story1_nodes.get(node_name, {})
                node2 = story2_nodes.get(node_name, {})

                print(f"\n {node_name}:")

                if node1 and node2:
                    # Compare durations
                    dur1 = _story_metric(node1, "duration")
                    dur2 = _story_metric(node2, "duration")
                    dur_diff = dur2 - dur1
                    print(f" Duration: {dur1:.3f}s → {dur2:.3f}s ({dur_diff:+.3f}s)")

                    # Compare row counts
                    rows1 = node1.get("rows_out", 0) or 0
                    rows2 = node2.get("rows_out", 0) or 0
                    if rows1 or rows2:
                        node_row_diff = rows2 - rows1
                        print(f" Rows: {rows1:,} → {rows2:,} ({node_row_diff:+,})")

                    # Status changes
                    status1 = node1.get("status", "unknown")
                    status2 = node2.get("status", "unknown")
                    if status1 != status2:
                        print(f" ⚠️ Status changed: {status1} → {status2}")

                elif node1:
                    print(" ❌ Removed in Story 2")
                elif node2:
                    print(" ➕ Added in Story 2")

        print("\n" + "=" * 60)
        return 0

    except FileNotFoundError as e:
        print(f"❌ Story file not found: {e}")
        return 1
    except json.JSONDecodeError as e:
        print(f"❌ Invalid JSON in story file: {e}")
        return 1
    except Exception as e:
        # Top-level CLI boundary: report, optionally with a traceback, and fail.
        print(f"❌ Error comparing stories: {e}")
        if args.verbose:
            import traceback

            traceback.print_exc()
        return 1
240
+
241
+
242
def list_command(args):
    """
    List available story files.

    Prints the ``*.json``, ``*.html`` and ``*.md`` files found directly inside
    ``args.directory``, newest first, showing at most ``args.limit`` of them
    together with their modification time, size and path.

    Args:
        args: Parsed arguments with directory path and limit

    Returns:
        Exit code (0 for success, including "no stories found"; 1 for error)
    """
    try:
        from datetime import datetime

        story_dir = Path(args.directory)

        # is_dir() rather than exists(): a path to a regular file is not a
        # directory we can list either.
        if not story_dir.is_dir():
            print(f"❌ Directory not found: {story_dir}")
            return 1

        # Find story files (JSON, HTML, MD); stat each one once and reuse the
        # result for both sorting and display.
        story_entries = [
            (path, path.stat())
            for pattern in ("*.json", "*.html", "*.md")
            for path in story_dir.glob(pattern)
        ]

        if not story_entries:
            print(f"ℹ️ No story files found in {story_dir}")
            return 0

        # Sort by modification time (newest first)
        story_entries.sort(key=lambda entry: entry[1].st_mtime, reverse=True)

        # A negative limit would make the slice below drop entries from the end
        # while the "more" count kept using the raw value; clamp it to zero so
        # the two always agree.
        limit = max(args.limit, 0)

        print(f"\n📚 Stories in {story_dir}:")
        print("=" * 80)

        for story_file, stat in story_entries[:limit]:
            size = stat.st_size
            modified = datetime.fromtimestamp(stat.st_mtime)

            # Format size
            if size < 1024:
                size_str = f"{size}B"
            elif size < 1024 * 1024:
                size_str = f"{size / 1024:.1f}KB"
            else:
                size_str = f"{size / 1024 / 1024:.1f}MB"

            print(f"\n 📄 {story_file.name}")
            print(f" Modified: {modified.strftime('%Y-%m-%d %H:%M:%S')}")
            print(f" Size: {size_str}")
            print(f" Path: {story_file}")

        if len(story_entries) > limit:
            print(f"\n ... and {len(story_entries) - limit} more")
            print(" (Use --limit to show more)")

        print()
        return 0

    except Exception as e:
        # Top-level CLI boundary: report the problem and fail.
        print(f"❌ Error listing stories: {e}")
        return 1
305
+
306
+
307
def _register_story_generate(commands):
    """Register ``odibi story generate`` and its options on *commands*."""
    gen = commands.add_parser(
        "generate", help="Generate documentation story from pipeline config"
    )
    gen.add_argument("config", help="Path to pipeline YAML config file")
    gen.add_argument("-o", "--output", help="Output file path (auto-generated if not specified)")
    gen.add_argument(
        "-f",
        "--format",
        default="html",
        choices=["html", "markdown", "md", "json"],
        help="Output format (default: html)",
    )
    # The two opt-out switches share the same shape; only flag and help differ.
    for flag, description in (
        ("--no-validate", "Skip explanation quality validation"),
        ("--no-diagram", "Exclude flow diagram from documentation"),
    ):
        gen.add_argument(flag, action="store_true", help=description)
    gen.add_argument(
        "-t",
        "--theme",
        default="default",
        help="Theme name or path to custom theme YAML (default: default, options: corporate, dark, minimal)",
    )
    gen.add_argument(
        "-v", "--verbose", action="store_true", help="Verbose output with stack traces"
    )


def _register_story_diff(commands):
    """Register ``odibi story diff`` and its options on *commands*."""
    diff = commands.add_parser("diff", help="Compare two pipeline run stories")
    for position, ordinal in (("story1", "first"), ("story2", "second")):
        diff.add_argument(position, help=f"Path to {ordinal} story JSON file")
    diff.add_argument(
        "-d", "--detailed", action="store_true", help="Show detailed node-level comparison"
    )
    diff.add_argument("-v", "--verbose", action="store_true", help="Verbose output")


def _register_story_list(commands):
    """Register ``odibi story list`` and its options on *commands*."""
    listing = commands.add_parser("list", help="List available story files")
    listing.add_argument(
        "-d",
        "--directory",
        default="stories/runs",
        help="Directory to search for stories (default: stories/runs)",
    )
    listing.add_argument(
        "-l",
        "--limit",
        default=10,
        type=int,
        help="Maximum number of stories to show (default: 10)",
    )


def add_story_parser(subparsers):
    """
    Add story subcommand parser.

    Creates the ``story`` parser with its ``generate``, ``diff`` and ``list``
    subcommands; the chosen subcommand is stored under ``story_command``.

    Args:
        subparsers: Argparse subparsers object

    Returns:
        Story parser
    """
    story_parser = subparsers.add_parser(
        "story", help="Generate and manage pipeline documentation stories"
    )
    commands = story_parser.add_subparsers(dest="story_command", help="Story commands")

    # Registration order determines the order shown in --help.
    _register_story_generate(commands)
    _register_story_diff(commands)
    _register_story_list(commands)

    return story_parser
odibi/cli/system.py ADDED
@@ -0,0 +1,132 @@
1
+ """System CLI command for managing system catalog operations."""
2
+
3
+ from pathlib import Path
4
+
5
+ from odibi.pipeline import PipelineManager
6
+ from odibi.state import create_state_backend, create_sync_source_backend, sync_system_data
7
+ from odibi.utils.extensions import load_extensions
8
+ from odibi.utils.logging import logger
9
+
10
+
11
def add_system_parser(subparsers):
    """Register the ``system`` command and its ``sync`` subcommand.

    The selected subcommand is stored on the parsed namespace as
    ``system_command``.

    Args:
        subparsers: Argparse subparsers object to attach the command to.

    Returns:
        The newly created ``system`` parser.
    """
    system_parser = subparsers.add_parser(
        "system",
        description="Commands for syncing and managing system catalog data",
        help="Manage System Catalog operations",
    )
    commands = system_parser.add_subparsers(help="System commands", dest="system_command")

    # odibi system sync
    sync = commands.add_parser("sync", help="Sync system data from source to target backend")
    sync.add_argument("config", help="Path to YAML config file")
    sync.add_argument(
        "--env",
        default=None,
        help="Environment to apply overrides (e.g., dev, qat, prod)",
    )
    sync.add_argument(
        "--tables",
        choices=["runs", "state"],
        nargs="+",
        default=None,
        help="Tables to sync (default: all)",
    )
    sync.add_argument(
        "--dry-run",
        help="Show what would be synced without making changes",
        action="store_true",
    )

    return system_parser
44
+
45
+
46
def system_command(args):
    """Run the requested ``odibi system`` subcommand.

    Prints a usage summary when no subcommand was supplied and an error for
    an unrecognized one; both cases return 1. Otherwise the handler's own
    return value is passed through.
    """
    requested = getattr(args, "system_command", None)

    if requested is None:
        for line in (
            "Usage: odibi system <command>",
            "\nAvailable commands:",
            " sync Sync system data from source to target backend",
        ):
            print(line)
        return 1

    # Subcommand name -> handler.
    handlers = {"sync": _sync_command}

    if requested not in handlers:
        print(f"Unknown system command: {requested}")
        return 1

    return handlers[requested](args)
64
+
65
+
66
def _sync_command(args) -> int:
    """Copy system catalog data from the configured source into the target backend.

    Reads ``args.config`` (with optional ``args.env`` overrides), requires a
    ``system`` section containing ``sync_from``, builds the source and target
    backends and then either reports what would be synced (``args.dry_run``)
    or performs the sync for ``args.tables`` (both tables when unset).

    Returns:
        0 on success or dry run, 1 when configuration is missing or the sync
        fails for any reason.
    """
    try:
        resolved_config = Path(args.config).resolve()
        project_dir = resolved_config.parent

        # Extensions are loaded from the config's directory, from the directory
        # above it (unless the config sits at the filesystem root), and from the
        # current working directory when that differs from the config's directory.
        load_extensions(project_dir)
        if project_dir.parent != project_dir:
            load_extensions(project_dir.parent)
        if project_dir != Path.cwd():
            load_extensions(Path.cwd())

        env_name = getattr(args, "env", None)
        manager = PipelineManager.from_yaml(args.config, environment=env_name)
        project_config = manager.config

        if not project_config.system:
            logger.error("System Catalog not configured. Add 'system' section to config.")
            return 1

        if not project_config.system.sync_from:
            logger.error(
                "No sync_from configured in system config. "
                "Add 'sync_from' section with connection and path."
            )
            return 1

        sync_from = project_config.system.sync_from
        root = str(project_dir)

        # Source backend first, then target backend (built even for a dry run).
        source_backend = create_sync_source_backend(
            sync_from_config=sync_from,
            connections=project_config.connections,
            project_root=root,
        )
        target_backend = create_state_backend(config=project_config, project_root=root)

        source_conn = sync_from.connection
        target_conn = project_config.system.connection
        tables = args.tables if args.tables else ["runs", "state"]

        if args.dry_run:
            for line in (
                "[DRY RUN] Would sync system data:",
                f" Source: {source_conn}",
                f" Target: {target_conn}",
                f" Tables: {', '.join(tables)}",
            ):
                print(line)
            return 0

        print(f"Syncing system data from '{source_conn}' to '{target_conn}'...")

        result = sync_system_data(
            source_backend=source_backend,
            target_backend=target_backend,
            tables=tables,
        )

        # NOTE(review): assumes the result always carries both 'runs' and
        # 'state' keys, even when only one table was requested — confirm.
        print("\nSync complete!")
        print(f" Runs synced: {result['runs']}")
        print(f" State synced: {result['state']}")
        return 0

    except Exception as e:
        logger.error(f"Sync failed: {e}")
        return 1