odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,430 @@
1
+ """
2
+ Semantic Layer Runner
3
+ =====================
4
+
5
+ Orchestrates semantic layer execution including:
6
+ - Loading semantic configuration from ProjectConfig
7
+ - Executing views against SQL Server
8
+ - Generating semantic layer stories
9
+ - Generating combined lineage
10
+
11
+ Usage:
12
+ runner = SemanticLayerRunner(project_config)
13
+ result = runner.run() # Uses connection from semantic config
14
+ """
15
+
16
+ from typing import Any, Callable, Dict, Optional
17
+
18
+ from odibi.config import ProjectConfig
19
+ from odibi.semantics.metrics import SemanticLayerConfig, parse_semantic_config
20
+ from odibi.semantics.story import SemanticStoryGenerator, SemanticStoryMetadata
21
+ from odibi.story.lineage import LineageResult
22
+ from odibi.story.lineage_utils import (
23
+ generate_lineage,
24
+ get_full_stories_path,
25
+ get_storage_options,
26
+ )
27
+ from odibi.utils.logging_context import get_logging_context
28
+
29
+
30
class SemanticConfig:
    """Semantic layer settings bundled with the runner-level connection fields.

    Attributes:
        connection: Value of the ``connection`` key, or None when absent.
        sql_output_path: Value of the ``sql_output_path`` key, or None when absent.
        layer_config: Result of ``parse_semantic_config`` applied to the same dict.
    """

    def __init__(self, config_dict: Dict[str, Any]):
        # Both runner-level keys are optional; ``.get`` yields None when missing.
        connection_name = config_dict.get("connection")
        output_path = config_dict.get("sql_output_path")

        self.connection: Optional[str] = connection_name
        self.sql_output_path: Optional[str] = output_path

        # The full dict (including the two keys above) is handed to the parser.
        self.layer_config = parse_semantic_config(config_dict)
37
+
38
+
39
class SemanticLayerRunner:
    """
    Run semantic layer operations with story generation.

    Orchestrates the full semantic layer execution:
    1. Parse semantic config from project
    2. Execute views against SQL Server
    3. Generate semantic story (HTML + JSON)
    4. Optionally generate combined lineage

    Example:
        ```python
        runner = SemanticLayerRunner(project_config)
        result = runner.run(
            execute_sql=sql_conn.execute,
            save_sql_to="gold/views/",
            write_file=adls_write,
        )
        ```
    """

    def __init__(
        self,
        project_config: ProjectConfig,
        name: Optional[str] = None,
    ):
        """
        Initialize runner with project configuration.

        Args:
            project_config: ProjectConfig with semantic section
            name: Optional name for the semantic layer run. Defaults to
                ``"<project>_semantic"`` built from ``project_config.project``.
        """
        self.project_config = project_config
        self.name = name or f"{project_config.project}_semantic"

        # Populated lazily: config on first property access, the rest by run().
        self._semantic_ext: Optional[SemanticConfig] = None
        self._story_generator: Optional[SemanticStoryGenerator] = None
        self._last_metadata: Optional[SemanticStoryMetadata] = None

    @property
    def semantic_ext(self) -> SemanticConfig:
        """Get extended semantic configuration with connection info (parsed once, then cached)."""
        if self._semantic_ext is None:
            self._semantic_ext = self._parse_semantic_config()
        return self._semantic_ext

    @property
    def semantic_config(self) -> SemanticLayerConfig:
        """Get parsed semantic layer configuration."""
        return self.semantic_ext.layer_config

    @property
    def connection_name(self) -> Optional[str]:
        """Get the SQL Server connection name for views."""
        return self.semantic_ext.connection

    @property
    def sql_output_path(self) -> Optional[str]:
        """Get the path for saving SQL files."""
        return self.semantic_ext.sql_output_path

    def _parse_semantic_config(self) -> SemanticConfig:
        """Parse semantic config from project config.

        Returns:
            A SemanticConfig built from ``project_config.semantic``; when that
            section is empty/missing a warning is logged and a SemanticConfig
            built from an empty dict is returned.
        """
        ctx = get_logging_context()

        semantic_dict = self.project_config.semantic
        if not semantic_dict:
            ctx.warning("No semantic configuration found in project config")
            return SemanticConfig({})

        ctx.debug(
            "Parsing semantic config",
            keys=list(semantic_dict.keys()),
            connection=semantic_dict.get("connection"),
            # ``or []`` keeps this diagnostic from raising TypeError when a key
            # is present but null (e.g. an empty ``metrics:`` entry in YAML).
            metrics_count=len(semantic_dict.get("metrics") or []),
            views_count=len(semantic_dict.get("views") or []),
        )

        return SemanticConfig(semantic_dict)

    def run(
        self,
        execute_sql: Optional[Callable[[str], None]] = None,
        save_sql_to: Optional[str] = None,
        write_file: Optional[Callable[[str, str], None]] = None,
        generate_story: Optional[bool] = None,
        generate_lineage: Optional[bool] = None,
    ) -> Dict[str, Any]:
        """
        Execute the semantic layer.

        Args:
            execute_sql: Callable that executes SQL. If not provided, uses the
                connection specified in semantic.connection config.
            save_sql_to: Path to save SQL files. If not provided, uses
                semantic.sql_output_path from config.
            write_file: Optional callable to write files (for remote storage).
                If omitted while a SQL output path is set, one is derived from
                the story connection when possible.
            generate_story: Whether to generate execution story. Defaults to
                ``project_config.story.auto_generate`` when None.
            generate_lineage: Whether to generate combined lineage. Defaults to
                ``project_config.story.generate_lineage`` when None.

        Returns:
            Dict with execution results including:
            - views_created: List of created view names
            - views_failed: List of failed view names
            - duration: Total execution time
            - story_paths: Dict with json/html paths if story generated
            - lineage_paths: Dict with json/html paths if lineage generated
            - connection: The semantic connection name (may be None)

        Raises:
            ValueError: If ``execute_sql`` is omitted and no usable connection
                can be resolved from the semantic config.
        """
        # NOTE: the ``generate_lineage`` parameter shadows the module-level
        # ``generate_lineage`` helper inside this method only; the helper is
        # reached through ``self._generate_lineage``.
        ctx = get_logging_context()

        if execute_sql is None:
            execute_sql = self._get_execute_sql_from_connection()

        if save_sql_to is None:
            save_sql_to = self.sql_output_path

        # Auto-create write_file using story connection if sql_output_path is set
        if write_file is None and save_sql_to:
            write_file = self._get_write_file_from_story_connection()
            if write_file:
                ctx.info("Using story connection for SQL file output", path=save_sql_to)

        # Read defaults from story config if not explicitly provided
        if generate_story is None:
            generate_story = self.project_config.story.auto_generate
        if generate_lineage is None:
            generate_lineage = self.project_config.story.generate_lineage

        ctx.info(
            "Starting semantic layer execution",
            name=self.name,
            connection=self.connection_name,
            views_count=len(self.semantic_config.views),
        )

        result = {
            "views_created": [],
            "views_failed": [],
            "duration": 0.0,
            "story_paths": None,
            "lineage_paths": None,
            "connection": self.connection_name,
        }

        # Nothing to execute: return the empty result skeleton.
        if not self.semantic_config.views:
            ctx.warning("No views defined in semantic config")
            return result

        stories_path = self.project_config.story.path
        storage_options = self._get_storage_options()

        self._story_generator = SemanticStoryGenerator(
            config=self.semantic_config,
            name=self.name,
            output_path=stories_path,
            storage_options=storage_options,
        )

        metadata = self._story_generator.execute_with_story(
            execute_sql=execute_sql,
            save_sql_to=save_sql_to,
            write_file=write_file,
        )
        self._last_metadata = metadata

        result["views_created"] = [v.view_name for v in metadata.views if v.status == "success"]
        result["views_failed"] = [v.view_name for v in metadata.views if v.status == "failed"]
        result["duration"] = metadata.duration

        if generate_story:
            story_paths = self._story_generator.save_story(write_file=write_file)
            result["story_paths"] = story_paths
            ctx.info("Semantic story saved", paths=story_paths)

        if generate_lineage:
            lineage_result = self._generate_lineage(write_file)
            if lineage_result:
                result["lineage_paths"] = {
                    "json": lineage_result.json_path,
                    "html": lineage_result.html_path,
                }

        ctx.info(
            "Semantic layer execution complete",
            views_created=len(result["views_created"]),
            views_failed=len(result["views_failed"]),
            duration=result["duration"],
        )

        return result

    def _get_execute_sql_from_connection(self) -> Callable[[str], None]:
        """Get an execute_sql callable from the configured connection.

        Returns:
            The bound ``execute`` method of an ``AzureSQL`` connection built
            from the semantic connection's settings.

        Raises:
            ValueError: If no semantic connection is configured, the named
                connection does not exist, or it lacks a server/host or database.
        """
        ctx = get_logging_context()

        if not self.connection_name:
            raise ValueError(
                "No execute_sql provided and no connection specified in semantic config. "
                "Either pass execute_sql to run() or add 'connection: your_sql_conn' to semantic config."
            )

        conn_config = self.project_config.connections.get(self.connection_name)
        if not conn_config:
            available = ", ".join(self.project_config.connections.keys())
            raise ValueError(
                f"Semantic connection '{self.connection_name}' not found. Available: {available}"
            )

        ctx.info(
            "Creating SQL executor from connection",
            connection=self.connection_name,
            type=str(conn_config.type),
        )

        # Imported lazily so the SQL connection module is only loaded when needed.
        from odibi.connections.azure_sql import AzureSQL

        # Accept either ``host`` or ``server`` as the server address field.
        server = getattr(conn_config, "host", None) or getattr(conn_config, "server", None)
        database = getattr(conn_config, "database", None)
        port = getattr(conn_config, "port", 1433)

        if not server or not database:
            if hasattr(conn_config, "model_fields_set"):
                available_fields = list(conn_config.model_fields_set)
            else:
                available_fields = "unknown"
            raise ValueError(
                f"Connection '{self.connection_name}' missing required 'host' or 'database'. "
                f"Available fields: {available_fields}"
            )

        # Default auth mode; overridden below by an explicit auth block or by
        # top-level username/password credentials.
        auth_mode = "aad_msi"
        username = None
        password = None

        if hasattr(conn_config, "auth") and conn_config.auth:
            auth = conn_config.auth
            mode = getattr(auth, "mode", None)
            if mode:
                # ``mode`` may be an enum-like object or a plain string.
                auth_mode = mode.value if hasattr(mode, "value") else str(mode)
            username = getattr(auth, "username", None)
            password = getattr(auth, "password", None)
        else:
            username = getattr(conn_config, "username", None)
            password = getattr(conn_config, "password", None)
            if username and password:
                auth_mode = "sql_login"

        sql_conn = AzureSQL(
            server=server,
            database=database,
            port=port,
            auth_mode=auth_mode,
            username=username,
            password=password,
        )

        return sql_conn.execute

    def _generate_lineage(
        self,
        write_file: Optional[Callable[[str, str], None]] = None,
    ) -> Optional[LineageResult]:
        """Generate combined lineage from all stories.

        Uses the shared generate_lineage utility for consistency with
        PipelineManager lineage generation.
        """
        return generate_lineage(
            project_config=self.project_config,
            write_file=write_file,
        )

    def _get_full_stories_path(self) -> str:
        """
        Build the full path to stories, including cloud URL if remote.

        Delegates to the shared utility function for consistency.
        """
        return get_full_stories_path(self.project_config)

    def _get_storage_options(self) -> Dict[str, Any]:
        """
        Get storage options from story connection for fsspec/adlfs.

        Delegates to the shared utility function for consistency.
        """
        return get_storage_options(self.project_config)

    def _get_write_file_from_story_connection(self) -> Optional[Callable[[str, str], None]]:
        """
        Create a write_file callback using the story connection.

        Returns a callable that writes files to the story connection's storage,
        or None if no valid connection is available. Connection types "local",
        "azure_blob" and "delta" are handled; any other type yields None.
        Files are written as UTF-8 text.
        """
        ctx = get_logging_context()
        storage_options = self._get_storage_options()

        story_conn_name = self.project_config.story.connection
        story_conn = self.project_config.connections.get(story_conn_name)

        if not story_conn:
            ctx.debug("No story connection found", connection=story_conn_name)
            return None

        conn_type = getattr(story_conn, "type", None)
        if conn_type is None:
            ctx.debug("Story connection has no type")
            return None

        # ``type`` may be an enum-like object or a plain string.
        conn_type_value = conn_type.value if hasattr(conn_type, "value") else str(conn_type)

        if conn_type_value == "local":
            base_path = getattr(story_conn, "base_path", "./data")

            def write_file_local(path: str, content: str) -> None:
                import os

                full_path = os.path.join(base_path, path)
                # os.makedirs("") raises, so only create a parent when there is one.
                parent_dir = os.path.dirname(full_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)
                ctx.debug("Writing SQL file locally", path=full_path)
                # Explicit encoding: do not depend on the platform locale.
                with open(full_path, "w", encoding="utf-8") as f:
                    f.write(content)

            return write_file_local

        elif conn_type_value in ("azure_blob", "delta"):
            if not storage_options:
                ctx.debug("No storage options available for Azure write_file")
                return None

            account_name = getattr(story_conn, "account_name", None)
            container = getattr(story_conn, "container", None)

            if not account_name or not container:
                ctx.debug("Azure connection missing account_name or container")
                return None

            def write_file_azure(path: str, content: str) -> None:
                import fsspec

                # Already-qualified URLs are used as-is; relative paths are
                # anchored at the story connection's container.
                if path.startswith(("abfs://", "az://")):
                    full_path = path
                else:
                    full_path = f"abfs://{container}@{account_name}.dfs.core.windows.net/{path}"

                # adlfs needs account_name along with credentials
                fs_options = {"account_name": account_name, **storage_options}
                fs = fsspec.filesystem("abfs", **fs_options)
                ctx.debug("Writing SQL file via Azure", path=full_path)
                # Explicit encoding, matching the local writer.
                with fs.open(full_path, "w", encoding="utf-8") as f:
                    f.write(content)

            return write_file_azure

        else:
            ctx.debug("Unsupported connection type for write_file", type=conn_type_value)
            return None

    @property
    def metadata(self) -> Optional[SemanticStoryMetadata]:
        """Get the last execution metadata (None until run() has executed views)."""
        return self._last_metadata
399
+
400
+
401
def run_semantic_layer(
    project_config: ProjectConfig,
    execute_sql: Callable[[str], None],
    save_sql_to: Optional[str] = None,
    write_file: Optional[Callable[[str, str], None]] = None,
    generate_story: bool = True,
    generate_lineage: bool = False,
) -> Dict[str, Any]:
    """
    One-shot helper: build a SemanticLayerRunner and run it.

    Args:
        project_config: ProjectConfig with semantic section
        execute_sql: Callable that executes SQL against the database
        save_sql_to: Optional path to save SQL files
        write_file: Optional callable to write files
        generate_story: Whether to generate execution story
        generate_lineage: Whether to generate combined lineage

    Returns:
        Dict with execution results, exactly as returned by
        ``SemanticLayerRunner.run``.
    """
    # Collect the pass-through options once, then forward them unchanged.
    run_options = {
        "execute_sql": execute_sql,
        "save_sql_to": save_sql_to,
        "write_file": write_file,
        "generate_story": generate_story,
        "generate_lineage": generate_lineage,
    }
    return SemanticLayerRunner(project_config).run(**run_options)