odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,432 @@
1
+ """
2
+ Semantic View Generation Module
3
+ ===============================
4
+
5
+ Generate and execute SQL Server views from semantic layer configurations.
6
+
7
+ Views provide pre-computed aggregations at specific grains, with:
8
+ - Derived metrics calculated correctly (SUM first, then formula)
9
+ - Time grain transformations (DATETRUNC)
10
+ - NULLIF protection for division by zero
11
+ - Self-documenting SQL with metric descriptions
12
+ """
13
+
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime
16
+ from typing import Any, Callable, Dict, List, Optional
17
+
18
+ from odibi.semantics.metrics import (
19
+ DimensionDefinition,
20
+ MetricDefinition,
21
+ MetricType,
22
+ SemanticLayerConfig,
23
+ TimeGrain,
24
+ ViewConfig,
25
+ ViewResult,
26
+ )
27
+ from odibi.utils.logging_context import get_logging_context
28
+
29
+
30
def generate_ensure_schema_sql(schema: str) -> str:
    """
    Generate SQL to create schema if it doesn't exist.

    Uses SQL Server's conditional execution pattern since
    CREATE SCHEMA must be the first statement in a batch.

    The schema name is escaped for each context it is embedded in, so a
    name containing ``'`` or ``]`` still produces valid T-SQL instead of a
    broken (or injectable) statement. Names without those characters yield
    exactly the same SQL as before.

    Args:
        schema: Name of the schema to create when it is missing.

    Returns:
        A T-SQL batch that creates the schema only if it does not exist.
    """
    # Inside a '...' string literal a single quote is escaped by doubling it.
    literal = schema.replace("'", "''")
    # Inside [...] a closing bracket is escaped by doubling it. The bracketed
    # identifier itself sits inside the EXEC('...') string literal, so any
    # single quote must be doubled there as well.
    identifier = schema.replace("]", "]]").replace("'", "''")
    return f"""\
IF NOT EXISTS (SELECT 1 FROM sys.schemas WHERE name = '{literal}')
BEGIN
EXEC('CREATE SCHEMA [{identifier}]')
END"""
42
+
43
+
44
@dataclass
class ViewExecutionResult:
    """Aggregate outcome of executing a batch of views.

    Every field starts as its own empty list, so instances never share
    state with each other.
    """

    # Names of views that were created.
    views_created: List[str] = field(default_factory=list)
    # Paths of the SQL files that were saved.
    sql_files_saved: List[str] = field(default_factory=list)
    # Error messages collected for views that failed.
    errors: List[str] = field(default_factory=list)
    # Per-view result objects.
    results: List[ViewResult] = field(default_factory=list)
52
+
53
+
54
class ViewGenerator:
    """
    Build SQL Server view DDL from a semantic layer configuration.

    Usage:
        config = SemanticLayerConfig(...)
        generator = ViewGenerator(config)
        ddl = generator.generate_view_ddl(view_config)
    """

    # SQL template for each supported time grain. The "{col}" placeholder is
    # filled in with the dimension's column via str.format.
    GRAIN_SQL_MAP = {
        grain: f"DATETRUNC({datepart}, {{col}})"
        for grain, datepart in (
            (TimeGrain.DAY, "day"),
            (TimeGrain.WEEK, "week"),
            (TimeGrain.MONTH, "month"),
            (TimeGrain.QUARTER, "quarter"),
            (TimeGrain.YEAR, "year"),
        )
    }
71
+
72
def __init__(self, config: SemanticLayerConfig):
    """
    Initialize with semantic layer configuration.

    Builds name -> definition lookup tables for metrics and dimensions.
    Keys are stored lower-cased because the lookups in this class
    lower-case the requested name before consulting the tables; keying by
    the raw name would make any definition whose name contains upper-case
    characters unreachable.

    Args:
        config: SemanticLayerConfig with metrics, dimensions, and views
    """
    self.config = config
    self._metric_cache: Dict[str, MetricDefinition] = {
        metric.name.lower(): metric for metric in config.metrics
    }
    self._dimension_cache: Dict[str, DimensionDefinition] = {
        dim.name.lower(): dim for dim in config.dimensions
    }
88
+
89
def generate_view_ddl(self, view_config: ViewConfig) -> str:
    """
    Produce the full CREATE OR ALTER VIEW statement for one view.

    The statement is the documentation header, the CREATE OR ALTER VIEW
    line for ``<db_schema>.<name>``, and the SELECT body terminated by a
    semicolon, joined with newlines.

    Args:
        view_config: ViewConfig with metrics, dimensions, and view name

    Returns:
        Complete SQL DDL string with documentation header
    """
    log = get_logging_context()
    log.debug("Generating view DDL", view=view_config.name)

    # Header first, then body, mirroring the order the pieces appear in.
    pieces = [self._generate_header(view_config)]
    select_sql = self._generate_view_body(view_config)
    pieces.append(f"CREATE OR ALTER VIEW {view_config.db_schema}.{view_config.name} AS")
    pieces.append(f"{select_sql};")
    statement = "\n".join(pieces)

    log.info("Generated view DDL", view=view_config.name, lines=statement.count("\n"))
    return statement
110
+
111
def _generate_header(self, view_config: ViewConfig) -> str:
    """Generate SQL documentation header.

    The header is a block of ``--`` comment lines naming the view, its
    optional description and source file, the generation timestamp, and
    one entry per metric that is found in the metric cache (derived
    metrics also list their formula). Metric names that are not in the
    cache are left out.

    Free-text values (description, source file, metric description,
    formula, expression) may span several lines; every continuation line
    is prefixed with ``--`` so the text cannot escape the comment block
    and corrupt the DDL that follows.

    Args:
        view_config: View whose header is being generated.

    Returns:
        The header as a single newline-joined string.
    """

    def as_comment(value: object) -> str:
        # Re-join multi-line text so each continuation line is itself a
        # SQL line comment; single-line text is returned unchanged.
        return "\n-- ".join(str(value).splitlines())

    rule = "-- " + "=" * 77
    lines = [
        rule,
        f"-- View: {view_config.db_schema}.{view_config.name}",
    ]

    if view_config.description:
        lines.append(f"-- Description: {as_comment(view_config.description)}")

    lines.append(f"-- Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    if view_config.source_file:
        lines.append(f"-- Source: {as_comment(view_config.source_file)}")

    lines.append(rule)
    lines.append("--")
    lines.append("-- Metrics included:")

    for metric_name in view_config.metrics:
        metric_def = self._metric_cache.get(metric_name.lower())
        if not metric_def:
            # Unknown metric names are not documented in the header.
            continue
        if metric_def.type == MetricType.DERIVED:
            summary = as_comment(metric_def.description or "Derived")
            lines.append(f"-- - {metric_name}: {summary}")
            lines.append(f"-- Formula: {as_comment(metric_def.formula)}")
        else:
            lines.append(f"-- - {metric_name}: {as_comment(metric_def.expr)}")

    lines.append("--")
    lines.append(rule)

    return "\n".join(lines)
143
+
144
def _generate_view_body(self, view_config: ViewConfig) -> str:
    """Generate the SELECT statement body.

    Column order in the SELECT list is:
      1. dimensions (these also form the GROUP BY list),
      2. component metrics of any derived metric, sorted by name,
      3. simple metrics not already emitted as a component,
      4. derived metrics, with their formula expanded.

    Metric names that are not in the metric cache are skipped.

    Args:
        view_config: View whose SELECT statement is being generated.

    Returns:
        ``SELECT ... FROM <source>`` followed by ``GROUP BY ...`` when the
        view has at least one dimension. A view without dimensions is a
        single-row aggregate, so no GROUP BY clause is emitted (an empty
        ``GROUP BY`` is not valid SQL).

    Raises:
        ValueError: If no source table can be determined, or a derived
            metric lacks a formula or components.
    """
    source_table = self._get_source_table(view_config)

    select_parts = []
    group_by_parts = []

    for dim_name in view_config.dimensions:
        dim_def = self._dimension_cache.get(dim_name.lower())
        dim_sql, dim_alias = self._get_dimension_sql(dim_name, dim_def)
        select_parts.append(f" {dim_sql} AS {dim_alias}")
        group_by_parts.append(dim_sql)

    component_metrics = set()
    derived_metrics = []
    simple_metrics = []

    for metric_name in view_config.metrics:
        metric_def = self._metric_cache.get(metric_name.lower())
        if not metric_def:
            continue
        if metric_def.type == MetricType.DERIVED:
            derived_metrics.append(metric_def)
            component_metrics.update(comp.lower() for comp in metric_def.components or ())
        else:
            simple_metrics.append(metric_def)

    # Sorted so the generated SQL is stable from run to run.
    for comp_name in sorted(component_metrics):
        comp_def = self._metric_cache.get(comp_name)
        if comp_def and comp_def.expr:
            select_parts.append(f" {comp_def.expr} AS {comp_name}")

    for metric_def in simple_metrics:
        # component_metrics holds lower-cased names, so compare in lower case;
        # otherwise a mixed-case metric would be emitted twice.
        if metric_def.name.lower() not in component_metrics and metric_def.expr:
            select_parts.append(f" {metric_def.expr} AS {metric_def.name}")

    for metric_def in derived_metrics:
        formula_sql = self._build_derived_formula_sql(metric_def)
        select_parts.append(f" {formula_sql} AS {metric_def.name}")

    select_clause = ",\n".join(select_parts)
    body = f"SELECT\n{select_clause}\nFROM {source_table}"

    if group_by_parts:
        body += f"\nGROUP BY {', '.join(group_by_parts)}"

    return body
190
+
191
def _get_source_table(self, view_config: ViewConfig) -> str:
    """Pick the view's source table from its metrics.

    Metrics are examined in the order they are listed on the view. The
    first metric found in the cache that has a source wins; if a metric
    has no source of its own, its components are checked in order.

    Raises:
        ValueError: If neither a metric nor any component has a source.
    """
    for requested in view_config.metrics:
        metric = self._metric_cache.get(requested.lower())
        if not metric:
            continue
        if metric.source:
            return metric.source
        for component in metric.components or ():
            component_def = self._metric_cache.get(component.lower())
            if component_def and component_def.source:
                return component_def.source

    raise ValueError(f"No source table found for view '{view_config.name}'")
203
+
204
def _get_dimension_sql(self, dim_name: str, dim_def: Optional[DimensionDefinition]) -> tuple:
    """Resolve a dimension to its ``(sql_expression, alias)`` pair.

    The alias is always ``dim_name``. The expression is chosen as:
      * ``dim_name`` itself when there is no definition,
      * the definition's custom ``expr`` when it has one,
      * the grain template applied to the column when the definition has
        a grain that appears in ``GRAIN_SQL_MAP``,
      * the plain column otherwise.
    """
    if dim_def is None:
        expression = dim_name
    elif dim_def.expr:
        # A custom expression takes priority over column/grain handling.
        expression = dim_def.expr
    else:
        column = dim_def.get_column()
        template = self.GRAIN_SQL_MAP.get(dim_def.grain) if dim_def.grain else None
        expression = template.format(col=column) if template else column

    return expression, dim_name
223
+
224
def _build_derived_formula_sql(self, metric_def: MetricDefinition) -> str:
    """Build SQL for a derived metric with NULLIF protection.

    Each component name that appears in the formula is replaced by that
    component's SQL expression, then every divisor is wrapped with
    NULLIF(..., 0).

    Substitution is done in a single pass over the formula and only on
    whole identifiers, matched case-insensitively. This avoids three
    problems of chained ``str.replace`` calls:
      * text inserted for one component is never rescanned, so a component
        named ``revenue`` cannot rewrite the inside of ``SUM(total_revenue)``
        that was just substituted for ``total_revenue``;
      * a component name is not replaced where it is only part of a longer
        identifier;
      * a formula that spells a component with different casing is still
        expanded (component names are looked up lower-cased).

    Components that are missing from the metric cache, or have no
    expression, are left untouched in the formula.

    Args:
        metric_def: Derived metric to expand.

    Returns:
        The expanded SQL expression.

    Raises:
        ValueError: If the metric has no formula or no components.
    """
    import re

    if not metric_def.formula or not metric_def.components:
        raise ValueError(f"Derived metric '{metric_def.name}' missing formula or components")

    component_exprs = {}
    for comp_name in metric_def.components:
        comp_metric = self._metric_cache.get(comp_name.lower())
        if comp_metric and comp_metric.expr:
            component_exprs[comp_name.lower()] = comp_metric.expr

    result = metric_def.formula
    if component_exprs:
        # Longest names first so the alternation prefers the longest match.
        names = sorted(component_exprs, key=len, reverse=True)
        pattern = re.compile(
            r"(?<![A-Za-z0-9_])(?:"
            + "|".join(re.escape(name) for name in names)
            + r")(?![A-Za-z0-9_])",
            re.IGNORECASE,
        )
        # A function replacement inserts the expression verbatim (no
        # backslash/group processing of the SQL text).
        result = pattern.sub(
            lambda match: component_exprs.get(match.group(0).lower(), match.group(0)),
            result,
        )

    return self._wrap_divisors_with_nullif(result)
245
+
246
+ def _wrap_divisors_with_nullif(self, expr: str) -> str:
247
+ """Wrap division operands with NULLIF to prevent division by zero."""
248
+ import re
249
+
250
+ def find_balanced_paren(s: str, start: int) -> int:
251
+ """Find the closing paren index for a balanced parenthesized expression."""
252
+ if start >= len(s) or s[start] != "(":
253
+ return -1
254
+ depth = 1
255
+ i = start + 1
256
+ while i < len(s) and depth > 0:
257
+ if s[i] == "(":
258
+ depth += 1
259
+ elif s[i] == ")":
260
+ depth -= 1
261
+ i += 1
262
+ return i if depth == 0 else -1
263
+
264
+ result = []
265
+ i = 0
266
+ while i < len(expr):
267
+ if expr[i] == "/":
268
+ result.append("/")
269
+ i += 1
270
+ while i < len(expr) and expr[i] in " \t":
271
+ result.append(expr[i])
272
+ i += 1
273
+ if i >= len(expr):
274
+ break
275
+
276
+ if expr[i] == "(":
277
+ end = find_balanced_paren(expr, i)
278
+ if end > 0:
279
+ divisor = expr[i:end]
280
+ result.append(f"NULLIF({divisor}, 0)")
281
+ i = end
282
+ else:
283
+ result.append(expr[i])
284
+ i += 1
285
+ else:
286
+ func_match = re.match(
287
+ r"(SUM|COUNT|AVG|MIN|MAX)\s*\([^)]+\)",
288
+ expr[i:],
289
+ re.IGNORECASE,
290
+ )
291
+ if func_match:
292
+ divisor = func_match.group(0)
293
+ result.append(f"NULLIF({divisor}, 0)")
294
+ i += len(divisor)
295
+ else:
296
+ ident_match = re.match(r"[A-Za-z_][A-Za-z0-9_]*", expr[i:])
297
+ if ident_match:
298
+ divisor = ident_match.group(0)
299
+ result.append(f"NULLIF({divisor}, 0)")
300
+ i += len(divisor)
301
+ else:
302
+ result.append(expr[i])
303
+ i += 1
304
+ else:
305
+ result.append(expr[i])
306
+ i += 1
307
+
308
+ return "".join(result)
309
+
310
def execute_view(
    self,
    view_config: ViewConfig,
    execute_sql: Callable[[str], None],
    save_sql_to: Optional[str] = None,
    write_file: Optional[Callable[[str, str], None]] = None,
) -> ViewResult:
    """
    Create one view and optionally persist its DDL.

    Steps, in order: ensure the schema exists (when the view asks for it),
    generate the DDL, execute it, then write ``<save_sql_to>/<name>.sql``
    when both ``save_sql_to`` and ``write_file`` are supplied. Any
    exception raised along the way is logged and reported through the
    returned result rather than propagated.

    Args:
        view_config: View configuration
        execute_sql: Callable that executes SQL against the database
        save_sql_to: Optional path to save the SQL file
        write_file: Optional callable to write file (path, content)

    Returns:
        ViewResult with success status and details
    """
    log = get_logging_context()
    log.info("Executing view", view=view_config.name)

    try:
        if view_config.ensure_schema:
            ensure_sql = generate_ensure_schema_sql(view_config.db_schema)
            log.debug("Ensuring schema exists", schema=view_config.db_schema)
            execute_sql(ensure_sql)

        view_sql = self.generate_view_ddl(view_config)
        execute_sql(view_sql)

        saved_path = None
        if save_sql_to and write_file:
            # A trailing "/" on the target directory is dropped before joining.
            saved_path = "/".join((save_sql_to.rstrip("/"), f"{view_config.name}.sql"))
            write_file(saved_path, view_sql)
            log.info("Saved SQL file", path=saved_path)

        log.info("View created successfully", view=view_config.name)
        return ViewResult(
            name=view_config.name,
            success=True,
            sql=view_sql,
            sql_file_path=saved_path,
        )

    except Exception as exc:
        message = str(exc)
        log.error("View creation failed", view=view_config.name, error=message)
        return ViewResult(
            name=view_config.name,
            success=False,
            sql="",
            error=message,
        )
365
+
366
def execute_all_views(
    self,
    execute_sql: Callable[[str], None],
    save_sql_to: Optional[str] = None,
    write_file: Optional[Callable[[str, str], None]] = None,
) -> ViewExecutionResult:
    """
    Run ``execute_view`` for every view in the configuration.

    Each view's result is recorded. Successful views add their name (and
    saved SQL path, when there is one) to the summary; failed views add a
    ``"<name>: <error>"`` entry to the error list.

    Args:
        execute_sql: Callable that executes SQL against the database
        save_sql_to: Optional path to save SQL files
        write_file: Optional callable to write files

    Returns:
        ViewExecutionResult with summary of all operations
    """
    log = get_logging_context()
    log.info("Executing all views", count=len(self.config.views))

    summary = ViewExecutionResult()

    for view_config in self.config.views:
        outcome = self.execute_view(view_config, execute_sql, save_sql_to, write_file)
        summary.results.append(outcome)

        if not outcome.success:
            summary.errors.append(f"{outcome.name}: {outcome.error}")
            continue

        summary.views_created.append(outcome.name)
        if outcome.sql_file_path:
            summary.sql_files_saved.append(outcome.sql_file_path)

    log.info(
        "View execution complete",
        created=len(summary.views_created),
        errors=len(summary.errors),
    )

    return summary
412
+
413
def get_view(self, name: str) -> Optional[ViewConfig]:
    """Return the first configured view whose name matches, ignoring case.

    Returns None when no view matches.
    """
    wanted = name.lower()
    return next(
        (candidate for candidate in self.config.views if candidate.name.lower() == wanted),
        None,
    )
420
+
421
def list_views(self) -> List[Dict[str, Any]]:
    """Summarise every configured view as a plain dictionary.

    Each dictionary carries the view's name, description, metrics,
    dimensions and db_schema, in that key order.
    """
    exported = ("name", "description", "metrics", "dimensions", "db_schema")
    summaries = []
    for view in self.config.views:
        summaries.append({attr: getattr(view, attr) for attr in exported})
    return summaries