odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,361 @@
1
+ """
2
+ Metric Definition Models
3
+ ========================
4
+
5
+ Pydantic models for semantic layer configuration including:
6
+ - Metric definitions (expressions, filters, derived metrics)
7
+ - Dimension definitions with hierarchies
8
+ - Materialization configurations
9
+ """
10
+
11
+ from enum import Enum
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from pydantic import BaseModel, Field, field_validator
15
+
16
+
17
class MetricType(str, Enum):
    """Type of metric calculation."""

    # Direct aggregation over a source expression.
    SIMPLE = "simple"

    # Calculated from other (component) metrics via a formula.
    DERIVED = "derived"
22
+
23
+
24
class TimeGrain(str, Enum):
    """Time grain for dimension transformations."""

    # Members are listed from the finest grain to the coarsest.
    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    QUARTER = "quarter"
    YEAR = "year"
32
+
33
+
34
class MetricDefinition(BaseModel):
    """
    Definition of a semantic metric.

    A metric represents a measurable value that can be aggregated
    across dimensions (e.g., revenue, order_count, avg_order_value).

    Attributes:
        name: Unique metric identifier
        description: Human-readable description
        expr: SQL aggregation expression (e.g., "SUM(total_amount)").
            Optional for derived metrics.
        source: Source table reference. Supports three formats:
            - `$pipeline.node` (recommended): e.g., `$build_warehouse.fact_orders`
            - `connection.path`: e.g., `gold.fact_orders` or `gold.oee/plant_a/metrics`
            - `table_name`: Uses default connection
        filters: Optional WHERE conditions to apply
        type: "simple" (direct aggregation) or "derived" (references other metrics)
        components: List of component metric names (required for derived metrics).
            These metrics must be additive (e.g., SUM-based) for correct
            recalculation at different grains.
        formula: Calculation formula using component names (required for derived).
            Example: "(total_revenue - total_cost) / total_revenue"
    """

    name: str = Field(..., description="Unique metric identifier")
    description: Optional[str] = Field(None, description="Human-readable description")
    expr: Optional[str] = Field(None, description="SQL aggregation expression")
    source: Optional[str] = Field(
        None,
        description=(
            "Source table reference. Formats: "
            "$pipeline.node (e.g., $build_warehouse.fact_orders), "
            "connection.path (e.g., gold.fact_orders or gold.oee/plant_a/table), "
            "or bare table_name"
        ),
    )
    filters: List[str] = Field(default_factory=list, description="WHERE conditions")
    type: MetricType = Field(default=MetricType.SIMPLE, description="Metric type")
    components: Optional[List[str]] = Field(
        None, description="Component metric names for derived metrics"
    )
    formula: Optional[str] = Field(None, description="Calculation formula using component names")

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Normalize the metric name and reject invalid identifiers.

        Surrounding whitespace is removed before the character check, so a
        padded name such as ``" revenue "`` is accepted and stored as
        ``"revenue"``.

        Args:
            v: Raw metric name.

        Returns:
            The stripped, lowercased name.

        Raises:
            ValueError: If the name is empty/blank, or contains anything other
                than alphanumeric characters and underscores.
        """
        # Strip before validating: otherwise the padding itself fails the
        # alphanumeric check and the final normalization can never apply.
        name = v.strip() if v else ""
        if not name:
            raise ValueError("Metric name cannot be empty")
        if not name.replace("_", "").isalnum():
            raise ValueError(
                f"Metric name '{v}' must contain only alphanumeric characters and underscores"
            )
        return name.lower()

    @field_validator("expr")
    @classmethod
    def validate_expr(cls, v: Optional[str]) -> Optional[str]:
        """Strip the expression, rejecting one that is provided but blank.

        Args:
            v: Raw expression, or None when not provided.

        Returns:
            The stripped expression, or None when not provided.

        Raises:
            ValueError: If a value is provided but is empty or whitespace only.
        """
        if v is None:
            return None
        stripped = v.strip()
        if not stripped:
            raise ValueError("Metric expression cannot be empty if provided")
        return stripped

    def model_post_init(self, __context) -> None:
        """Validate derived metric requirements after model initialization.

        Raises:
            ValueError: If a derived metric lacks 'components' or 'formula',
                or a simple metric lacks 'expr'.
        """
        if self.type == MetricType.DERIVED:
            if not self.components:
                raise ValueError(f"Derived metric '{self.name}' requires 'components' list")
            if not self.formula:
                raise ValueError(f"Derived metric '{self.name}' requires 'formula'")
        elif self.type == MetricType.SIMPLE:
            if not self.expr:
                raise ValueError(f"Simple metric '{self.name}' requires 'expr'")
106
+
107
+
108
class DimensionDefinition(BaseModel):
    """
    Definition of a semantic dimension.

    A dimension represents an attribute for grouping and filtering
    metrics (e.g., date, product, region).

    Attributes:
        name: Unique dimension identifier
        source: Source table reference. Supports three formats:
            - `$pipeline.node` (recommended): e.g., `$build_warehouse.dim_customer`
            - `connection.path`: e.g., `gold.dim_customer` or `gold.dims/customer`
            - `table_name`: Uses default connection
        column: Column name in source (defaults to name)
        expr: Custom SQL expression. If provided, overrides column and grain.
            Example: "YEAR(DATEADD(month, 6, Date))" for fiscal year
        hierarchy: Optional ordered list of columns for drill-down
        description: Human-readable description
        grain: Time grain transformation (day, week, month, quarter, year).
            Ignored if expr is provided.
    """

    name: str = Field(..., description="Unique dimension identifier")
    source: Optional[str] = Field(
        None,
        description=(
            "Source table reference. Formats: "
            "$pipeline.node (e.g., $build_warehouse.dim_customer), "
            "connection.path (e.g., gold.dim_customer or gold.dims/customer), "
            "or bare table_name"
        ),
    )
    column: Optional[str] = Field(None, description="Column name (defaults to name)")
    expr: Optional[str] = Field(
        None,
        description=(
            "Custom SQL expression. Overrides column and grain. "
            "Example: YEAR(DATEADD(month, 6, Date)) for fiscal year"
        ),
    )
    hierarchy: List[str] = Field(default_factory=list, description="Drill-down hierarchy")
    description: Optional[str] = Field(None, description="Human-readable description")
    grain: Optional[TimeGrain] = Field(None, description="Time grain transformation")

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Return the dimension name stripped and lowercased.

        Raises:
            ValueError: If the name is empty or whitespace only.
        """
        cleaned = v.strip() if v else ""
        if not cleaned:
            raise ValueError("Dimension name cannot be empty")
        return cleaned.lower()

    def get_column(self) -> str:
        """Get the actual column name to use."""
        # An unset or empty `column` falls back to the dimension name.
        return self.column or self.name
162
+
163
+
164
class MaterializationConfig(BaseModel):
    """
    Configuration for materializing metrics to a table.

    Materialization pre-computes aggregated metrics at a specific
    grain and persists them for faster querying.

    Attributes:
        name: Unique materialization identifier
        metrics: List of metric names to include
        dimensions: List of dimension names (determines grain)
        output: Output table path
        schedule: Optional cron schedule for refresh
        incremental: Configuration for incremental refresh
    """

    name: str = Field(..., description="Unique materialization identifier")
    metrics: List[str] = Field(..., description="Metrics to materialize")
    dimensions: List[str] = Field(..., description="Dimensions for grouping")
    output: str = Field(..., description="Output table path")
    schedule: Optional[str] = Field(None, description="Cron schedule")
    incremental: Optional[Dict[str, Any]] = Field(None, description="Incremental refresh config")

    @field_validator("metrics")
    @classmethod
    def validate_metrics(cls, v: List[str]) -> List[str]:
        """Return the metric list unchanged, rejecting an empty one.

        Raises:
            ValueError: If the list is empty.
        """
        if v:
            return v
        raise ValueError("At least one metric is required")

    @field_validator("dimensions")
    @classmethod
    def validate_dimensions(cls, v: List[str]) -> List[str]:
        """Return the dimension list unchanged, rejecting an empty one.

        Raises:
            ValueError: If the list is empty.
        """
        if v:
            return v
        raise ValueError("At least one dimension is required")
200
+
201
+
202
class ViewConfig(BaseModel):
    """
    Configuration for a semantic view.

    A view represents a pre-defined aggregation of metrics at a specific
    grain, materialized as a SQL Server view for analyst consumption.

    Attributes:
        name: View name (will be created as db_schema.name in SQL Server)
        description: Human-readable description of the view's purpose
        metrics: List of metric names to include
        dimensions: List of dimension names (determines grain)
        db_schema: Database schema for the view (default: semantic)
        ensure_schema: Auto-create schema if it doesn't exist (default: True)
        source_file: Optional reference to source config file for documentation
    """

    name: str = Field(..., description="View name")
    description: Optional[str] = Field(None, description="View description")
    metrics: List[str] = Field(..., description="Metrics to include")
    dimensions: List[str] = Field(..., description="Dimensions for grouping")
    db_schema: str = Field(default="semantic", description="Database schema")
    ensure_schema: bool = Field(default=True, description="Auto-create schema if it doesn't exist")
    source_file: Optional[str] = Field(None, description="Source config file reference")

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Return the view name stripped of surrounding whitespace.

        Unlike the metric and dimension validators, the case of the name is
        left as given.

        Raises:
            ValueError: If the name is empty or whitespace only.
        """
        cleaned = v.strip() if v else ""
        if not cleaned:
            raise ValueError("View name cannot be empty")
        return cleaned

    @field_validator("metrics")
    @classmethod
    def validate_metrics(cls, v: List[str]) -> List[str]:
        """Return the metric list unchanged, rejecting an empty one.

        Raises:
            ValueError: If the list is empty.
        """
        if v:
            return v
        raise ValueError("At least one metric is required")

    @field_validator("dimensions")
    @classmethod
    def validate_dimensions_list(cls, v: List[str]) -> List[str]:
        """Return the dimension list unchanged, rejecting an empty one.

        Raises:
            ValueError: If the list is empty.
        """
        if v:
            return v
        raise ValueError("At least one dimension is required")
247
+
248
+
249
class SemanticLayerConfig(BaseModel):
    """
    Complete semantic layer configuration.

    Contains all metrics, dimensions, materializations, and views
    for a semantic layer deployment.

    Attributes:
        metrics: List of metric definitions
        dimensions: List of dimension definitions
        materializations: List of materialization configurations
        views: List of view configurations
    """

    metrics: List[MetricDefinition] = Field(default_factory=list, description="Metric definitions")
    dimensions: List[DimensionDefinition] = Field(
        default_factory=list, description="Dimension definitions"
    )
    materializations: List[MaterializationConfig] = Field(
        default_factory=list, description="Materialization configs"
    )
    views: List[ViewConfig] = Field(default_factory=list, description="View configurations")

    def get_metric(self, name: str) -> Optional[MetricDefinition]:
        """Get a metric by name (case-insensitive), or None if not found."""
        # Metric names are stored lowercased, so only the query is folded.
        wanted = name.lower()
        return next((m for m in self.metrics if m.name == wanted), None)

    def get_dimension(self, name: str) -> Optional[DimensionDefinition]:
        """Get a dimension by name (case-insensitive), or None if not found."""
        # Dimension names are stored lowercased, so only the query is folded.
        wanted = name.lower()
        return next((d for d in self.dimensions if d.name == wanted), None)

    def get_materialization(self, name: str) -> Optional[MaterializationConfig]:
        """Get a materialization config by name (case-insensitive), or None."""
        # Materialization names keep their original case, so both sides are
        # lowercased for the comparison.
        wanted = name.lower()
        return next((m for m in self.materializations if m.name.lower() == wanted), None)

    def validate_references(self) -> List[str]:
        """
        Validate that all references are valid.

        Checks, in this order:
            1. Components of each metric refer to defined metrics.
            2. Metrics and dimensions of each materialization are defined.
            3. Metrics and dimensions of each view are defined.

        Referenced names are compared case-insensitively.

        Returns:
            List of validation error messages (empty if valid)
        """
        errors: List[str] = []
        metric_names = {m.name for m in self.metrics}
        dimension_names = {d.name for d in self.dimensions}

        for metric in self.metrics:
            for component_name in metric.components or []:
                if component_name.lower() not in metric_names:
                    errors.append(
                        f"Derived metric '{metric.name}' references "
                        f"unknown component '{component_name}'"
                    )

        for mat in self.materializations:
            for metric_name in mat.metrics:
                if metric_name.lower() not in metric_names:
                    errors.append(
                        f"Materialization '{mat.name}' references unknown metric '{metric_name}'"
                    )

            for dim_name in mat.dimensions:
                if dim_name.lower() not in dimension_names:
                    errors.append(
                        f"Materialization '{mat.name}' references unknown dimension '{dim_name}'"
                    )

        # Views reference metrics and dimensions by name just like
        # materializations do, so they get the same checks.
        for view in self.views:
            for metric_name in view.metrics:
                if metric_name.lower() not in metric_names:
                    errors.append(
                        f"View '{view.name}' references unknown metric '{metric_name}'"
                    )

            for dim_name in view.dimensions:
                if dim_name.lower() not in dimension_names:
                    errors.append(
                        f"View '{view.name}' references unknown dimension '{dim_name}'"
                    )

        return errors
330
+
331
+
332
class ViewResult(BaseModel):
    """
    Result of view generation/execution.

    Attributes:
        name: View name
        success: Whether the operation succeeded
        sql: Generated SQL DDL
        error: Error message if failed
        sql_file_path: Path where SQL was saved (if save requested)
    """

    # Required fields.
    name: str = Field(..., description="View name")
    success: bool = Field(..., description="Whether operation succeeded")
    sql: str = Field(..., description="Generated SQL DDL")

    # Optional fields; both default to None.
    error: Optional[str] = Field(None, description="Error message if failed")
    sql_file_path: Optional[str] = Field(None, description="Path where SQL was saved")
349
+
350
+
351
def parse_semantic_config(config_dict: Optional[Dict[str, Any]]) -> SemanticLayerConfig:
    """
    Parse a semantic layer configuration from a dictionary.

    Args:
        config_dict: Configuration dictionary (from YAML). ``None`` is
            treated as an empty configuration.

    Returns:
        SemanticLayerConfig instance
    """
    # YAML loaders typically return None for an empty document or section;
    # unpacking None with ** would raise a TypeError, so treat it as an
    # empty mapping. Every field of SemanticLayerConfig has a default.
    if config_dict is None:
        config_dict = {}
    return SemanticLayerConfig(**config_dict)