odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
odibi/references.py ADDED
@@ -0,0 +1,151 @@
1
+ """Cross-pipeline reference resolution for Odibi.
2
+
3
+ This module handles the resolution of $pipeline.node references, enabling
4
+ pipelines to read data from other pipelines' outputs (e.g., bronze -> silver).
5
+
6
+ Example:
7
+ inputs:
8
+ events: $read_bronze.opsvisdata_ShiftDowntimeEventsview
9
+ calendar: $read_bronze.opsvisdata_vw_calender
10
+ """
11
+
12
+ from typing import Any, Dict, Union
13
+
14
+ from odibi.catalog import CatalogManager
15
+
16
+
17
class ReferenceResolutionError(Exception):
    """Signal that a cross-pipeline ``$pipeline.node`` reference could not be resolved."""
21
+
22
+
23
def resolve_input_reference(
    ref: str,
    catalog: CatalogManager,
) -> Dict[str, Any]:
    """
    Resolve a ``$pipeline.node`` reference into a read configuration.

    Args:
        ref: Reference string like "$read_bronze.opsvisdata_vw_calender".
            Only the first "." separates the pipeline from the node, so the
            node part may itself contain dots.
        catalog: CatalogManager instance, queried through ``get_node_output``.

    Returns:
        Dict with keys for engine.read():
            - For a managed_table output: table, format
            - For any other output type (treated as external_table):
              connection, path, format

    Raises:
        ValueError: If ``ref`` is not a string, does not start with "$", has
            no "." separator, or has an empty pipeline or node name.
        ReferenceResolutionError: If the catalog holds no output for the
            referenced node.
    """
    if not isinstance(ref, str) or not ref.startswith("$"):
        raise ValueError(f"Invalid reference: {ref}. Expected $pipeline.node format.")

    # Drop the leading "$" and split on the first "." only.
    pipeline_name, separator, node_name = ref[1:].partition(".")

    # Reject "$name" (no separator) as well as "$.node" / "$pipeline." so a
    # malformed reference fails here instead of as a misleading catalog miss.
    if not (separator and pipeline_name and node_name):
        raise ValueError(
            f"Invalid reference format: {ref}. Expected $pipeline.node (e.g., $read_bronze.my_node)"
        )

    output = catalog.get_node_output(pipeline_name, node_name)

    if output is None:
        raise ReferenceResolutionError(
            f"No output found for {ref}. "
            f"Ensure pipeline '{pipeline_name}' has run and node '{node_name}' has a write block."
        )

    if output.get("output_type") == "managed_table":
        return {
            "table": output.get("table_name"),
            "format": output.get("format"),
        }

    # Every other output type is read as an external table.
    return {
        "connection": output.get("connection_name"),
        "path": output.get("path"),
        "format": output.get("format"),
    }
73
+
74
+
75
def is_pipeline_reference(value: Any) -> bool:
    """
    Tell whether ``value`` has the shape of a cross-pipeline reference.

    Args:
        value: Candidate of any type.

    Returns:
        True only when ``value`` is a string whose first character is "$".
    """
    if not isinstance(value, str):
        return False
    # An empty string slices to "" and therefore never matches.
    return value[:1] == "$"
86
+
87
+
88
def resolve_inputs(
    inputs: Dict[str, Union[str, Dict[str, Any]]],
    catalog: CatalogManager,
) -> Dict[str, Dict[str, Any]]:
    """
    Build the read configuration for every declared input.

    Args:
        inputs: Dict mapping input name to either:
            - A $pipeline.node reference string, resolved through the catalog
            - An explicit read config dict, passed through unchanged
        catalog: CatalogManager instance used to resolve reference strings

    Returns:
        Dict mapping each input name to its read configuration. Explicit
        config dicts are stored as the same object that was passed in.

    Raises:
        ValueError: If an input is neither a $-prefixed string nor a dict.

    Example:
        resolved = resolve_inputs(
            {
                "events": "$read_bronze.shift_events",
                "calendar": {
                    "connection": "goat_prod",
                    "path": "bronze/OEE/vw_calender",
                    "format": "delta",
                },
            },
            catalog,
        )
        # "events" is replaced by the config recorded for that node's output;
        # "calendar" comes back exactly as given.
    """
    read_configs = {}

    for input_name, spec in inputs.items():
        if is_pipeline_reference(spec):
            read_configs[input_name] = resolve_input_reference(spec, catalog)
            continue

        if not isinstance(spec, dict):
            raise ValueError(
                f"Invalid input format for '{input_name}': {spec}. "
                "Expected $pipeline.node reference or read config dict."
            )

        read_configs[input_name] = spec

    return read_configs
133
+
134
+
135
def validate_references(
    inputs: Dict[str, Union[str, Dict[str, Any]]],
    catalog: CatalogManager,
) -> None:
    """
    Check every cross-pipeline reference up front so bad ones fail fast.

    Inputs that are not $-prefixed strings are ignored. Each reference is
    resolved and the result discarded; only the exceptions matter.

    Args:
        inputs: Dict of input configurations
        catalog: CatalogManager instance

    Raises:
        ValueError: If a reference string is malformed
        ReferenceResolutionError: If any reference cannot be resolved
    """
    for candidate in inputs.values():
        if not is_pipeline_reference(candidate):
            continue
        resolve_input_reference(candidate, catalog)
odibi/registry.py ADDED
@@ -0,0 +1,246 @@
1
+ """Function registry for transform functions."""
2
+
3
+ import inspect
4
+ from functools import wraps
5
+ from typing import Any, Callable, Dict, Optional, Union
6
+
7
+
8
class FunctionRegistry:
    """Global registry of transform functions with parameter validation.

    All state lives in class-level dictionaries, so every caller shares the
    same registry and every method is a classmethod.
    """

    # name -> registered callable
    _functions: Dict[str, Callable] = {}
    # name -> signature captured when the callable was registered
    _signatures: Dict[str, inspect.Signature] = {}
    # name -> optional Pydantic model used to validate parameters
    _param_models: Dict[str, Any] = {}

    @classmethod
    def register(
        cls, func: Callable, name: Optional[str] = None, param_model: Any = None
    ) -> Callable:
        """Register a transform function.

        Registering under an existing name replaces the previous entry,
        including its parameter model.

        Args:
            func: Function to register
            name: Optional name override (default: func.__name__)
            param_model: Optional Pydantic model for validation

        Returns:
            The original function
        """
        if name is None:
            name = func.__name__

        cls._functions[name] = func
        cls._signatures[name] = inspect.signature(func)
        if param_model:
            cls._param_models[name] = param_model
        else:
            # Drop any model left by an earlier registration under this name;
            # otherwise validate_params would check the new function against
            # the old function's model.
            cls._param_models.pop(name, None)

        return func

    @classmethod
    def get(cls, name: str) -> Callable:
        """Retrieve a registered function.

        Args:
            name: Function name

        Returns:
            The registered function

        Raises:
            ValueError: If function not found
        """
        if name not in cls._functions:
            available = ", ".join(cls._functions.keys()) if cls._functions else "none"
            raise ValueError(
                f"Transform function '{name}' not registered. Available functions: {available}"
            )
        return cls._functions[name]

    @classmethod
    def has_function(cls, name: str) -> bool:
        """Check if a function is registered."""
        return name in cls._functions

    @classmethod
    def get_function(cls, name: str) -> Optional[Callable]:
        """Get a function without raising if not found (returns None instead)."""
        return cls._functions.get(name)

    @classmethod
    def get_param_model(cls, name: str) -> Optional[Any]:
        """Get the Pydantic model for a function's parameters, or None."""
        return cls._param_models.get(name)

    @classmethod
    def validate_params(cls, name: str, params: Dict[str, Any]) -> None:
        """Validate parameters against the Pydantic model or function signature.

        When a parameter model is registered for ``name`` it is the only check
        performed. Otherwise the captured signature is used: every parameter
        without a default must be supplied and no unknown keys are allowed,
        unless the function itself accepts ``**kwargs``.

        Args:
            name: Function name
            params: Parameters to validate

        Raises:
            ValueError: If the function is not registered, the model rejects
                the parameters, a required parameter is missing, or an
                unexpected parameter is supplied
        """
        if name not in cls._functions:
            raise ValueError(f"Function '{name}' not registered")

        # Priority: Check Pydantic Model
        if name in cls._param_models:
            model = cls._param_models[name]
            try:
                model(**params)
            except Exception as e:
                # Any failure raised while building the model is reported as
                # a ValueError; the original exception stays attached as the cause.
                raise ValueError(f"Validation failed for '{name}': {e}") from e
            return  # Validated successfully

        # Fallback: Check function signature (Legacy)
        sig = cls._signatures[name]
        variadic_kinds = (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        )

        # Named parameters the caller may supply. 'context' and 'current' are
        # injected, and *args/**kwargs are never required by name.
        func_params = {
            k: v
            for k, v in sig.parameters.items()
            if k not in ("context", "current") and v.kind not in variadic_kinds
        }

        # Check for missing required parameters
        missing = [
            param_name
            for param_name, param in func_params.items()
            if param.default is inspect.Parameter.empty and param_name not in params
        ]
        if missing:
            raise ValueError(
                f"Missing required parameters for function '{name}': {', '.join(missing)}"
            )

        # A function declaring **kwargs accepts arbitrary extra keys.
        accepts_extra = any(
            param.kind is inspect.Parameter.VAR_KEYWORD for param in sig.parameters.values()
        )
        if accepts_extra:
            return

        # Check for unexpected parameters (sorted so the message is deterministic)
        unexpected = sorted(set(params.keys()) - set(func_params.keys()))
        if unexpected:
            raise ValueError(
                f"Unexpected parameters for function '{name}': {', '.join(unexpected)}"
            )

    @classmethod
    def list_functions(cls) -> list[str]:
        """List all registered function names.

        Returns:
            List of function names, in registration order
        """
        return list(cls._functions.keys())

    @classmethod
    def get_function_info(cls, name: str) -> Dict[str, Any]:
        """Get detailed information about a registered function.

        Args:
            name: Function name

        Returns:
            Dictionary with keys ``name``, ``docstring``, ``parameters``
            (per-parameter ``required`` / ``default`` / ``annotation``) and
            ``return_annotation``

        Raises:
            ValueError: If the function is not registered
        """
        if name not in cls._functions:
            raise ValueError(f"Function '{name}' not registered")

        func = cls._functions[name]
        sig = cls._signatures[name]
        variadic_kinds = (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        )

        # Extract parameter info
        params_info = {}
        for param_name, param in sig.parameters.items():
            # NOTE(review): validate_params also treats 'current' as injected,
            # but only 'context' is hidden here - confirm this is intended.
            if param_name == "context":
                continue  # Skip context param

            has_default = param.default is not inspect.Parameter.empty
            params_info[param_name] = {
                # *args / **kwargs have no default but are never required.
                "required": not has_default and param.kind not in variadic_kinds,
                "default": param.default if has_default else None,
                "annotation": (
                    param.annotation if param.annotation is not inspect.Parameter.empty else None
                ),
            }

        return {
            "name": name,
            "docstring": inspect.getdoc(func),
            "parameters": params_info,
            "return_annotation": (
                sig.return_annotation
                if sig.return_annotation is not inspect.Signature.empty
                else None
            ),
        }
170
+
171
+
172
def transform(name_or_func: Union[str, Callable] = None, **kwargs) -> Callable:
    """Decorator to register a transform function.

    Usage:
        @transform
        def my_transform(...): ...

        @transform("my_name")
        def my_transform(...): ...

        @transform(name="my_name", category="foo")
        def my_transform(...): ...

        transform(my_transform, name="my_name", param_model=MyParams)

    Args:
        name_or_func: Function (if used without args) or Name (if used with args)
        **kwargs: ``name`` and ``param_model`` are honoured; any other
            metadata is accepted and ignored for now

    Returns:
        The registered wrapper when given a function directly, otherwise a
        decorator that registers the function it is applied to
    """
    # Read the decorator-level options once, under distinct names, so they
    # cannot be confused with the call-time arguments of the wrapper below.
    explicit_name = kwargs.get("name")
    param_model = kwargs.get("param_model")

    def _register(func, name=None):
        @wraps(func)
        def wrapper(*call_args, **call_kwargs):
            return func(*call_args, **call_kwargs)

        # Fall back to the function's own name when no (non-empty) name was
        # given. The wrapper, not the raw function, is what gets registered.
        FunctionRegistry.register(
            wrapper, name=name or func.__name__, param_model=param_model
        )
        return wrapper

    if callable(name_or_func):
        # Called as @transform, or directly as transform(func, name=...).
        # A name= keyword is honoured here rather than silently dropped.
        return _register(name_or_func, name=explicit_name)

    # Called as @transform("name") or @transform(name="name"): a positional
    # name wins over the keyword one.
    chosen_name = name_or_func if name_or_func is not None else explicit_name

    def decorator(func):
        return _register(func, name=chosen_name)

    return decorator
225
+
226
+
227
def get_registered_function(name: str) -> Callable:
    """Look up a transform function in the global FunctionRegistry.

    Args:
        name: Function name

    Returns:
        The registered function

    Raises:
        ValueError: If no function is registered under ``name``
            (propagated from FunctionRegistry.get)
    """
    registered = FunctionRegistry.get(name)
    return registered
237
+
238
+
239
def validate_function_params(name: str, params: Dict[str, Any]) -> None:
    """Module-level shortcut for FunctionRegistry.validate_params.

    Args:
        name: Function name
        params: Parameters to validate

    Raises:
        ValueError: If the function is not registered or the parameters are
            rejected (propagated from FunctionRegistry.validate_params)
    """
    FunctionRegistry.validate_params(name=name, params=params)
odibi/semantics/__init__.py ADDED
@@ -0,0 +1,71 @@
1
+ """
2
+ Semantic Layer Module
3
+ =====================
4
+
5
+ This module provides a semantic layer for defining and querying metrics.
6
+
7
+ Features:
8
+ - Define metrics in YAML (expressions, filters, source tables)
9
+ - Query interface: "revenue BY region, month"
10
+ - Materialize metrics on schedule
11
+ - Dimension hierarchies and drill-down
12
+
13
+ Core Components:
14
+ - MetricDefinition: Pydantic models for metric/dimension definitions
15
+ - SemanticQuery: Parse and execute "metric BY dimensions" queries
16
+ - Materialize: Execute and persist materialized aggregations
17
+
18
+ Example Config (in odibi.yaml):
19
+ metrics:
20
+ - name: revenue
21
+ description: "Total revenue from completed orders"
22
+ expr: "SUM(total_amount)"
23
+ source: fact_orders
24
+ filters:
25
+ - "status = 'completed'"
26
+
27
+ dimensions:
28
+ - name: order_date
29
+ source: dim_date
30
+ hierarchy: [year, quarter, month, full_date]
31
+
32
+ materializations:
33
+ - name: monthly_revenue_by_region
34
+ metrics: [revenue, order_count]
35
+ dimensions: [region, month]
36
+ schedule: "0 2 1 * *"
37
+ output: gold/agg_monthly_revenue
38
+ """
39
+
40
+ from odibi.semantics.materialize import Materializer
41
+ from odibi.semantics.metrics import (
42
+ DimensionDefinition,
43
+ MaterializationConfig,
44
+ MetricDefinition,
45
+ SemanticLayerConfig,
46
+ ViewConfig,
47
+ parse_semantic_config,
48
+ )
49
+ from odibi.semantics.query import SemanticQuery
50
+ from odibi.semantics.runner import SemanticLayerRunner, run_semantic_layer
51
+ from odibi.semantics.story import SemanticStoryGenerator, SemanticStoryMetadata
52
+ from odibi.semantics.views import ViewExecutionResult, ViewGenerator, ViewResult
53
+
54
# Public names exported by ``from odibi.semantics import *``.
__all__ = [
    # Definition / config models and the config parser (odibi.semantics.metrics)
    "MetricDefinition",
    "DimensionDefinition",
    "MaterializationConfig",
    "SemanticLayerConfig",
    "ViewConfig",
    "parse_semantic_config",
    # Query interface (odibi.semantics.query)
    "SemanticQuery",
    # Materialization (odibi.semantics.materialize)
    "Materializer",
    # Views (odibi.semantics.views)
    "ViewGenerator",
    "ViewResult",
    "ViewExecutionResult",
    # Story generation (odibi.semantics.story)
    "SemanticStoryGenerator",
    "SemanticStoryMetadata",
    # Runner (odibi.semantics.runner)
    "SemanticLayerRunner",
    "run_semantic_layer",
]
# Version string for this subpackage.
# NOTE(review): differs from the 2.5.0 wheel version - confirm it is versioned separately.
__version__ = "1.1.0"