odibi 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odibi/__init__.py +32 -0
- odibi/__main__.py +8 -0
- odibi/catalog.py +3011 -0
- odibi/cli/__init__.py +11 -0
- odibi/cli/__main__.py +6 -0
- odibi/cli/catalog.py +553 -0
- odibi/cli/deploy.py +69 -0
- odibi/cli/doctor.py +161 -0
- odibi/cli/export.py +66 -0
- odibi/cli/graph.py +150 -0
- odibi/cli/init_pipeline.py +242 -0
- odibi/cli/lineage.py +259 -0
- odibi/cli/main.py +215 -0
- odibi/cli/run.py +98 -0
- odibi/cli/schema.py +208 -0
- odibi/cli/secrets.py +232 -0
- odibi/cli/story.py +379 -0
- odibi/cli/system.py +132 -0
- odibi/cli/test.py +286 -0
- odibi/cli/ui.py +31 -0
- odibi/cli/validate.py +39 -0
- odibi/config.py +3541 -0
- odibi/connections/__init__.py +9 -0
- odibi/connections/azure_adls.py +499 -0
- odibi/connections/azure_sql.py +709 -0
- odibi/connections/base.py +28 -0
- odibi/connections/factory.py +322 -0
- odibi/connections/http.py +78 -0
- odibi/connections/local.py +119 -0
- odibi/connections/local_dbfs.py +61 -0
- odibi/constants.py +17 -0
- odibi/context.py +528 -0
- odibi/diagnostics/__init__.py +12 -0
- odibi/diagnostics/delta.py +520 -0
- odibi/diagnostics/diff.py +169 -0
- odibi/diagnostics/manager.py +171 -0
- odibi/engine/__init__.py +20 -0
- odibi/engine/base.py +334 -0
- odibi/engine/pandas_engine.py +2178 -0
- odibi/engine/polars_engine.py +1114 -0
- odibi/engine/registry.py +54 -0
- odibi/engine/spark_engine.py +2362 -0
- odibi/enums.py +7 -0
- odibi/exceptions.py +297 -0
- odibi/graph.py +426 -0
- odibi/introspect.py +1214 -0
- odibi/lineage.py +511 -0
- odibi/node.py +3341 -0
- odibi/orchestration/__init__.py +0 -0
- odibi/orchestration/airflow.py +90 -0
- odibi/orchestration/dagster.py +77 -0
- odibi/patterns/__init__.py +24 -0
- odibi/patterns/aggregation.py +599 -0
- odibi/patterns/base.py +94 -0
- odibi/patterns/date_dimension.py +423 -0
- odibi/patterns/dimension.py +696 -0
- odibi/patterns/fact.py +748 -0
- odibi/patterns/merge.py +128 -0
- odibi/patterns/scd2.py +148 -0
- odibi/pipeline.py +2382 -0
- odibi/plugins.py +80 -0
- odibi/project.py +581 -0
- odibi/references.py +151 -0
- odibi/registry.py +246 -0
- odibi/semantics/__init__.py +71 -0
- odibi/semantics/materialize.py +392 -0
- odibi/semantics/metrics.py +361 -0
- odibi/semantics/query.py +743 -0
- odibi/semantics/runner.py +430 -0
- odibi/semantics/story.py +507 -0
- odibi/semantics/views.py +432 -0
- odibi/state/__init__.py +1203 -0
- odibi/story/__init__.py +55 -0
- odibi/story/doc_story.py +554 -0
- odibi/story/generator.py +1431 -0
- odibi/story/lineage.py +1043 -0
- odibi/story/lineage_utils.py +324 -0
- odibi/story/metadata.py +608 -0
- odibi/story/renderers.py +453 -0
- odibi/story/templates/run_story.html +2520 -0
- odibi/story/themes.py +216 -0
- odibi/testing/__init__.py +13 -0
- odibi/testing/assertions.py +75 -0
- odibi/testing/fixtures.py +85 -0
- odibi/testing/source_pool.py +277 -0
- odibi/transformers/__init__.py +122 -0
- odibi/transformers/advanced.py +1472 -0
- odibi/transformers/delete_detection.py +610 -0
- odibi/transformers/manufacturing.py +1029 -0
- odibi/transformers/merge_transformer.py +778 -0
- odibi/transformers/relational.py +675 -0
- odibi/transformers/scd.py +579 -0
- odibi/transformers/sql_core.py +1356 -0
- odibi/transformers/validation.py +165 -0
- odibi/ui/__init__.py +0 -0
- odibi/ui/app.py +195 -0
- odibi/utils/__init__.py +66 -0
- odibi/utils/alerting.py +667 -0
- odibi/utils/config_loader.py +343 -0
- odibi/utils/console.py +231 -0
- odibi/utils/content_hash.py +202 -0
- odibi/utils/duration.py +43 -0
- odibi/utils/encoding.py +102 -0
- odibi/utils/extensions.py +28 -0
- odibi/utils/hashing.py +61 -0
- odibi/utils/logging.py +203 -0
- odibi/utils/logging_context.py +740 -0
- odibi/utils/progress.py +429 -0
- odibi/utils/setup_helpers.py +302 -0
- odibi/utils/telemetry.py +140 -0
- odibi/validation/__init__.py +62 -0
- odibi/validation/engine.py +765 -0
- odibi/validation/explanation_linter.py +155 -0
- odibi/validation/fk.py +547 -0
- odibi/validation/gate.py +252 -0
- odibi/validation/quarantine.py +605 -0
- odibi/writers/__init__.py +15 -0
- odibi/writers/sql_server_writer.py +2081 -0
- odibi-2.5.0.dist-info/METADATA +255 -0
- odibi-2.5.0.dist-info/RECORD +124 -0
- odibi-2.5.0.dist-info/WHEEL +5 -0
- odibi-2.5.0.dist-info/entry_points.txt +2 -0
- odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
- odibi-2.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lineage Utilities
|
|
3
|
+
=================
|
|
4
|
+
|
|
5
|
+
Shared utilities for generating combined lineage from pipeline stories.
|
|
6
|
+
|
|
7
|
+
This module provides helper functions that can be used by both PipelineManager
|
|
8
|
+
and SemanticLayerRunner to generate lineage without tight coupling.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Any, Callable, Dict, Optional
|
|
12
|
+
|
|
13
|
+
from odibi.config import ProjectConfig
|
|
14
|
+
from odibi.story.lineage import LineageGenerator, LineageResult
|
|
15
|
+
from odibi.utils.logging_context import get_logging_context
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_full_stories_path(project_config: ProjectConfig) -> str:
    """
    Build the full path to stories, including cloud URL if remote.

    Converts relative paths like "OEE/Stories/" to full cloud URLs:
    - Azure: abfs://container@account.dfs.core.windows.net/OEE/Stories
    - S3: s3://bucket/OEE/Stories
    - GCS: gs://bucket/OEE/Stories

    Args:
        project_config: Project configuration with story settings

    Returns:
        Full stories path (local or remote URL). Falls back to the raw
        configured path when the story connection is missing, untyped,
        or lacks the fields needed to build a URL.
    """
    stories_path = project_config.story.path

    # Already a full URL -- nothing to construct.
    if "://" in stories_path:
        return stories_path

    # Resolve the connection the story output is bound to.
    story_conn_name = project_config.story.connection
    story_conn = project_config.connections.get(story_conn_name)

    if not story_conn:
        return stories_path

    conn_type = getattr(story_conn, "type", None)
    if conn_type is None:
        return stories_path

    # Tolerate both enum-valued and plain-string connection types.
    conn_type_value = conn_type.value if hasattr(conn_type, "value") else str(conn_type)

    # Strip leading/trailing slashes for clean path construction
    clean_path = stories_path.strip("/")

    # Azure Blob Storage / Delta Lake
    if conn_type_value in ("azure_blob", "delta"):
        account_name = getattr(story_conn, "account_name", None)
        container = getattr(story_conn, "container", None)

        if account_name and container:
            return f"abfs://{container}@{account_name}.dfs.core.windows.net/{clean_path}"

    # AWS S3
    elif conn_type_value in ("s3", "aws_s3"):
        bucket = getattr(story_conn, "bucket", None)

        if bucket:
            return f"s3://{bucket}/{clean_path}"

    # Google Cloud Storage
    elif conn_type_value in ("gcs", "google_cloud_storage"):
        bucket = getattr(story_conn, "bucket", None)

        if bucket:
            return f"gs://{bucket}/{clean_path}"

    # HDFS
    elif conn_type_value == "hdfs":
        host = getattr(story_conn, "host", None)
        # Fix: an explicit ``port=None`` on the connection (e.g. an unset
        # Pydantic field) must fall back to the default too, otherwise we
        # would emit "hdfs://host:None/...".
        port = getattr(story_conn, "port", None) or 8020

        if host:
            return f"hdfs://{host}:{port}/{clean_path}"

    # DBFS (Databricks File System)
    elif conn_type_value == "dbfs":
        return f"dbfs:/{clean_path}"

    # Unknown connection type, or required fields missing: return as-is.
    return stories_path
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_storage_options(project_config: ProjectConfig) -> Dict[str, Any]:
    """
    Get storage options from story connection for fsspec/adlfs.

    Handles all Azure auth modes:
    - account_key / direct_key: Returns account_key for fsspec
    - sas: Returns sas_token for fsspec
    - connection_string: Returns connection_string for fsspec
    - aad_msi / managed_identity: Returns empty dict (uses default Azure credential)
    - key_vault: Would need to fetch secret (not implemented here)

    Args:
        project_config: Project configuration with story connection

    Returns:
        Dict of storage options for fsspec
    """
    ctx = get_logging_context()
    conn = project_config.connections.get(project_config.story.connection)

    if not conn:
        return {}

    # Credentials attached directly to the connection take precedence.
    if getattr(conn, "credentials", None):
        return dict(conn.credentials)
    if getattr(conn, "account_key", None):
        return {"account_key": conn.account_key}
    if getattr(conn, "sas_token", None):
        return {"sas_token": conn.sas_token}

    # Otherwise fall back to the nested auth structure, if any.
    auth = getattr(conn, "auth", None)
    if not auth:
        return {}

    def lookup(key: str):
        # Works for both plain dicts and Pydantic-style models.
        if isinstance(auth, dict):
            return auth.get(key)
        return getattr(auth, key, None)

    raw_mode = lookup("mode")
    if raw_mode:
        mode = raw_mode.value if hasattr(raw_mode, "value") else str(raw_mode)
    else:
        mode = None

    # First secret found wins, checked in this fixed order.
    for field in ("account_key", "sas_token", "connection_string"):
        secret = lookup(field)
        if secret:
            return {field: secret}

    # MSI / managed identity: adlfs uses DefaultAzureCredential and only
    # needs the storage account name.
    if mode in ("aad_msi", "managed_identity"):
        account_name = getattr(conn, "account_name", None)
        return {"account_name": account_name} if account_name else {}

    # Key Vault mode would require fetching the secret -- not supported here.
    if mode == "key_vault":
        ctx.warning(
            "Key Vault auth for story storage not yet implemented. "
            "Consider using direct_key or aad_msi for story connection."
        )
        return {}

    return {}
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def get_write_file(project_config: ProjectConfig) -> Optional[Callable[[str, str], None]]:
    """
    Create a write_file callback for remote storage using story connection.

    Args:
        project_config: Project configuration with story connection

    Returns:
        Callable for writing files, or None if local storage
    """
    storage_options = get_storage_options(project_config)
    conn = project_config.connections.get(project_config.story.connection)

    if not conn:
        return None

    conn_type = getattr(conn, "type", None)
    if conn_type is None:
        return None

    kind = conn_type.value if hasattr(conn_type, "value") else str(conn_type)

    def _remote_writer(protocol, url_prefixes, base_url, fs_options):
        # Shared shape of every cloud writer: resolve the full URL (keeping
        # already-qualified paths untouched), then write through fsspec.
        def write_remote(path: str, content: str) -> None:
            import fsspec

            if path.startswith(url_prefixes):
                full_path = path
            else:
                full_path = f"{base_url}{path}"

            fs = fsspec.filesystem(protocol, **fs_options)
            with fs.open(full_path, "w") as f:
                f.write(content)

        return write_remote

    if kind == "local":
        root = getattr(conn, "base_path", "./data")

        def write_local(path: str, content: str) -> None:
            import os

            full_path = os.path.join(root, path)
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
            with open(full_path, "w") as f:
                f.write(content)

        return write_local

    if kind in ("azure_blob", "delta"):
        # Without credentials we cannot build an abfs filesystem.
        if not storage_options:
            return None

        account_name = getattr(conn, "account_name", None)
        container = getattr(conn, "container", None)
        if not account_name or not container:
            return None

        return _remote_writer(
            "abfs",
            ("abfs://", "az://"),
            f"abfs://{container}@{account_name}.dfs.core.windows.net/",
            {"account_name": account_name, **storage_options},
        )

    if kind in ("s3", "aws_s3"):
        bucket = getattr(conn, "bucket", None)
        if not bucket:
            return None
        return _remote_writer("s3", ("s3://",), f"s3://{bucket}/", storage_options)

    if kind in ("gcs", "google_cloud_storage"):
        bucket = getattr(conn, "bucket", None)
        if not bucket:
            return None
        return _remote_writer("gcs", ("gs://",), f"gs://{bucket}/", storage_options)

    return None
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def generate_lineage(
    project_config: ProjectConfig,
    date: Optional[str] = None,
    write_file: Optional[Callable[[str, str], None]] = None,
) -> Optional[LineageResult]:
    """
    Generate combined lineage from all pipeline stories.

    This is a standalone function that can be called after any pipeline run
    to generate cross-layer lineage stitching.

    Args:
        project_config: Project configuration
        date: Optional date string for lineage (defaults to today)
        write_file: Optional callback for writing files to remote storage
            (auto-created from story connection if not provided)

    Returns:
        LineageResult if successful, None if generation fails
    """
    ctx = get_logging_context()

    stories_path = get_full_stories_path(project_config)
    storage_options = get_storage_options(project_config)

    # Build a remote-storage writer from the story connection when the
    # caller did not supply one.
    if write_file is None:
        write_file = get_write_file(project_config)

    ctx.debug("Generating lineage", stories_path=stories_path)

    try:
        generator = LineageGenerator(
            stories_path=stories_path,
            storage_options=storage_options,
        )
        result = generator.generate(date=date)
        generator.save(result, write_file=write_file)
        ctx.info(
            "Lineage generated successfully",
            nodes=len(result.nodes),
            edges=len(result.edges),
            layers=len(result.layers),
        )
        return result
    except Exception as e:
        # Lineage is best-effort: never fail the caller's run over it.
        ctx.warning(f"Failed to generate lineage: {e}")
        return None
|