odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,324 @@
1
+ """
2
+ Lineage Utilities
3
+ =================
4
+
5
+ Shared utilities for generating combined lineage from pipeline stories.
6
+
7
+ This module provides helper functions that can be used by both PipelineManager
8
+ and SemanticLayerRunner to generate lineage without tight coupling.
9
+ """
10
+
11
+ from typing import Any, Callable, Dict, Optional
12
+
13
+ from odibi.config import ProjectConfig
14
+ from odibi.story.lineage import LineageGenerator, LineageResult
15
+ from odibi.utils.logging_context import get_logging_context
16
+
17
+
18
def get_full_stories_path(project_config: ProjectConfig) -> str:
    """
    Build the full path to stories, including cloud URL if remote.

    A relative path such as "OEE/Stories/" is resolved against the story
    connection. Leading and trailing slashes are stripped before the URL is
    assembled, so the results look like:
    - Azure (azure_blob / delta): abfs://container@account.dfs.core.windows.net/OEE/Stories
    - S3 (s3 / aws_s3): s3://bucket/OEE/Stories
    - GCS (gcs / google_cloud_storage): gs://bucket/OEE/Stories
    - HDFS: hdfs://host:port/OEE/Stories (port falls back to 8020)
    - DBFS: dbfs:/OEE/Stories

    The configured path is returned unchanged when:
    - it is already qualified (contains "://" or starts with "dbfs:/"),
    - the story connection cannot be found or has no ``type``,
    - the connection type is not one of the above, or
    - the connection lacks the attributes needed to build the URL.

    Args:
        project_config: Project configuration with story settings

    Returns:
        Full stories path (local or remote URL)
    """
    stories_path = project_config.story.path

    # Already fully qualified. DBFS URIs use a single slash ("dbfs:/..."),
    # so they are not caught by the "://" test and need their own check;
    # otherwise they would be prefixed a second time below.
    if "://" in stories_path or stories_path.startswith("dbfs:/"):
        return stories_path

    # Get story connection info
    story_conn = project_config.connections.get(project_config.story.connection)
    if not story_conn:
        return stories_path

    conn_type = getattr(story_conn, "type", None)
    if conn_type is None:
        return stories_path

    # The type may be an enum member (use its value) or a plain string
    conn_type_value = conn_type.value if hasattr(conn_type, "value") else str(conn_type)

    # Strip leading/trailing slashes for clean path construction
    clean_path = stories_path.strip("/")

    # Azure Blob Storage / Delta Lake
    if conn_type_value in ("azure_blob", "delta"):
        account_name = getattr(story_conn, "account_name", None)
        container = getattr(story_conn, "container", None)
        if account_name and container:
            return f"abfs://{container}@{account_name}.dfs.core.windows.net/{clean_path}"

    # AWS S3
    elif conn_type_value in ("s3", "aws_s3"):
        bucket = getattr(story_conn, "bucket", None)
        if bucket:
            return f"s3://{bucket}/{clean_path}"

    # Google Cloud Storage
    elif conn_type_value in ("gcs", "google_cloud_storage"):
        bucket = getattr(story_conn, "bucket", None)
        if bucket:
            return f"gs://{bucket}/{clean_path}"

    # HDFS
    elif conn_type_value == "hdfs":
        host = getattr(story_conn, "host", None)
        # A connection may define ``port`` but leave it unset (None); fall
        # back to 8020 in that case too instead of emitting "host:None".
        port = getattr(story_conn, "port", None) or 8020
        if host:
            return f"hdfs://{host}:{port}/{clean_path}"

    # DBFS (Databricks File System)
    elif conn_type_value == "dbfs":
        return f"dbfs:/{clean_path}"

    # Unknown type, or a known type missing required attributes
    return stories_path
90
+
91
+
92
def get_storage_options(project_config: ProjectConfig) -> Dict[str, Any]:
    """
    Get storage options from story connection for fsspec/adlfs.

    Lookup order (first non-empty value wins):
    1. ``credentials`` on the connection: returned as a plain dict copy
    2. ``account_key`` on the connection: ``{"account_key": ...}``
    3. ``sas_token`` on the connection: ``{"sas_token": ...}``
    4. Nested ``auth`` (dict or object with attributes):
       - ``account_key``: ``{"account_key": ...}``
       - ``sas_token``: ``{"sas_token": ...}``
       - ``connection_string``: ``{"connection_string": ...}``
       - mode ``aad_msi`` / ``managed_identity``: ``{"account_name": ...}``
         when the connection has an account name, otherwise an empty dict
         (no explicit credentials are supplied in this mode)
       - mode ``key_vault``: not implemented here; logs a warning and
         returns an empty dict

    Args:
        project_config: Project configuration with story connection

    Returns:
        Dict of storage options for fsspec; empty when the story connection
        is missing or no usable credentials are found
    """
    story_conn = project_config.connections.get(project_config.story.connection)
    if not story_conn:
        return {}

    # Check for direct credentials on connection
    credentials = getattr(story_conn, "credentials", None)
    if credentials:
        return dict(credentials)
    for key in ("account_key", "sas_token"):
        value = getattr(story_conn, key, None)
        if value:
            return {key: value}

    # Check nested auth structure
    auth = getattr(story_conn, "auth", None)
    if not auth:
        return {}

    # Helper to get value from auth (handles both dict and attribute-style objects)
    def get_auth_value(key: str) -> Any:
        if isinstance(auth, dict):
            return auth.get(key)
        return getattr(auth, key, None)

    # Explicit secrets take precedence over the declared mode
    for key in ("account_key", "sas_token", "connection_string"):
        value = get_auth_value(key)
        if value:
            return {key: value}

    # The mode may be an enum member (use its value) or a plain string
    auth_mode = get_auth_value("mode")
    if auth_mode:
        mode_value = auth_mode.value if hasattr(auth_mode, "value") else str(auth_mode)
    else:
        mode_value = None

    # MSI / managed identity - no explicit creds needed; only the account
    # name is passed on so the filesystem knows which account to address
    if mode_value in ("aad_msi", "managed_identity"):
        account_name = getattr(story_conn, "account_name", None)
        return {"account_name": account_name} if account_name else {}

    # Key Vault mode - would need to fetch from Key Vault
    if mode_value == "key_vault":
        # The logging context is only needed on this path, so acquire it here
        get_logging_context().warning(
            "Key Vault auth for story storage not yet implemented. "
            "Consider using direct_key or aad_msi for story connection."
        )

    return {}
172
+
173
+
174
def _make_fsspec_writer(
    protocol: str,
    url_prefixes: tuple,
    base_url: str,
    fs_options: Dict[str, Any],
) -> Callable[[str, str], None]:
    """Return a text writer that stores content through an fsspec filesystem.

    Args:
        protocol: fsspec protocol name passed to ``fsspec.filesystem``
        url_prefixes: URL schemes that mark a path as already fully qualified
        base_url: URL that relative paths are appended to (no trailing slash)
        fs_options: Keyword arguments forwarded to ``fsspec.filesystem``
    """

    def write_file_remote(path: str, content: str) -> None:
        # Imported lazily so fsspec is only required when remote storage is used
        import fsspec

        if path.startswith(url_prefixes):
            full_path = path
        else:
            # Drop leading slashes so the joined URL has no empty "//" segment
            full_path = f"{base_url}/{path.lstrip('/')}"

        fs = fsspec.filesystem(protocol, **fs_options)
        # Explicit encoding: text mode would otherwise use the platform default
        with fs.open(full_path, "w", encoding="utf-8") as f:
            f.write(content)

    return write_file_remote


def get_write_file(project_config: ProjectConfig) -> Optional[Callable[[str, str], None]]:
    """
    Create a write_file callback for storage using story connection.

    Supported connection types:
    - ``local``: writes under the connection's ``base_path`` ("./data" when
      the attribute is absent or None), creating parent directories
    - ``azure_blob`` / ``delta``: writes via fsspec "abfs"; requires storage
      options plus ``account_name`` and ``container`` on the connection
    - ``s3`` / ``aws_s3``: writes via fsspec "s3"; requires ``bucket``
    - ``gcs`` / ``google_cloud_storage``: writes via fsspec "gcs"; requires
      ``bucket``

    All writers take ``(path, content)``, write ``content`` as UTF-8 text and
    return None. Remote writers use ``path`` as-is when it already carries the
    matching URL scheme; otherwise it is appended to the container/bucket URL.

    Args:
        project_config: Project configuration with story connection

    Returns:
        Callable for writing files, or None when the story connection is
        missing, has no type, has an unsupported type, or lacks the
        attributes/credentials its type requires
    """
    story_conn = project_config.connections.get(project_config.story.connection)
    if not story_conn:
        return None

    conn_type = getattr(story_conn, "type", None)
    if conn_type is None:
        return None

    # The type may be an enum member (use its value) or a plain string
    conn_type_value = conn_type.value if hasattr(conn_type, "value") else str(conn_type)

    if conn_type_value == "local":
        base_path = getattr(story_conn, "base_path", None)
        if base_path is None:
            base_path = "./data"

        def write_file_local(path: str, content: str) -> None:
            import os

            full_path = os.path.join(base_path, path)
            parent_dir = os.path.dirname(full_path)
            # os.makedirs("") raises, so only create a parent when there is one
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            # Explicit encoding keeps output identical across platforms
            with open(full_path, "w", encoding="utf-8") as f:
                f.write(content)

        return write_file_local

    if conn_type_value in ("azure_blob", "delta"):
        # Credentials are only resolved for connection types that need them
        storage_options = get_storage_options(project_config)
        if not storage_options:
            return None

        account_name = getattr(story_conn, "account_name", None)
        container = getattr(story_conn, "container", None)
        if not account_name or not container:
            return None

        return _make_fsspec_writer(
            "abfs",
            ("abfs://", "abfss://", "az://"),
            f"abfs://{container}@{account_name}.dfs.core.windows.net",
            # Resolved storage options override the default account_name
            {"account_name": account_name, **storage_options},
        )

    if conn_type_value in ("s3", "aws_s3"):
        bucket = getattr(story_conn, "bucket", None)
        if not bucket:
            return None
        return _make_fsspec_writer(
            "s3", ("s3://",), f"s3://{bucket}", get_storage_options(project_config)
        )

    if conn_type_value in ("gcs", "google_cloud_storage"):
        bucket = getattr(story_conn, "bucket", None)
        if not bucket:
            return None
        return _make_fsspec_writer(
            "gcs", ("gs://",), f"gs://{bucket}", get_storage_options(project_config)
        )

    return None
275
+
276
+
277
def generate_lineage(
    project_config: ProjectConfig,
    date: Optional[str] = None,
    write_file: Optional[Callable[[str, str], None]] = None,
) -> Optional[LineageResult]:
    """
    Generate and save combined lineage from the project's pipeline stories.

    Resolves the stories location and storage options from the story
    connection, runs a LineageGenerator over them and saves the result.
    Any exception raised while building, generating or saving is logged as a
    warning and swallowed, so this call is best-effort.

    Args:
        project_config: Project configuration
        date: Optional date string, forwarded unchanged to
            ``LineageGenerator.generate``
        write_file: Optional callback used when saving; when None, one is
            derived from the story connection via ``get_write_file`` (which
            may itself return None)

    Returns:
        LineageResult if successful, None if generation fails
    """
    ctx = get_logging_context()

    generator_kwargs = {
        "stories_path": get_full_stories_path(project_config),
        "storage_options": get_storage_options(project_config),
    }

    # Fall back to a writer built from the story connection when none is supplied
    writer = write_file if write_file is not None else get_write_file(project_config)

    ctx.debug("Generating lineage", stories_path=generator_kwargs["stories_path"])

    try:
        generator = LineageGenerator(**generator_kwargs)
        lineage = generator.generate(date=date)
        generator.save(lineage, write_file=writer)

        node_count = len(lineage.nodes)
        edge_count = len(lineage.edges)
        layer_count = len(lineage.layers)
        ctx.info(
            "Lineage generated successfully",
            nodes=node_count,
            edges=edge_count,
            layers=layer_count,
        )
    except Exception as e:
        # Lineage is auxiliary output: report the failure but do not raise
        ctx.warning(f"Failed to generate lineage: {e}")
        return None

    return lineage