odibi 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. odibi/__init__.py +32 -0
  2. odibi/__main__.py +8 -0
  3. odibi/catalog.py +3011 -0
  4. odibi/cli/__init__.py +11 -0
  5. odibi/cli/__main__.py +6 -0
  6. odibi/cli/catalog.py +553 -0
  7. odibi/cli/deploy.py +69 -0
  8. odibi/cli/doctor.py +161 -0
  9. odibi/cli/export.py +66 -0
  10. odibi/cli/graph.py +150 -0
  11. odibi/cli/init_pipeline.py +242 -0
  12. odibi/cli/lineage.py +259 -0
  13. odibi/cli/main.py +215 -0
  14. odibi/cli/run.py +98 -0
  15. odibi/cli/schema.py +208 -0
  16. odibi/cli/secrets.py +232 -0
  17. odibi/cli/story.py +379 -0
  18. odibi/cli/system.py +132 -0
  19. odibi/cli/test.py +286 -0
  20. odibi/cli/ui.py +31 -0
  21. odibi/cli/validate.py +39 -0
  22. odibi/config.py +3541 -0
  23. odibi/connections/__init__.py +9 -0
  24. odibi/connections/azure_adls.py +499 -0
  25. odibi/connections/azure_sql.py +709 -0
  26. odibi/connections/base.py +28 -0
  27. odibi/connections/factory.py +322 -0
  28. odibi/connections/http.py +78 -0
  29. odibi/connections/local.py +119 -0
  30. odibi/connections/local_dbfs.py +61 -0
  31. odibi/constants.py +17 -0
  32. odibi/context.py +528 -0
  33. odibi/diagnostics/__init__.py +12 -0
  34. odibi/diagnostics/delta.py +520 -0
  35. odibi/diagnostics/diff.py +169 -0
  36. odibi/diagnostics/manager.py +171 -0
  37. odibi/engine/__init__.py +20 -0
  38. odibi/engine/base.py +334 -0
  39. odibi/engine/pandas_engine.py +2178 -0
  40. odibi/engine/polars_engine.py +1114 -0
  41. odibi/engine/registry.py +54 -0
  42. odibi/engine/spark_engine.py +2362 -0
  43. odibi/enums.py +7 -0
  44. odibi/exceptions.py +297 -0
  45. odibi/graph.py +426 -0
  46. odibi/introspect.py +1214 -0
  47. odibi/lineage.py +511 -0
  48. odibi/node.py +3341 -0
  49. odibi/orchestration/__init__.py +0 -0
  50. odibi/orchestration/airflow.py +90 -0
  51. odibi/orchestration/dagster.py +77 -0
  52. odibi/patterns/__init__.py +24 -0
  53. odibi/patterns/aggregation.py +599 -0
  54. odibi/patterns/base.py +94 -0
  55. odibi/patterns/date_dimension.py +423 -0
  56. odibi/patterns/dimension.py +696 -0
  57. odibi/patterns/fact.py +748 -0
  58. odibi/patterns/merge.py +128 -0
  59. odibi/patterns/scd2.py +148 -0
  60. odibi/pipeline.py +2382 -0
  61. odibi/plugins.py +80 -0
  62. odibi/project.py +581 -0
  63. odibi/references.py +151 -0
  64. odibi/registry.py +246 -0
  65. odibi/semantics/__init__.py +71 -0
  66. odibi/semantics/materialize.py +392 -0
  67. odibi/semantics/metrics.py +361 -0
  68. odibi/semantics/query.py +743 -0
  69. odibi/semantics/runner.py +430 -0
  70. odibi/semantics/story.py +507 -0
  71. odibi/semantics/views.py +432 -0
  72. odibi/state/__init__.py +1203 -0
  73. odibi/story/__init__.py +55 -0
  74. odibi/story/doc_story.py +554 -0
  75. odibi/story/generator.py +1431 -0
  76. odibi/story/lineage.py +1043 -0
  77. odibi/story/lineage_utils.py +324 -0
  78. odibi/story/metadata.py +608 -0
  79. odibi/story/renderers.py +453 -0
  80. odibi/story/templates/run_story.html +2520 -0
  81. odibi/story/themes.py +216 -0
  82. odibi/testing/__init__.py +13 -0
  83. odibi/testing/assertions.py +75 -0
  84. odibi/testing/fixtures.py +85 -0
  85. odibi/testing/source_pool.py +277 -0
  86. odibi/transformers/__init__.py +122 -0
  87. odibi/transformers/advanced.py +1472 -0
  88. odibi/transformers/delete_detection.py +610 -0
  89. odibi/transformers/manufacturing.py +1029 -0
  90. odibi/transformers/merge_transformer.py +778 -0
  91. odibi/transformers/relational.py +675 -0
  92. odibi/transformers/scd.py +579 -0
  93. odibi/transformers/sql_core.py +1356 -0
  94. odibi/transformers/validation.py +165 -0
  95. odibi/ui/__init__.py +0 -0
  96. odibi/ui/app.py +195 -0
  97. odibi/utils/__init__.py +66 -0
  98. odibi/utils/alerting.py +667 -0
  99. odibi/utils/config_loader.py +343 -0
  100. odibi/utils/console.py +231 -0
  101. odibi/utils/content_hash.py +202 -0
  102. odibi/utils/duration.py +43 -0
  103. odibi/utils/encoding.py +102 -0
  104. odibi/utils/extensions.py +28 -0
  105. odibi/utils/hashing.py +61 -0
  106. odibi/utils/logging.py +203 -0
  107. odibi/utils/logging_context.py +740 -0
  108. odibi/utils/progress.py +429 -0
  109. odibi/utils/setup_helpers.py +302 -0
  110. odibi/utils/telemetry.py +140 -0
  111. odibi/validation/__init__.py +62 -0
  112. odibi/validation/engine.py +765 -0
  113. odibi/validation/explanation_linter.py +155 -0
  114. odibi/validation/fk.py +547 -0
  115. odibi/validation/gate.py +252 -0
  116. odibi/validation/quarantine.py +605 -0
  117. odibi/writers/__init__.py +15 -0
  118. odibi/writers/sql_server_writer.py +2081 -0
  119. odibi-2.5.0.dist-info/METADATA +255 -0
  120. odibi-2.5.0.dist-info/RECORD +124 -0
  121. odibi-2.5.0.dist-info/WHEEL +5 -0
  122. odibi-2.5.0.dist-info/entry_points.txt +2 -0
  123. odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
  124. odibi-2.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,28 @@
1
+ """Base connection interface."""
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+
6
class BaseConnection(ABC):
    """Interface that every connection type must implement.

    Subclasses provide path resolution and a configuration check; both
    methods are abstract, so this class cannot be instantiated directly.
    """

    @abstractmethod
    def get_path(self, relative_path: str) -> str:
        """Resolve a relative location to its full form.

        Args:
            relative_path: Relative path or table name

        Returns:
            Full path to resource
        """

    @abstractmethod
    def validate(self) -> None:
        """Check that the connection configuration is usable.

        Raises:
            ConnectionError: If validation fails
        """
@@ -0,0 +1,322 @@
1
+ """Connection factory for built-in connection types."""
2
+
3
+ from typing import Any, Dict
4
+
5
+ from odibi.plugins import register_connection_factory
6
+ from odibi.utils.logging import logger
7
+ from odibi.utils.logging_context import get_logging_context
8
+
9
+
10
def create_local_connection(name: str, config: Dict[str, Any]) -> Any:
    """Build a LocalConnection from a connection config mapping.

    Args:
        name: Connection name; only used in the log records emitted here.
        config: Connection settings. ``base_path`` is read and falls back
            to ``"./data"`` when the key is absent.

    Returns:
        The newly created LocalConnection.
    """
    log_ctx = get_logging_context()
    log_ctx.log_connection(connection_type="local", connection_name=name, action="create")

    # Imported lazily, after the "create" record has been logged.
    from odibi.connections.local import LocalConnection

    root = config.get("base_path", "./data")
    local_conn = LocalConnection(base_path=root)

    log_ctx.log_connection(
        connection_type="local",
        connection_name=name,
        action="created",
        base_path=root,
    )
    return local_conn
24
+
25
+
26
def create_http_connection(name: str, config: Dict[str, Any]) -> Any:
    """Build an HttpConnection from a connection config mapping.

    Args:
        name: Connection name; only used in the log records emitted here.
        config: Connection settings. Reads ``base_url`` (empty string when
            absent), ``headers`` and ``auth`` (both ``None`` when absent).

    Returns:
        The newly created HttpConnection.
    """
    log_ctx = get_logging_context()
    log_ctx.log_connection(connection_type="http", connection_name=name, action="create")

    # Imported lazily, after the "create" record has been logged.
    from odibi.connections.http import HttpConnection

    url = config.get("base_url", "")
    http_conn = HttpConnection(
        base_url=url,
        headers=config.get("headers"),
        auth=config.get("auth"),
    )

    log_ctx.log_connection(
        connection_type="http",
        connection_name=name,
        action="created",
        base_url=url,
    )
    return http_conn
44
+
45
+
46
def create_azure_blob_connection(name: str, config: Dict[str, Any]) -> Any:
    """Factory for AzureADLS (Blob) Connection.

    Credential settings may be given either inside a nested ``auth`` mapping
    or at the top level of ``config``; a truthy nested value takes precedence.
    When no auth mode is configured (missing or empty), it is inferred from
    whichever credentials are present.

    Args:
        name: Connection name, used in log records and error messages.
        config: Connection settings. Reads ``account_name`` / ``account``,
            ``container`` (required), ``path_prefix``, ``auth``,
            ``auth_mode``, ``validation_mode``, ``validate`` and the
            credential keys ``key_vault_name``, ``secret_name``,
            ``account_key``, ``sas_token``, ``tenant_id``, ``client_id``,
            ``client_secret``.

    Returns:
        The constructed AzureADLS connection.

    Raises:
        ImportError: If ``odibi.connections.azure_adls`` cannot be imported.
        ValueError: If neither ``account_name`` nor ``account`` is set.
        KeyError: If ``container`` is missing from ``config``.
    """
    ctx = get_logging_context()
    ctx.log_connection(connection_type="azure_blob", connection_name=name, action="create")

    try:
        from odibi.connections.azure_adls import AzureADLS
    except ImportError as e:
        ctx.error(
            f"Failed to import AzureADLS for connection '{name}'",
            connection_name=name,
            error=str(e),
        )
        # Chain explicitly so the underlying import failure stays visible.
        raise ImportError(
            "Azure ADLS support requires 'pip install odibi[azure]'. "
            "See README.md for installation instructions."
        ) from e

    # Handle config discrepancies
    account = config.get("account_name") or config.get("account")
    if not account:
        ctx.error(
            f"Connection '{name}' missing 'account_name'",
            connection_name=name,
            config_keys=list(config.keys()),
        )
        raise ValueError(
            f"Connection '{name}' missing 'account_name'. "
            f"Expected 'account_name' or 'account' in config, got keys: {list(config.keys())}"
        )

    # An explicit ``auth: null`` yields None rather than a missing key, so
    # normalise to an empty mapping before calling ``.get`` on it.
    auth_config = config.get("auth") or {}

    def _setting(key: str) -> Any:
        """Return the nested ``auth`` value for *key*, else the top-level one."""
        return auth_config.get(key) or config.get(key)

    # Extract auth details
    key_vault_name = _setting("key_vault_name")
    secret_name = _setting("secret_name")
    account_key = _setting("account_key")
    sas_token = _setting("sas_token")
    tenant_id = _setting("tenant_id")
    client_id = _setting("client_id")
    client_secret = _setting("client_secret")

    auth_mode = auth_config.get("mode") or config.get("auth_mode")

    # Auto-detect auth_mode when none was configured. Testing the value
    # (not just key presence) means ``auth_mode: null`` is also detected
    # instead of being passed through as None.
    if not auth_mode:
        if sas_token:
            auth_mode = "sas_token"
        elif key_vault_name and secret_name:
            auth_mode = "key_vault"
        elif account_key:
            auth_mode = "direct_key"
        elif tenant_id and client_id and client_secret:
            auth_mode = "service_principal"
        else:
            auth_mode = "managed_identity"

        ctx.debug(
            f"Auto-detected auth_mode for connection '{name}'",
            connection_name=name,
            auth_mode=auth_mode,
        )

    validation_mode = config.get("validation_mode", "lazy")
    validate = config.get("validate")
    if validate is None:
        # No explicit flag: validate eagerly only when asked to.
        validate = validation_mode == "eager"

    # Register secrets (log that we're registering, not the values)
    for label, secret in (
        ("account_key", account_key),
        ("sas_token", sas_token),
        ("client_secret", client_secret),
    ):
        if secret:
            logger.register_secret(secret)
            ctx.debug(
                f"Registered {label} secret for connection '{name}'",
                connection_name=name,
            )

    try:
        # ``config["container"]`` is read inside the try so that a missing
        # key is logged below before the KeyError propagates.
        container = config["container"]
        connection = AzureADLS(
            account=account,
            container=container,
            path_prefix=config.get("path_prefix", ""),
            auth_mode=auth_mode,
            key_vault_name=key_vault_name,
            secret_name=secret_name,
            account_key=account_key,
            sas_token=sas_token,
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
            validate=validate,
        )

        ctx.log_connection(
            connection_type="azure_blob",
            connection_name=name,
            action="created",
            account=account,
            container=container,
            auth_mode=auth_mode,
            validation_mode=validation_mode,
        )
        return connection

    except Exception as e:
        ctx.error(
            f"Failed to create Azure Blob connection '{name}'",
            connection_name=name,
            account=account,
            container=config.get("container"),
            auth_mode=auth_mode,
            error=str(e),
        )
        raise
162
+
163
+
164
def create_delta_connection(name: str, config: Dict[str, Any]) -> Any:
    """Factory for Delta Connection.

    Two modes are supported:

    * Local path: used when ``path`` is present in ``config``, or when only
      ``base_path`` is given and no ``catalog`` is configured. Returns a
      LocalConnection rooted at that path.
    * Catalog: otherwise. Returns a connection whose ``get_path`` builds a
      dotted table reference from ``catalog``, ``schema`` and the table name.

    Args:
        name: Connection name, used in log records.
        config: Connection settings. Reads ``path``, ``base_path``,
            ``catalog`` and ``schema`` (``"default"`` when missing or empty).

    Returns:
        A LocalConnection (path mode) or a DeltaCatalogConnection
        (catalog mode).
    """
    ctx = get_logging_context()
    ctx.log_connection(connection_type="delta", connection_name=name, action="create")

    # Local path-based Delta
    base_path = config.get("path") or config.get("base_path")
    # ``base_path`` alone also selects path mode, but only when no catalog is
    # configured, so existing catalog configs keep resolving as before.
    use_local_path = "path" in config or (bool(base_path) and not config.get("catalog"))
    if use_local_path:
        from odibi.connections.local import LocalConnection

        connection = LocalConnection(base_path=base_path)

        ctx.log_connection(
            connection_type="delta",
            connection_name=name,
            action="created",
            mode="local_path",
            base_path=base_path,
        )
        return connection

    # Catalog based (Spark only)
    from odibi.connections.base import BaseConnection

    class DeltaCatalogConnection(BaseConnection):
        """Resolves table names to ``[catalog.]schema.table`` references."""

        def __init__(self, catalog, schema):
            self.catalog = catalog
            self.schema = schema

        def get_path(self, table):
            """Return the dotted reference for *table*.

            The catalog segment is omitted when no catalog was configured,
            rather than rendering the literal text ``None``.
            """
            qualified = f"{self.schema}.{table}"
            if self.catalog:
                return f"{self.catalog}.{qualified}"
            return qualified

        def validate(self):
            """No-op: nothing is checked for catalog connections here."""
            pass

        def pandas_storage_options(self):
            """Return an empty mapping of storage options."""
            return {}

    catalog = config.get("catalog")
    schema = config.get("schema") or "default"
    connection = DeltaCatalogConnection(catalog=catalog, schema=schema)

    ctx.log_connection(
        connection_type="delta",
        connection_name=name,
        action="created",
        mode="catalog",
        catalog=catalog,
        schema=schema,
    )
    return connection
215
+
216
+
217
def create_sql_server_connection(name: str, config: Dict[str, Any]) -> Any:
    """Factory for SQL Server / Azure SQL Connection.

    Credential settings may be given either inside a nested ``auth`` mapping
    or at the top level of ``config``; a truthy nested value takes precedence.
    The auth mode is taken from top-level ``auth_mode``, then from
    ``auth.mode``, and is otherwise inferred from the credentials present.

    Args:
        name: Connection name, used in log records and error messages.
        config: Connection settings. Reads ``host`` / ``server``,
            ``database`` (required), ``driver``, ``port`` (default 1433),
            ``timeout`` (default 30), ``auth``, ``auth_mode``, ``username``,
            ``password``, ``key_vault_name`` and ``secret_name``.

    Returns:
        The constructed AzureSQL connection.

    Raises:
        ImportError: If ``odibi.connections.azure_sql`` cannot be imported.
        ValueError: If neither ``host`` nor ``server`` is set.
        KeyError: If ``database`` is missing from ``config``.
    """
    ctx = get_logging_context()
    ctx.log_connection(connection_type="sql_server", connection_name=name, action="create")

    try:
        from odibi.connections.azure_sql import AzureSQL
    except ImportError as e:
        ctx.error(
            f"Failed to import AzureSQL for connection '{name}'",
            connection_name=name,
            error=str(e),
        )
        # Chain explicitly so the underlying import failure stays visible.
        raise ImportError(
            "Azure SQL support requires 'pip install odibi[azure]'. "
            "See README.md for installation instructions."
        ) from e

    server = config.get("host") or config.get("server")
    if not server:
        ctx.error(
            f"Connection '{name}' missing 'host' or 'server'",
            connection_name=name,
            config_keys=list(config.keys()),
        )
        raise ValueError(
            f"Connection '{name}' missing 'host' or 'server'. Got keys: {list(config.keys())}"
        )

    # An explicit ``auth: null`` yields None rather than a missing key, so
    # normalise to an empty mapping before calling ``.get`` on it.
    auth_config = config.get("auth") or {}
    username = auth_config.get("username") or config.get("username")
    password = auth_config.get("password") or config.get("password")
    key_vault_name = auth_config.get("key_vault_name") or config.get("key_vault_name")
    secret_name = auth_config.get("secret_name") or config.get("secret_name")

    # Top-level ``auth_mode`` keeps priority (existing behaviour); the nested
    # ``auth.mode`` is accepted as a fallback, matching the Azure Blob factory.
    auth_mode = config.get("auth_mode") or auth_config.get("mode")
    if not auth_mode:
        if username and password:
            auth_mode = "sql"
        elif key_vault_name and secret_name and username:
            auth_mode = "key_vault"
        else:
            auth_mode = "aad_msi"

        ctx.debug(
            f"Auto-detected auth_mode for connection '{name}'",
            connection_name=name,
            auth_mode=auth_mode,
        )

    if password:
        # Register the value so the logger can treat it as a secret.
        logger.register_secret(password)
        ctx.debug(f"Registered password secret for connection '{name}'", connection_name=name)

    port = config.get("port", 1433)

    try:
        # ``config["database"]`` is read inside the try so that a missing
        # key is logged below before the KeyError propagates.
        database = config["database"]
        connection = AzureSQL(
            server=server,
            database=database,
            driver=config.get("driver", "ODBC Driver 18 for SQL Server"),
            username=username,
            password=password,
            auth_mode=auth_mode,
            key_vault_name=key_vault_name,
            secret_name=secret_name,
            port=port,
            timeout=config.get("timeout", 30),
        )

        ctx.log_connection(
            connection_type="sql_server",
            connection_name=name,
            action="created",
            server=server,
            database=database,
            auth_mode=auth_mode,
            port=port,
        )
        return connection

    except Exception as e:
        ctx.error(
            f"Failed to create SQL Server connection '{name}'",
            connection_name=name,
            server=server,
            database=config.get("database"),
            auth_mode=auth_mode,
            error=str(e),
        )
        raise
306
+
307
+
308
def register_builtins():
    """Register every built-in connection factory under its type name(s).

    Some factories are registered under two names: ``azure_blob`` and
    ``azure_adls`` share one factory, as do ``sql_server`` and ``azure_sql``.
    """
    builtin_factories = (
        ("local", create_local_connection),
        ("http", create_http_connection),
        # Azure Blob / ADLS
        ("azure_blob", create_azure_blob_connection),
        ("azure_adls", create_azure_blob_connection),
        # Delta
        ("delta", create_delta_connection),
        # SQL
        ("sql_server", create_sql_server_connection),
        ("azure_sql", create_sql_server_connection),
    )
    for type_name, factory in builtin_factories:
        register_connection_factory(type_name, factory)
@@ -0,0 +1,78 @@
1
+ """HTTP Connection implementation."""
2
+
3
+ from typing import Any, Dict, Optional
4
+ from urllib.parse import urljoin
5
+
6
+ from odibi.connections.base import BaseConnection
7
+
8
+
9
class HttpConnection(BaseConnection):
    """Connection to HTTP/HTTPS APIs."""

    def __init__(
        self,
        base_url: str,
        headers: Optional[Dict[str, str]] = None,
        auth: Optional[Dict[str, str]] = None,
        validate: bool = True,
    ):
        """Initialize HTTP connection.

        Args:
            base_url: Base URL for API
            headers: Default headers (copied; the caller's dict is not modified)
            auth: Authentication details. One of: ``token`` (Bearer),
                ``username`` + ``password`` (Basic), or ``api_key`` with an
                optional ``header_name`` (default ``X-API-Key``).
            validate: Whether to run :meth:`validate` on construction

        Raises:
            ValueError: If ``validate`` is true and ``base_url`` is empty
        """
        # Tolerate None so a missing URL is reported by validate() as a
        # ValueError instead of failing here with an AttributeError.
        # Always store exactly one trailing slash so urljoin appends to the
        # base instead of replacing its last path segment.
        self.base_url = (base_url or "").rstrip("/") + "/"
        # Copy the mapping: the auth handling below adds entries, and those
        # must not leak into a dict the caller still owns.
        self.headers = dict(headers) if headers else {}

        if auth:
            if "token" in auth:
                self.headers["Authorization"] = f"Bearer {auth['token']}"
            elif "username" in auth and "password" in auth:
                import base64

                creds = f"{auth['username']}:{auth['password']}"
                b64_creds = base64.b64encode(creds.encode()).decode()
                self.headers["Authorization"] = f"Basic {b64_creds}"
            elif "api_key" in auth:
                # Common pattern: X-API-Key header or similar
                header_name = auth.get("header_name", "X-API-Key")
                self.headers[header_name] = auth["api_key"]

        if validate:
            self.validate()

    def validate(self) -> None:
        """Validate connection configuration.

        Raises:
            ValueError: If no base URL was provided
        """
        # __init__ always appends "/", so an empty URL is stored as "/";
        # strip slashes before testing or this check could never fail.
        if not self.base_url.strip("/"):
            raise ValueError("HTTP connection requires 'base_url'")

    def get_path(self, path: str) -> str:
        """Resolve endpoint path.

        Args:
            path: API endpoint (e.g., 'v1/users'), or an absolute
                http(s) URL, which is returned unchanged

        Returns:
            Full URL
        """
        if path.startswith(("http://", "https://")):
            return path

        # base_url ends with "/" and the leading "/" is stripped from path,
        # so urljoin appends to the base rather than replacing its path.
        return urljoin(self.base_url, path.lstrip("/"))

    def pandas_storage_options(self) -> Dict[str, Any]:
        """Get storage options for Pandas/fsspec.

        Returns:
            Dictionary with headers
        """
        # For HTTP(S) in Pandas (urllib), storage_options ARE the headers.
        return self.headers
@@ -0,0 +1,119 @@
1
+ """Local filesystem connection."""
2
+
3
+ from pathlib import Path
4
+
5
+ from odibi.connections.base import BaseConnection
6
+ from odibi.utils.logging_context import get_logging_context
7
+
8
+
9
class LocalConnection(BaseConnection):
    """Connection to local filesystem or URI-based paths (e.g. dbfs:/, file://)."""

    def __init__(self, base_path: str = "./data"):
        """Initialize local connection.

        Args:
            base_path: Base directory for all paths (can be local path or URI)
        """
        ctx = get_logging_context()
        ctx.log_connection(
            connection_type="local",
            connection_name="LocalConnection",
            action="init",
            base_path=base_path,
        )

        self.base_path_str = base_path
        # Any "://" also contains ":/", so one substring test covers both
        # "scheme://..." and "scheme:/..." forms.
        # NOTE(review): a Windows drive path written with forward slashes
        # (e.g. "C:/data") also matches and is treated as a URI - confirm
        # whether that is intended.
        self.is_uri = ":/" in base_path

        if not self.is_uri:
            self.base_path = Path(base_path)
            ctx.debug(
                "LocalConnection initialized with filesystem path",
                base_path=base_path,
                is_uri=False,
            )
        else:
            self.base_path = None  # Not used for URIs
            ctx.debug(
                "LocalConnection initialized with URI path",
                base_path=base_path,
                is_uri=True,
            )

    def get_path(self, relative_path: str) -> str:
        """Get full path for a relative path.

        Args:
            relative_path: Relative path from base

        Returns:
            For URI bases, the base and the relative path joined with a
            single forward slash; otherwise the absolute filesystem path.
        """
        ctx = get_logging_context()

        if self.is_uri:
            # URIs are joined as plain strings. Strip every leading
            # separator (either kind, in any order) from the relative part
            # so it cannot replace the root.
            clean_rel = relative_path.lstrip("/\\")
            # Handle cases where base_path might not have trailing slash
            if self.base_path_str.endswith(("/", "\\")):
                full_path = f"{self.base_path_str}{clean_rel}"
            else:
                # Use forward slash for URIs
                full_path = f"{self.base_path_str}/{clean_rel}"

            ctx.debug(
                "Resolved URI path",
                relative_path=relative_path,
                full_path=full_path,
            )
            return full_path

        # Standard local path logic
        resolved = str((self.base_path / relative_path).absolute())

        ctx.debug(
            "Resolved local path",
            relative_path=relative_path,
            full_path=resolved,
        )
        return resolved

    def validate(self) -> None:
        """Validate that base path exists or can be created.

        For filesystem paths the base directory (and any missing parents)
        is created. URI bases are not checked here.

        Raises:
            Exception: Whatever the directory creation raised (e.g.
                ``OSError``); it is logged and re-raised unchanged.
        """
        ctx = get_logging_context()
        ctx.debug(
            "Validating LocalConnection",
            base_path=self.base_path_str,
            is_uri=self.is_uri,
        )

        if self.is_uri:
            # Cannot validate/create URIs with local os module
            # Assume valid or handled by engine
            ctx.debug(
                "Skipping URI validation (handled by engine)",
                base_path=self.base_path_str,
            )
            return

        # Create base directory if it doesn't exist
        try:
            # Record existence BEFORE mkdir; checked afterwards the
            # directory always exists and "created" would always be False.
            already_existed = self.base_path.exists()
            self.base_path.mkdir(parents=True, exist_ok=True)
            ctx.info(
                "LocalConnection validated successfully",
                base_path=str(self.base_path.absolute()),
                created=not already_existed,
            )
        except Exception as e:
            ctx.error(
                "LocalConnection validation failed",
                base_path=self.base_path_str,
                error=str(e),
            )
            raise
@@ -0,0 +1,61 @@
1
+ """Local DBFS mock for testing Databricks pipelines locally."""
2
+
3
+ from pathlib import Path
4
+ from typing import Union
5
+
6
+ from .base import BaseConnection
7
+
8
+
9
class LocalDBFS(BaseConnection):
    """Mock DBFS connection for local development.

    Maps dbfs:/ paths to local filesystem for testing.
    Useful for developing Databricks pipelines locally.
    """

    # Scheme prefix removed from the start of incoming paths.
    _DBFS_PREFIX = "dbfs:/"

    def __init__(self, root: Union[str, Path] = ".dbfs"):
        """Initialize local DBFS mock.

        Args:
            root: Local directory to use as DBFS root (default: .dbfs)
        """
        self.root = Path(root).resolve()

    def resolve(self, path: str) -> str:
        """Resolve dbfs:/ path to local filesystem path.

        Only a leading ``dbfs:/`` is removed; the same text appearing later
        in the path is kept as part of the file name.

        Args:
            path: DBFS path (e.g., 'dbfs:/FileStore/data.csv')

        Returns:
            Absolute local filesystem path

        Example:
            >>> conn = LocalDBFS(root="/tmp/dbfs")
            >>> conn.resolve("dbfs:/FileStore/data.csv")
            '/tmp/dbfs/FileStore/data.csv'
        """
        # Remove the dbfs:/ prefix (prefix only, not every occurrence)
        if path.startswith(self._DBFS_PREFIX):
            path = path[len(self._DBFS_PREFIX):]
        # Drop leading slashes so the join below stays under root instead
        # of being treated as an absolute path.
        clean_path = path.lstrip("/")

        # Join with root
        # NOTE(review): ".." segments are not rejected, so a path can still
        # resolve outside root - confirm whether that matters for this mock.
        return str(self.root / clean_path)

    def ensure_dir(self, path: str) -> None:
        """Create parent directories for given path.

        Args:
            path: DBFS path
        """
        local_path = Path(self.resolve(path))
        local_path.parent.mkdir(parents=True, exist_ok=True)

    def get_path(self, relative_path: str) -> str:
        """Get local filesystem path for DBFS path."""
        return self.resolve(relative_path)

    def validate(self) -> None:
        """Validate local DBFS configuration."""
        pass  # No validation needed for local mock
odibi/constants.py ADDED
@@ -0,0 +1,17 @@
1
"""Default values shared across the Odibi framework."""

# --- Delta table maintenance ---
DEFAULT_VACUUM_RETENTION_HOURS = 7 * 24  # 168 hours, i.e. 7 days

# --- SQL operations ---
DEFAULT_SQL_CHUNK_SIZE = 1_000

# --- Connection timeouts (seconds) ---
DEFAULT_CONNECTION_TIMEOUT = 30
DEFAULT_KEY_VAULT_TIMEOUT = 30.0

# --- Story generation ---
DEFAULT_MAX_SAMPLE_ROWS = 10

# --- Delta history ---
DEFAULT_HISTORY_LIMIT = 100