odibi 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odibi/__init__.py +32 -0
- odibi/__main__.py +8 -0
- odibi/catalog.py +3011 -0
- odibi/cli/__init__.py +11 -0
- odibi/cli/__main__.py +6 -0
- odibi/cli/catalog.py +553 -0
- odibi/cli/deploy.py +69 -0
- odibi/cli/doctor.py +161 -0
- odibi/cli/export.py +66 -0
- odibi/cli/graph.py +150 -0
- odibi/cli/init_pipeline.py +242 -0
- odibi/cli/lineage.py +259 -0
- odibi/cli/main.py +215 -0
- odibi/cli/run.py +98 -0
- odibi/cli/schema.py +208 -0
- odibi/cli/secrets.py +232 -0
- odibi/cli/story.py +379 -0
- odibi/cli/system.py +132 -0
- odibi/cli/test.py +286 -0
- odibi/cli/ui.py +31 -0
- odibi/cli/validate.py +39 -0
- odibi/config.py +3541 -0
- odibi/connections/__init__.py +9 -0
- odibi/connections/azure_adls.py +499 -0
- odibi/connections/azure_sql.py +709 -0
- odibi/connections/base.py +28 -0
- odibi/connections/factory.py +322 -0
- odibi/connections/http.py +78 -0
- odibi/connections/local.py +119 -0
- odibi/connections/local_dbfs.py +61 -0
- odibi/constants.py +17 -0
- odibi/context.py +528 -0
- odibi/diagnostics/__init__.py +12 -0
- odibi/diagnostics/delta.py +520 -0
- odibi/diagnostics/diff.py +169 -0
- odibi/diagnostics/manager.py +171 -0
- odibi/engine/__init__.py +20 -0
- odibi/engine/base.py +334 -0
- odibi/engine/pandas_engine.py +2178 -0
- odibi/engine/polars_engine.py +1114 -0
- odibi/engine/registry.py +54 -0
- odibi/engine/spark_engine.py +2362 -0
- odibi/enums.py +7 -0
- odibi/exceptions.py +297 -0
- odibi/graph.py +426 -0
- odibi/introspect.py +1214 -0
- odibi/lineage.py +511 -0
- odibi/node.py +3341 -0
- odibi/orchestration/__init__.py +0 -0
- odibi/orchestration/airflow.py +90 -0
- odibi/orchestration/dagster.py +77 -0
- odibi/patterns/__init__.py +24 -0
- odibi/patterns/aggregation.py +599 -0
- odibi/patterns/base.py +94 -0
- odibi/patterns/date_dimension.py +423 -0
- odibi/patterns/dimension.py +696 -0
- odibi/patterns/fact.py +748 -0
- odibi/patterns/merge.py +128 -0
- odibi/patterns/scd2.py +148 -0
- odibi/pipeline.py +2382 -0
- odibi/plugins.py +80 -0
- odibi/project.py +581 -0
- odibi/references.py +151 -0
- odibi/registry.py +246 -0
- odibi/semantics/__init__.py +71 -0
- odibi/semantics/materialize.py +392 -0
- odibi/semantics/metrics.py +361 -0
- odibi/semantics/query.py +743 -0
- odibi/semantics/runner.py +430 -0
- odibi/semantics/story.py +507 -0
- odibi/semantics/views.py +432 -0
- odibi/state/__init__.py +1203 -0
- odibi/story/__init__.py +55 -0
- odibi/story/doc_story.py +554 -0
- odibi/story/generator.py +1431 -0
- odibi/story/lineage.py +1043 -0
- odibi/story/lineage_utils.py +324 -0
- odibi/story/metadata.py +608 -0
- odibi/story/renderers.py +453 -0
- odibi/story/templates/run_story.html +2520 -0
- odibi/story/themes.py +216 -0
- odibi/testing/__init__.py +13 -0
- odibi/testing/assertions.py +75 -0
- odibi/testing/fixtures.py +85 -0
- odibi/testing/source_pool.py +277 -0
- odibi/transformers/__init__.py +122 -0
- odibi/transformers/advanced.py +1472 -0
- odibi/transformers/delete_detection.py +610 -0
- odibi/transformers/manufacturing.py +1029 -0
- odibi/transformers/merge_transformer.py +778 -0
- odibi/transformers/relational.py +675 -0
- odibi/transformers/scd.py +579 -0
- odibi/transformers/sql_core.py +1356 -0
- odibi/transformers/validation.py +165 -0
- odibi/ui/__init__.py +0 -0
- odibi/ui/app.py +195 -0
- odibi/utils/__init__.py +66 -0
- odibi/utils/alerting.py +667 -0
- odibi/utils/config_loader.py +343 -0
- odibi/utils/console.py +231 -0
- odibi/utils/content_hash.py +202 -0
- odibi/utils/duration.py +43 -0
- odibi/utils/encoding.py +102 -0
- odibi/utils/extensions.py +28 -0
- odibi/utils/hashing.py +61 -0
- odibi/utils/logging.py +203 -0
- odibi/utils/logging_context.py +740 -0
- odibi/utils/progress.py +429 -0
- odibi/utils/setup_helpers.py +302 -0
- odibi/utils/telemetry.py +140 -0
- odibi/validation/__init__.py +62 -0
- odibi/validation/engine.py +765 -0
- odibi/validation/explanation_linter.py +155 -0
- odibi/validation/fk.py +547 -0
- odibi/validation/gate.py +252 -0
- odibi/validation/quarantine.py +605 -0
- odibi/writers/__init__.py +15 -0
- odibi/writers/sql_server_writer.py +2081 -0
- odibi-2.5.0.dist-info/METADATA +255 -0
- odibi-2.5.0.dist-info/RECORD +124 -0
- odibi-2.5.0.dist-info/WHEEL +5 -0
- odibi-2.5.0.dist-info/entry_points.txt +2 -0
- odibi-2.5.0.dist-info/licenses/LICENSE +190 -0
- odibi-2.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Base connection interface."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BaseConnection(ABC):
    """Interface shared by every connection type.

    Concrete connections must implement both :meth:`get_path` and
    :meth:`validate`; until they do, the class cannot be instantiated.
    """

    @abstractmethod
    def get_path(self, relative_path: str) -> str:
        """Resolve a relative path to the full location of the resource.

        Args:
            relative_path: Relative path or table name

        Returns:
            Full path to resource
        """
        ...

    @abstractmethod
    def validate(self) -> None:
        """Check that the connection configuration is usable.

        Raises:
            ConnectionError: If validation fails
        """
        ...
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"""Connection factory for built-in connection types."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
from odibi.plugins import register_connection_factory
|
|
6
|
+
from odibi.utils.logging import logger
|
|
7
|
+
from odibi.utils.logging_context import get_logging_context
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_local_connection(name: str, config: Dict[str, Any]) -> Any:
    """Build a LocalConnection from a connection config.

    Args:
        name: Connection name, used only for logging
        config: Connection settings; ``base_path`` defaults to ``"./data"``

    Returns:
        The constructed LocalConnection
    """
    log_ctx = get_logging_context()
    log_ctx.log_connection(connection_type="local", connection_name=name, action="create")

    # Imported lazily, after the "create" event has been logged.
    from odibi.connections.local import LocalConnection

    root = config.get("base_path", "./data")
    local_conn = LocalConnection(base_path=root)

    log_ctx.log_connection(
        connection_type="local",
        connection_name=name,
        action="created",
        base_path=root,
    )
    return local_conn
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_http_connection(name: str, config: Dict[str, Any]) -> Any:
    """Build an HttpConnection from a connection config.

    Args:
        name: Connection name, used only for logging
        config: Connection settings; reads ``base_url`` (default ``""``),
            ``headers`` and ``auth``

    Returns:
        The constructed HttpConnection
    """
    log_ctx = get_logging_context()
    log_ctx.log_connection(connection_type="http", connection_name=name, action="create")

    # Imported lazily, after the "create" event has been logged.
    from odibi.connections.http import HttpConnection

    url = config.get("base_url", "")
    default_headers = config.get("headers")
    auth_settings = config.get("auth")
    http_conn = HttpConnection(base_url=url, headers=default_headers, auth=auth_settings)

    log_ctx.log_connection(
        connection_type="http",
        connection_name=name,
        action="created",
        base_url=url,
    )
    return http_conn
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def create_azure_blob_connection(name: str, config: Dict[str, Any]) -> Any:
    """Factory for AzureADLS (Blob) Connection.

    Credentials may be given either inside a nested ``auth`` mapping or as
    top-level keys; the nested value wins when both are present and truthy.
    When neither ``auth_mode`` (top level) nor ``mode`` (inside ``auth``) is
    set, the auth mode is inferred from which credentials are present.

    Args:
        name: Connection name, used for logging and error messages
        config: Connection settings. Requires ``account_name`` (or
            ``account``) and ``container``.

    Returns:
        The constructed AzureADLS connection

    Raises:
        ImportError: If the AzureADLS connection module cannot be imported
        ValueError: If no account name is configured
        KeyError: If ``container`` is missing from ``config``
    """
    ctx = get_logging_context()
    ctx.log_connection(connection_type="azure_blob", connection_name=name, action="create")

    try:
        from odibi.connections.azure_adls import AzureADLS
    except ImportError as e:
        ctx.error(
            f"Failed to import AzureADLS for connection '{name}'",
            connection_name=name,
            error=str(e),
        )
        # Chain the original error so the missing module stays visible.
        raise ImportError(
            "Azure ADLS support requires 'pip install odibi[azure]'. "
            "See README.md for installation instructions."
        ) from e

    # Handle config discrepancies
    account = config.get("account_name") or config.get("account")
    if not account:
        ctx.error(
            f"Connection '{name}' missing 'account_name'",
            connection_name=name,
            config_keys=list(config.keys()),
        )
        raise ValueError(
            f"Connection '{name}' missing 'account_name'. "
            f"Expected 'account_name' or 'account' in config, got keys: {list(config.keys())}"
        )

    # An explicit ``auth: null`` yields None rather than a missing key.
    auth_config = config.get("auth") or {}

    def _setting(key: str) -> Any:
        """Return the nested ``auth`` value for *key*, else the top-level one."""
        return auth_config.get(key) or config.get(key)

    # Extract auth details
    key_vault_name = _setting("key_vault_name")
    secret_name = _setting("secret_name")
    account_key = _setting("account_key")
    sas_token = _setting("sas_token")
    tenant_id = _setting("tenant_id")
    client_id = _setting("client_id")
    client_secret = _setting("client_secret")

    auth_mode = auth_config.get("mode") or config.get("auth_mode", "key_vault")

    # Auto-detect auth_mode if not explicitly set
    if "auth_mode" not in config and "mode" not in auth_config:
        if sas_token:
            auth_mode = "sas_token"
        elif key_vault_name and secret_name:
            auth_mode = "key_vault"
        elif account_key:
            auth_mode = "direct_key"
        elif tenant_id and client_id and client_secret:
            auth_mode = "service_principal"
        else:
            auth_mode = "managed_identity"

        ctx.debug(
            f"Auto-detected auth_mode for connection '{name}'",
            connection_name=name,
            auth_mode=auth_mode,
        )

    validation_mode = config.get("validation_mode", "lazy")
    validate = config.get("validate")
    if validate is None:
        # Only "eager" validation mode validates at construction time.
        validate = validation_mode == "eager"

    # Register secrets (log that we're registering, not the values)
    for label, secret in (
        ("account_key", account_key),
        ("sas_token", sas_token),
        ("client_secret", client_secret),
    ):
        if secret:
            logger.register_secret(secret)
            ctx.debug(
                f"Registered {label} secret for connection '{name}'",
                connection_name=name,
            )

    try:
        # Looked up inside the try so a missing key is logged below as well.
        container = config["container"]
        connection = AzureADLS(
            account=account,
            container=container,
            path_prefix=config.get("path_prefix", ""),
            auth_mode=auth_mode,
            key_vault_name=key_vault_name,
            secret_name=secret_name,
            account_key=account_key,
            sas_token=sas_token,
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
            validate=validate,
        )

        ctx.log_connection(
            connection_type="azure_blob",
            connection_name=name,
            action="created",
            account=account,
            container=container,
            auth_mode=auth_mode,
            validation_mode=validation_mode,
        )
        return connection

    except Exception as e:
        ctx.error(
            f"Failed to create Azure Blob connection '{name}'",
            connection_name=name,
            account=account,
            container=config.get("container"),
            auth_mode=auth_mode,
            error=str(e),
        )
        raise
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def create_delta_connection(name: str, config: Dict[str, Any]) -> Any:
    """Factory for Delta Connection.

    Two modes are supported:

    * **local path** - when ``config`` contains a ``path`` key, a
      LocalConnection rooted at that path is returned.
    * **catalog** - otherwise a lightweight connection is returned whose
      ``get_path`` builds a dotted table identifier from ``catalog``,
      ``schema`` (default ``"default"``) and the table name.

    Args:
        name: Connection name, used only for logging
        config: Connection settings (``path``/``base_path`` or
            ``catalog``/``schema``)

    Returns:
        A LocalConnection (path mode) or a catalog-style connection
    """
    ctx = get_logging_context()
    ctx.log_connection(connection_type="delta", connection_name=name, action="create")

    # Local path-based Delta
    if "path" in config:
        from odibi.connections.local import LocalConnection

        base_path = config.get("path") or config.get("base_path")
        connection = LocalConnection(base_path=base_path)

        ctx.log_connection(
            connection_type="delta",
            connection_name=name,
            action="created",
            mode="local_path",
            base_path=base_path,
        )
        return connection

    # Catalog based (Spark only)
    from odibi.connections.base import BaseConnection

    class DeltaCatalogConnection(BaseConnection):
        """Connection that resolves table names to catalog identifiers."""

        def __init__(self, catalog, schema):
            self.catalog = catalog
            self.schema = schema

        def get_path(self, table):
            """Return ``catalog.schema.table``, or ``schema.table`` without a catalog."""
            # Without this guard a missing catalog would be formatted as the
            # literal text "None" in the identifier.
            if not self.catalog:
                return f"{self.schema}.{table}"
            return f"{self.catalog}.{self.schema}.{table}"

        def validate(self):
            """No-op: this connection performs no validation."""
            pass

        def pandas_storage_options(self):
            """Return an empty mapping of storage options."""
            return {}

    catalog = config.get("catalog")
    schema = config.get("schema") or "default"
    connection = DeltaCatalogConnection(catalog=catalog, schema=schema)

    ctx.log_connection(
        connection_type="delta",
        connection_name=name,
        action="created",
        mode="catalog",
        catalog=catalog,
        schema=schema,
    )
    return connection
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def create_sql_server_connection(name: str, config: Dict[str, Any]) -> Any:
    """Factory for SQL Server / Azure SQL Connection.

    Credentials may be given either inside a nested ``auth`` mapping or as
    top-level keys; the nested value wins when both are present and truthy.
    When ``auth_mode`` is not set, it is inferred: ``"sql"`` when a username
    and password are present, ``"key_vault"`` when a key vault name, secret
    name and username are present, otherwise ``"aad_msi"``.

    Args:
        name: Connection name, used for logging and error messages
        config: Connection settings. Requires ``host`` (or ``server``) and
            ``database``; ``driver``, ``port`` and ``timeout`` have defaults.

    Returns:
        The constructed AzureSQL connection

    Raises:
        ImportError: If the AzureSQL connection module cannot be imported
        ValueError: If neither ``host`` nor ``server`` is configured
        KeyError: If ``database`` is missing from ``config``
    """
    ctx = get_logging_context()
    ctx.log_connection(connection_type="sql_server", connection_name=name, action="create")

    try:
        from odibi.connections.azure_sql import AzureSQL
    except ImportError as e:
        ctx.error(
            f"Failed to import AzureSQL for connection '{name}'",
            connection_name=name,
            error=str(e),
        )
        # Chain the original error so the missing module stays visible.
        raise ImportError(
            "Azure SQL support requires 'pip install odibi[azure]'. "
            "See README.md for installation instructions."
        ) from e

    server = config.get("host") or config.get("server")
    if not server:
        ctx.error(
            f"Connection '{name}' missing 'host' or 'server'",
            connection_name=name,
            config_keys=list(config.keys()),
        )
        raise ValueError(
            f"Connection '{name}' missing 'host' or 'server'. " f"Got keys: {list(config.keys())}"
        )

    # An explicit ``auth: null`` yields None rather than a missing key.
    auth_config = config.get("auth") or {}
    username = auth_config.get("username") or config.get("username")
    password = auth_config.get("password") or config.get("password")
    key_vault_name = auth_config.get("key_vault_name") or config.get("key_vault_name")
    secret_name = auth_config.get("secret_name") or config.get("secret_name")

    auth_mode = config.get("auth_mode")
    if not auth_mode:
        if username and password:
            auth_mode = "sql"
        elif key_vault_name and secret_name and username:
            auth_mode = "key_vault"
        else:
            auth_mode = "aad_msi"

        ctx.debug(
            f"Auto-detected auth_mode for connection '{name}'",
            connection_name=name,
            auth_mode=auth_mode,
        )

    if password:
        # Registered with the logger as a secret; the value itself is not logged here.
        logger.register_secret(password)
        ctx.debug(f"Registered password secret for connection '{name}'", connection_name=name)

    # Read once: used both for the connection and for the "created" log event.
    port = config.get("port", 1433)

    try:
        # Looked up inside the try so a missing key is logged below as well.
        database = config["database"]
        connection = AzureSQL(
            server=server,
            database=database,
            driver=config.get("driver", "ODBC Driver 18 for SQL Server"),
            username=username,
            password=password,
            auth_mode=auth_mode,
            key_vault_name=key_vault_name,
            secret_name=secret_name,
            port=port,
            timeout=config.get("timeout", 30),
        )

        ctx.log_connection(
            connection_type="sql_server",
            connection_name=name,
            action="created",
            server=server,
            database=database,
            auth_mode=auth_mode,
            port=port,
        )
        return connection

    except Exception as e:
        ctx.error(
            f"Failed to create SQL Server connection '{name}'",
            connection_name=name,
            server=server,
            database=config.get("database"),
            auth_mode=auth_mode,
            error=str(e),
        )
        raise
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def register_builtins():
    """Register every built-in connection factory under its type name(s)."""
    builtin_factories = (
        ("local", create_local_connection),
        ("http", create_http_connection),
        # Azure Blob / ADLS share one factory
        ("azure_blob", create_azure_blob_connection),
        ("azure_adls", create_azure_blob_connection),
        # Delta
        ("delta", create_delta_connection),
        # SQL Server / Azure SQL share one factory
        ("sql_server", create_sql_server_connection),
        ("azure_sql", create_sql_server_connection),
    )
    for type_name, factory in builtin_factories:
        register_connection_factory(type_name, factory)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""HTTP Connection implementation."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
from urllib.parse import urljoin
|
|
5
|
+
|
|
6
|
+
from odibi.connections.base import BaseConnection
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HttpConnection(BaseConnection):
    """Connection to HTTP/HTTPS APIs."""

    def __init__(
        self,
        base_url: str,
        headers: Optional[Dict[str, str]] = None,
        auth: Optional[Dict[str, str]] = None,
        validate: bool = True,
    ):
        """Initialize HTTP connection.

        Args:
            base_url: Base URL for API
            headers: Default headers (copied; the caller's dict is not modified)
            auth: Authentication details. Recognised shapes, checked in
                this order: ``{"token": ...}`` (Bearer),
                ``{"username": ..., "password": ...}`` (Basic),
                ``{"api_key": ..., "header_name": ...}`` (custom header,
                default ``X-API-Key``)
            validate: Whether to run :meth:`validate` during construction

        Raises:
            ValueError: If ``validate`` is true and ``base_url`` is empty
        """
        # Normalise to exactly one trailing slash so urljoin() appends to the
        # base path instead of replacing its last segment.
        self.base_url = (base_url or "").rstrip("/") + "/"
        # Copy: auth headers are added below and must not leak back into the
        # caller's dict.
        self.headers = dict(headers) if headers else {}

        if auth:
            if "token" in auth:
                self.headers["Authorization"] = f"Bearer {auth['token']}"
            elif "username" in auth and "password" in auth:
                import base64

                creds = f"{auth['username']}:{auth['password']}"
                b64_creds = base64.b64encode(creds.encode()).decode()
                self.headers["Authorization"] = f"Basic {b64_creds}"
            elif "api_key" in auth:
                # Common pattern: X-API-Key header or similar
                header_name = auth.get("header_name", "X-API-Key")
                self.headers[header_name] = auth["api_key"]

        if validate:
            self.validate()

    def validate(self) -> None:
        """Validate connection configuration.

        Raises:
            ValueError: If validation fails
        """
        # base_url always carries the trailing slash added in __init__, so an
        # empty URL shows up as "/" and a plain truthiness check never fires.
        if not self.base_url.strip("/").strip():
            raise ValueError("HTTP connection requires 'base_url'")

    def get_path(self, path: str) -> str:
        """Resolve endpoint path.

        Args:
            path: API endpoint (e.g., 'v1/users'); absolute http(s) URLs are
                returned unchanged

        Returns:
            Full URL
        """
        if path.startswith(("http://", "https://")):
            return path

        # Leading slashes are stripped so the endpoint is joined under the
        # base path rather than replacing it.
        return urljoin(self.base_url, path.lstrip("/"))

    def pandas_storage_options(self) -> Dict[str, Any]:
        """Get storage options for Pandas/fsspec.

        Returns:
            Dictionary with headers
        """
        # For HTTP(S) in Pandas (urllib), storage_options ARE the headers.
        return self.headers
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Local filesystem connection."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from odibi.connections.base import BaseConnection
|
|
6
|
+
from odibi.utils.logging_context import get_logging_context
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LocalConnection(BaseConnection):
    """Connection to local filesystem or URI-based paths (e.g. dbfs:/, file://)."""

    def __init__(self, base_path: str = "./data"):
        """Initialize local connection.

        Args:
            base_path: Base directory for all paths (can be local path or URI)
        """
        ctx = get_logging_context()
        ctx.log_connection(
            connection_type="local",
            connection_name="LocalConnection",
            action="init",
            base_path=base_path,
        )

        self.base_path_str = base_path
        # Anything containing ":/" is handled as an opaque URI string.
        # NOTE(review): this also matches drive-letter paths written with
        # forward slashes (e.g. "C:/data") - confirm that is intended.
        self.is_uri = "://" in base_path or ":/" in base_path

        if not self.is_uri:
            self.base_path = Path(base_path)
            ctx.debug(
                "LocalConnection initialized with filesystem path",
                base_path=base_path,
                is_uri=False,
            )
        else:
            self.base_path = None  # Not used for URIs
            ctx.debug(
                "LocalConnection initialized with URI path",
                base_path=base_path,
                is_uri=True,
            )

    def get_path(self, relative_path: str) -> str:
        """Get full path for a relative path.

        Args:
            relative_path: Relative path from base

        Returns:
            Full absolute path or URI
        """
        ctx = get_logging_context()

        if self.is_uri:
            # URIs are joined as plain strings. Strip every leading slash or
            # backslash, in any order, so exactly one separator ends up
            # between base and relative part.
            clean_rel = relative_path.lstrip("/\\")
            if self.base_path_str.endswith(("/", "\\")):
                full_path = f"{self.base_path_str}{clean_rel}"
            else:
                # Use forward slash for URIs
                full_path = f"{self.base_path_str}/{clean_rel}"

            ctx.debug(
                "Resolved URI path",
                relative_path=relative_path,
                full_path=full_path,
            )
            return full_path

        # Standard local path logic
        full_path = self.base_path / relative_path
        resolved = str(full_path.absolute())

        ctx.debug(
            "Resolved local path",
            relative_path=relative_path,
            full_path=resolved,
        )
        return resolved

    def validate(self) -> None:
        """Validate that base path exists or can be created.

        For filesystem paths the base directory is created when missing.
        URI base paths are not checked here.

        Raises:
            Exception: Any error raised while checking or creating the base
                directory is logged and re-raised unchanged
        """
        ctx = get_logging_context()
        ctx.debug(
            "Validating LocalConnection",
            base_path=self.base_path_str,
            is_uri=self.is_uri,
        )

        if self.is_uri:
            # Cannot validate/create URIs with local filesystem calls
            # Assume valid or handled by engine
            ctx.debug(
                "Skipping URI validation (handled by engine)",
                base_path=self.base_path_str,
            )
            return

        # Create base directory if it doesn't exist
        try:
            # Sample existence BEFORE mkdir; afterwards the directory always
            # exists and "created" could never be reported as True.
            already_existed = self.base_path.exists()
            self.base_path.mkdir(parents=True, exist_ok=True)
            ctx.info(
                "LocalConnection validated successfully",
                base_path=str(self.base_path.absolute()),
                created=not already_existed,
            )
        except Exception as e:
            ctx.error(
                "LocalConnection validation failed",
                base_path=self.base_path_str,
                error=str(e),
            )
            raise
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Local DBFS mock for testing Databricks pipelines locally."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Union
|
|
5
|
+
|
|
6
|
+
from .base import BaseConnection
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LocalDBFS(BaseConnection):
    """Mock DBFS connection for local development.

    Maps dbfs:/ paths to local filesystem for testing.
    Useful for developing Databricks pipelines locally.
    """

    def __init__(self, root: Union[str, Path] = ".dbfs"):
        """Initialize local DBFS mock.

        Args:
            root: Local directory to use as DBFS root (default: .dbfs)
        """
        self.root = Path(root).resolve()

    def resolve(self, path: str) -> str:
        """Resolve dbfs:/ path to local filesystem path.

        Args:
            path: DBFS path (e.g., 'dbfs:/FileStore/data.csv')

        Returns:
            Absolute local filesystem path

        Example:
            >>> conn = LocalDBFS(root="/tmp/dbfs")
            >>> conn.resolve("dbfs:/FileStore/data.csv")
            '/tmp/dbfs/FileStore/data.csv'
        """
        # Remove only a leading dbfs:/ scheme; the same text occurring later
        # in the path is part of the file name and must be preserved.
        scheme = "dbfs:/"
        if path.startswith(scheme):
            path = path[len(scheme):]

        # Drop leading slashes so the join below stays under the root
        # instead of replacing it with an absolute path.
        clean_path = path.lstrip("/")

        return str(self.root / clean_path)

    def ensure_dir(self, path: str) -> None:
        """Create parent directories for given path.

        Args:
            path: DBFS path
        """
        local_path = Path(self.resolve(path))
        local_path.parent.mkdir(parents=True, exist_ok=True)

    def get_path(self, relative_path: str) -> str:
        """Get local filesystem path for DBFS path."""
        return self.resolve(relative_path)

    def validate(self) -> None:
        """Validate local DBFS configuration."""
        pass  # No validation needed for local mock
|
odibi/constants.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Constants used throughout the Odibi framework."""
|
|
2
|
+
|
|
3
|
+
# --- Delta table maintenance ---
DEFAULT_VACUUM_RETENTION_HOURS = 7 * 24  # 7 days, expressed in hours (168)

# --- SQL operations ---
DEFAULT_SQL_CHUNK_SIZE = 1_000

# --- Connection timeouts (seconds) ---
DEFAULT_CONNECTION_TIMEOUT = 30
DEFAULT_KEY_VAULT_TIMEOUT = 30.0

# --- Story generation ---
DEFAULT_MAX_SAMPLE_ROWS = 10

# --- Delta history ---
DEFAULT_HISTORY_LIMIT = 100
|