metaxy 0.0.1.dev3__py3-none-any.whl
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/metadata_store/_ducklake_support.py (new file)
@@ -0,0 +1,419 @@
"""Shared DuckLake configuration helpers."""

import os
from collections.abc import Mapping, Sequence
from typing import Any, Protocol, runtime_checkable

from duckdb import DuckDBPyConnection  # noqa: TID252
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    ValidationInfo,
    computed_field,
    field_validator,
)


@runtime_checkable
class SupportsDuckLakeParts(Protocol):
    """Protocol for objects that can produce DuckLake attachment SQL fragments."""

    def get_ducklake_sql_parts(self, alias: str) -> tuple[str, str]: ...


@runtime_checkable
class SupportsModelDump(Protocol):
    """Protocol for Pydantic-like objects that expose a model_dump method."""

    def model_dump(self) -> Mapping[str, Any]: ...


DuckLakeBackendInput = Mapping[str, Any] | SupportsDuckLakeParts | SupportsModelDump
DuckLakeBackend = SupportsDuckLakeParts | dict[str, Any]


def coerce_backend_config(
    backend: DuckLakeBackendInput, *, role: str
) -> DuckLakeBackend:
    """Normalize metadata/storage backend configuration."""
    if isinstance(backend, SupportsDuckLakeParts):
        return backend
    if isinstance(backend, SupportsModelDump):
        return dict(backend.model_dump())
    if isinstance(backend, Mapping):
        return dict(backend)
    raise TypeError(
        f"DuckLake {role} must be a mapping or expose get_ducklake_sql_parts()/model_dump(), "
        f"got {type(backend)!r}."
    )
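
Annotation (illustrative sketch, not part of the released file): coerce_backend_config dispatches on duck typing via the two runtime-checkable protocols above, so a protocol implementer, a Pydantic-like object, and a plain mapping are all accepted. CustomBackend below is hypothetical.

# Hypothetical example, assuming the definitions above are in scope.
class CustomBackend:
    def get_ducklake_sql_parts(self, alias: str) -> tuple[str, str]:
        return "", f"DATA_PATH '/data/{alias}/'"

coerce_backend_config(CustomBackend(), role="storage backend")  # returned unchanged
coerce_backend_config({"type": "local", "path": "/tmp/lake"}, role="storage backend")  # shallow-copied dict
# coerce_backend_config(42, role="storage backend") would raise TypeError
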
def resolve_metadata_backend(backend: DuckLakeBackend, alias: str) -> tuple[str, str]:
    """Generate DuckLake metadata backend SQL fragments."""
    if isinstance(backend, SupportsDuckLakeParts):
        return backend.get_ducklake_sql_parts(alias)
    return _metadata_sql_from_mapping(backend, alias)


def resolve_storage_backend(backend: DuckLakeBackend, alias: str) -> tuple[str, str]:
    """Generate DuckLake storage backend SQL fragments."""
    if isinstance(backend, SupportsDuckLakeParts):
        return backend.get_ducklake_sql_parts(alias)
    return _storage_sql_from_mapping(backend, alias)


def _metadata_sql_from_mapping(
    config: Mapping[str, Any], alias: str
) -> tuple[str, str]:
    backend_type = str(config.get("type", "")).lower()
    if backend_type == "postgres":
        return _metadata_postgres_sql(config, alias)
    if backend_type in {"sqlite", "duckdb"}:
        path = config.get("path")
        if not path:
            raise ValueError(
                "DuckLake metadata backend of type "
                f"'{backend_type}' requires a 'path' entry."
            )
        literal_path = _stringify_scalar(path)
        return "", f"METADATA_PATH {literal_path}"
    raise ValueError(f"Unsupported DuckLake metadata backend type: {backend_type!r}")


def _metadata_postgres_sql(config: Mapping[str, Any], alias: str) -> tuple[str, str]:
    database = config.get("database")
    user = config.get("user")
    password = config.get("password")
    if database is None or user is None or password is None:
        raise ValueError(
            "DuckLake postgres metadata backend requires 'database', 'user', and 'password'."
        )
    host = config.get("host") or os.getenv("DUCKLAKE_PG_HOST", "localhost")
    port_value = config.get("port", 5432)
    try:
        port = int(port_value)
    except (TypeError, ValueError) as exc:
        raise ValueError(
            "DuckLake postgres metadata backend requires 'port' to be an integer."
        ) from exc
    secret_params: dict[str, Any] = {
        "HOST": host,
        "PORT": port,
        "DATABASE": database,
        "USER": user,
        "PASSWORD": password,
    }

    extra_params = config.get("secret_parameters")
    if isinstance(extra_params, Mapping):
        for key, value in extra_params.items():
            secret_params[str(key).upper()] = value

    secret_name = f"secret_catalog_{alias}"
    secret_sql = build_secret_sql(secret_name, "postgres", secret_params)
    metadata_params = (
        "METADATA_PATH '', "
        f"METADATA_PARAMETERS MAP {{'TYPE': 'postgres', 'SECRET': '{secret_name}'}}"
    )
    return secret_sql, metadata_params
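
Annotation (not part of the released file): for a postgres catalog the function emits a named DuckDB secret plus a METADATA_PARAMETERS clause referencing it. A sketch with placeholder credentials, assuming DUCKLAKE_PG_HOST is unset so the host falls back to localhost:

secret_sql, metadata_params = _metadata_sql_from_mapping(
    {"type": "postgres", "database": "lake", "user": "app", "password": "pw"},
    alias="ducklake",
)
# secret_sql ==
#   "CREATE OR REPLACE SECRET secret_catalog_ducklake ( TYPE postgres, "
#   "DATABASE 'lake', HOST 'localhost', PASSWORD 'pw', PORT 5432, USER 'app' );"
# metadata_params ==
#   "METADATA_PATH '', METADATA_PARAMETERS MAP {'TYPE': 'postgres', 'SECRET': 'secret_catalog_ducklake'}"
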
def _storage_sql_from_mapping(config: Mapping[str, Any], alias: str) -> tuple[str, str]:
    storage_type = str(config.get("type", "")).lower()
    if storage_type == "s3":
        return _storage_s3_sql(config, alias)
    if storage_type == "local":
        path = config.get("path")
        if not path:
            raise ValueError("DuckLake local storage backend requires 'path'.")
        literal_path = _stringify_scalar(path)
        return "", f"DATA_PATH {literal_path}"
    raise ValueError(f"Unsupported DuckLake storage backend type: {storage_type!r}")


def _storage_s3_sql(config: Mapping[str, Any], alias: str) -> tuple[str, str]:
    secret_name = f"secret_storage_{alias}"
    secret_config = config.get("secret")
    secret_params: dict[str, Any]
    if isinstance(secret_config, Mapping):
        secret_params = {str(k): v for k, v in secret_config.items()}
    else:  # Backward-compatible typed configuration
        required_keys = [
            "aws_access_key_id",
            "aws_secret_access_key",
            "endpoint_url",
            "bucket",
        ]
        missing = [key for key in required_keys if key not in config]
        if missing:
            raise ValueError(
                "DuckLake S3 storage backend expects either a 'secret' mapping "
                "or the legacy keys: "
                + ", ".join(required_keys)
                + f". Missing: {missing}"
            )
        secret_params = {
            "KEY_ID": config["aws_access_key_id"],
            "SECRET": config["aws_secret_access_key"],
            "ENDPOINT": config["endpoint_url"],
            "URL_STYLE": config.get("url_style", "path"),
            "REGION": config.get("region", "us-east-1"),
            "USE_SSL": config.get("use_ssl", True),
            "SCOPE": config.get("scope") or f"s3://{config['bucket']}",
        }
    secret_sql = build_secret_sql(secret_name, "S3", secret_params)

    data_path = config.get("data_path")
    if not data_path:
        bucket = config.get("bucket")
        prefix = config.get("prefix")
        if bucket:
            clean_prefix = str(prefix or "").strip("/")
            base_path = f"s3://{bucket}"
            data_path = (
                f"{base_path}/{clean_prefix}/" if clean_prefix else f"{base_path}/"
            )
        else:
            scope = secret_params.get("SCOPE")
            if isinstance(scope, str) and scope.startswith("s3://"):
                data_path = scope if scope.endswith("/") else f"{scope}/"
    if not data_path:
        raise ValueError(
            "DuckLake S3 storage backend requires either 'data_path', a 'bucket', "
            "or a secret SCOPE starting with 's3://'."
        )

    data_path_sql = f"DATA_PATH {_stringify_scalar(data_path)}"
    return secret_sql, data_path_sql
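
Annotation (not part of the released file): when no explicit data_path is given, the S3 branch derives one from the bucket and optional prefix. A sketch with placeholder values:

_, data_path_sql = _storage_sql_from_mapping(
    {
        "type": "s3",
        "aws_access_key_id": "placeholder-key",
        "aws_secret_access_key": "placeholder-secret",
        "endpoint_url": "s3.example.com",
        "bucket": "lakehouse",
        "prefix": "prod/metaxy",
    },
    alias="ducklake",
)
# data_path_sql == "DATA_PATH 's3://lakehouse/prod/metaxy/'"
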
def build_secret_sql(
    secret_name: str, secret_type: str, parameters: Mapping[str, Any]
) -> str:
    """Construct DuckDB secret creation SQL."""
    formatted_params = _format_secret_parameters(parameters)
    extra_clause = f", {', '.join(formatted_params)}" if formatted_params else ""
    return (
        f"CREATE OR REPLACE SECRET {secret_name} ( TYPE {secret_type}{extra_clause} );"
    )


def _format_secret_parameters(parameters: Mapping[str, Any]) -> list[str]:
    parts: list[str] = []
    for key, value in sorted(parameters.items()):
        formatted = _stringify_scalar(value)
        if formatted is None:
            continue
        parts.append(f"{key} {formatted}")
    return parts


def _stringify_scalar(value: Any) -> str | None:
    if value is None:
        return None
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    escaped = str(value).replace("'", "''")
    return f"'{escaped}'"
def format_attach_options(options: Mapping[str, Any] | None) -> str:
    """Format ATTACH options clause."""
    if not options:
        return ""

    parts: list[str] = []
    for key, value in sorted(options.items()):
        formatted = _stringify_scalar(value)
        if formatted is None:
            continue
        parts.append(f"{str(key).upper()} {formatted}")

    return f" ({', '.join(parts)})" if parts else ""
class DuckLakeAttachmentConfig(BaseModel):
    """Configuration payload used to attach DuckLake to a DuckDB connection."""

    metadata_backend: DuckLakeBackend
    storage_backend: DuckLakeBackend
    alias: str = "ducklake"
    plugins: tuple[str, ...] = Field(default_factory=lambda: ("ducklake",))
    attach_options: dict[str, Any] = Field(default_factory=dict)

    model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")

    @field_validator("metadata_backend", "storage_backend", mode="before")
    @classmethod
    def _coerce_backends(
        cls, value: DuckLakeBackendInput, info: ValidationInfo
    ) -> DuckLakeBackend:
        field_name = info.field_name or "backend"
        return coerce_backend_config(value, role=field_name.replace("_", " "))

    @field_validator("alias", mode="before")
    @classmethod
    def _coerce_alias(cls, value: Any) -> str:
        if value is None:
            return "ducklake"
        alias = str(value).strip()
        return alias or "ducklake"

    @field_validator("plugins", mode="before")
    @classmethod
    def _coerce_plugins(cls, value: Any) -> tuple[str, ...]:
        if value is None:
            return ("ducklake",)
        if isinstance(value, str):
            return (value,)
        if isinstance(value, Sequence):
            try:
                return tuple(str(item) for item in value)
            except TypeError as exc:  # pragma: no cover - defensive guard
                raise TypeError(
                    "DuckLake plugins must be a string or sequence of strings."
                ) from exc
        raise TypeError("DuckLake plugins must be a string or sequence of strings.")

    @field_validator("attach_options", mode="before")
    @classmethod
    def _coerce_attach_options(cls, value: Any) -> dict[str, Any]:
        if value is None:
            return {}
        if isinstance(value, Mapping):
            return dict(value)
        raise TypeError("DuckLake attach_options must be a mapping if provided.")

    @computed_field(return_type=tuple[str, str])
    def metadata_sql_parts(self) -> tuple[str, str]:
        """Pre-computed metadata SQL components for DuckLake attachments."""
        return resolve_metadata_backend(self.metadata_backend, self.alias)

    @computed_field(return_type=tuple[str, str])
    def storage_sql_parts(self) -> tuple[str, str]:
        """Pre-computed storage SQL components for DuckLake attachments."""
        return resolve_storage_backend(self.storage_backend, self.alias)
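
Annotation (not part of the released file): thanks to the mode="before" validators, the model accepts loosely-typed input. A sketch with hypothetical paths:

config = DuckLakeAttachmentConfig.model_validate(
    {
        "metadata_backend": {"type": "duckdb", "path": "metadata.ducklake"},
        "storage_backend": {"type": "local", "path": "/tmp/lake_data"},
        "plugins": "ducklake",  # bare string is wrapped into a tuple
    }
)
assert config.alias == "ducklake"  # default applied
assert config.plugins == ("ducklake",)
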
class _PreviewCursor:
    """Collect commands for previewing DuckLake attachment SQL."""

    def __init__(self) -> None:
        self.commands: list[str] = []

    def execute(self, command: str) -> None:
        self.commands.append(command.strip())

    def close(self) -> None:  # pragma: no cover - no-op in preview mode
        pass


class _PreviewConnection:
    """Mock DuckDB connection used for previewing generated SQL."""

    def __init__(self) -> None:
        self._cursor = _PreviewCursor()

    def cursor(self) -> _PreviewCursor:
        return self._cursor


class DuckLakeAttachmentManager:
    """Responsible for configuring a DuckDB connection for DuckLake usage."""

    def __init__(self, config: DuckLakeAttachmentConfig):
        self._config = config

    def configure(self, conn: DuckDBPyConnection | _PreviewConnection) -> None:
        cursor = conn.cursor()
        try:
            for plugin in self._config.plugins:
                cursor.execute(f"INSTALL {plugin};")
                cursor.execute(f"LOAD {plugin};")

            metadata_secret_sql, metadata_params_sql = self._config.metadata_sql_parts  # pyright: ignore[reportGeneralTypeIssues]
            storage_secret_sql, storage_params_sql = self._config.storage_sql_parts  # pyright: ignore[reportGeneralTypeIssues]

            if metadata_secret_sql:
                cursor.execute(metadata_secret_sql)
            if storage_secret_sql:
                cursor.execute(storage_secret_sql)

            ducklake_secret = f"secret_{self._config.alias}"
            cursor.execute(
                f"CREATE OR REPLACE SECRET {ducklake_secret} ("
                " TYPE DUCKLAKE,"
                f" {metadata_params_sql},"
                f" {storage_params_sql}"
                " );"
            )

            options_clause = format_attach_options(self._config.attach_options)
            cursor.execute(
                f"ATTACH 'ducklake:{ducklake_secret}' AS {self._config.alias}{options_clause};"
            )
            cursor.execute(f"USE {self._config.alias};")
        finally:
            cursor.close()

    def preview_sql(self) -> list[str]:
        """Return the SQL statements that would be executed during configure()."""
        preview_conn = _PreviewConnection()
        self.configure(preview_conn)
        return preview_conn.cursor().commands
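
Annotation (not part of the released file): because _PreviewConnection mimics the two connection methods configure() uses, preview_sql() captures the exact statement sequence without a live DuckDB. Continuing the hypothetical local/duckdb config from above:

manager = DuckLakeAttachmentManager(
    DuckLakeAttachmentConfig.model_validate(
        {
            "metadata_backend": {"type": "duckdb", "path": "metadata.ducklake"},
            "storage_backend": {"type": "local", "path": "/tmp/lake_data"},
        }
    )
)
# manager.preview_sql() ==
#   ["INSTALL ducklake;",
#    "LOAD ducklake;",
#    "CREATE OR REPLACE SECRET secret_ducklake ( TYPE DUCKLAKE, METADATA_PATH 'metadata.ducklake', DATA_PATH '/tmp/lake_data' );",
#    "ATTACH 'ducklake:secret_ducklake' AS ducklake;",
#    "USE ducklake;"]
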
DuckLakeConfigInput = DuckLakeAttachmentConfig | Mapping[str, Any]


def build_ducklake_attachment(
    config: DuckLakeConfigInput,
) -> tuple[DuckLakeAttachmentConfig, DuckLakeAttachmentManager]:
    """Normalise ducklake configuration and create attachment manager."""
    if isinstance(config, DuckLakeAttachmentConfig):
        attachment_config = config
    elif isinstance(config, Mapping):
        attachment_config = DuckLakeAttachmentConfig.model_validate(config)
    else:  # pragma: no cover - defensive programming
        raise TypeError(
            "DuckLake configuration must be a DuckLakeAttachmentConfig or mapping."
        )

    manager = DuckLakeAttachmentManager(attachment_config)
    return attachment_config, manager
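
Annotation (not part of the released file): end-to-end usage against a real connection. This sketch assumes the ducklake extension is installable in your environment; the paths and alias are placeholders.

import duckdb

config, manager = build_ducklake_attachment(
    {
        "metadata_backend": {"type": "duckdb", "path": "metadata.ducklake"},
        "storage_backend": {"type": "local", "path": "/tmp/lake_data"},
        "alias": "lake",
    }
)
conn = duckdb.connect()
manager.configure(conn)  # INSTALL/LOAD, create secret, ATTACH ... AS lake, USE lake
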
def ensure_extensions_with_plugins(
    extensions: list[
        str | Any
    ],  # list[str | ExtensionSpec] - ExtensionSpec from duckdb.py
    plugins: Sequence[str],
) -> None:
    """Ensure DuckLake plugins are present in the extensions list.

    Args:
        extensions: List of extension names (str) or ExtensionSpec objects
        plugins: DuckLake plugin names to ensure are in the extensions list
    """
    existing_names: set[str] = set()
    for ext in extensions:
        if isinstance(ext, str):
            existing_names.add(ext)
        elif isinstance(ext, Mapping):
            name = ext.get("name")
            if not name:
                raise ValueError(
                    f"DuckDB extension mapping must have a non-empty 'name' key, got: {ext!r}"
                )
            existing_names.add(str(name))
        else:
            # Must be ExtensionSpec with 'name' attribute
            existing_names.add(ext.name)  # pyright: ignore[reportAttributeAccessIssue]

    for plugin in plugins:
        if plugin not in existing_names:
            extensions.append(plugin)
            existing_names.add(plugin)
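
Annotation (not part of the released file): a sketch of the in-place deduplication; the extension and plugin names are hypothetical.

extensions: list[Any] = ["httpfs", {"name": "ducklake"}]
ensure_extensions_with_plugins(extensions, plugins=("ducklake", "sqlite"))
assert extensions == ["httpfs", {"name": "ducklake"}, "sqlite"]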