metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
"""Delta Lake metadata store implemented with delta-rs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterator, Sequence
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from functools import cached_property
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
import deltalake
|
|
12
|
+
import narwhals as nw
|
|
13
|
+
import polars as pl
|
|
14
|
+
from narwhals.typing import Frame
|
|
15
|
+
from pydantic import Field
|
|
16
|
+
from typing_extensions import Self
|
|
17
|
+
|
|
18
|
+
from metaxy._utils import switch_implementation_to_polars
|
|
19
|
+
from metaxy.metadata_store.base import MetadataStore, MetadataStoreConfig
|
|
20
|
+
from metaxy.metadata_store.types import AccessMode
|
|
21
|
+
from metaxy.metadata_store.utils import is_local_path
|
|
22
|
+
from metaxy.models.plan import FeaturePlan
|
|
23
|
+
from metaxy.models.types import CoercibleToFeatureKey, FeatureKey
|
|
24
|
+
from metaxy.versioning.polars import PolarsVersioningEngine
|
|
25
|
+
from metaxy.versioning.types import HashAlgorithm
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DeltaMetadataStoreConfig(MetadataStoreConfig):
    """Configuration for DeltaMetadataStore.

    Declarative (Pydantic-style) counterpart of ``DeltaMetadataStore.__init__``:
    each field mirrors a constructor argument and is consumed via
    ``DeltaMetadataStore.from_config``.

    Example:
    ```python
    config = DeltaMetadataStoreConfig(
        root_path="s3://my-bucket/metaxy",
        storage_options={"AWS_REGION": "us-west-2"},
        layout="nested",
    )

    store = DeltaMetadataStore.from_config(config)
    ```
    """

    # Local path or object-store URI under which all feature tables live.
    root_path: str | Path = Field(
        description="Base directory or URI where feature tables are stored.",
    )
    # Backend credentials/options forwarded verbatim to delta-rs
    # (e.g. AWS_REGION, AWS_ACCESS_KEY_ID, ...). None means "use defaults".
    storage_options: dict[str, Any] | None = Field(
        default=None,
        description="Storage backend options passed to delta-rs.",
    )
    # Directory scheme for per-feature tables; see DeltaMetadataStore._feature_uri.
    layout: Literal["flat", "nested"] = Field(
        default="nested",
        description="Directory layout for feature tables ('nested' or 'flat').",
    )
    # Extra kwargs merged over the store's default write options
    # (which include schema_mode="merge").
    delta_write_options: dict[str, Any] | None = Field(
        default=None,
        description="Options passed to deltalake.write_deltalake().",
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class DeltaMetadataStore(MetadataStore):
    """
    Delta Lake metadata store backed by [delta-rs](https://github.com/delta-io/delta-rs).

    It stores feature metadata in Delta Lake tables located under ``root_path``
    (one Delta table per feature, suffixed ``.delta``).
    It uses the Polars versioning engine for provenance calculations.

    Example:

    ```py
    from metaxy.metadata_store.delta import DeltaMetadataStore

    store = DeltaMetadataStore(
        root_path="s3://my-bucket/metaxy",
        storage_options={"AWS_REGION": "us-west-2"},
    )
    ```
    """

    # Delta auto-creates tables on first write, so the base-class warning about
    # auto-created tables is suppressed for this backend.
    _should_warn_auto_create_tables = False

    def __init__(
        self,
        root_path: str | Path,
        *,
        storage_options: dict[str, Any] | None = None,
        fallback_stores: list[MetadataStore] | None = None,
        layout: Literal["flat", "nested"] = "nested",
        delta_write_options: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize Delta Lake metadata store.

        Args:
            root_path: Base directory or URI where feature tables are stored.
                Supports local paths (`/path/to/dir`), `s3://` URLs, and other object store URIs.
            storage_options: Storage backend options passed to delta-rs.
                Example: `{"AWS_REGION": "us-west-2", "AWS_ACCESS_KEY_ID": "...", ...}`
                See https://delta-io.github.io/delta-rs/ for details on supported options.
            fallback_stores: Ordered list of read-only fallback stores.
            layout: Directory layout for feature tables. Options:

                - `"nested"`: Feature tables stored in nested directories `{part1}/{part2}.delta`

                - `"flat"`: Feature tables stored as `{part1}__{part2}.delta`

            delta_write_options: Additional options passed to deltalake.write_deltalake() - see https://delta-io.github.io/delta-rs/upgrade-guides/guide-1.0.0/#write_deltalake-api.
                Overrides default {"schema_mode": "merge"}. Example: {"max_workers": 4}
            **kwargs: Forwarded to [metaxy.metadata_store.base.MetadataStore][metaxy.metadata_store.base.MetadataStore].

        Raises:
            ValueError: If ``layout`` is not ``"flat"`` or ``"nested"``.
        """
        self.storage_options = storage_options or {}
        # Runtime guard in addition to the Literal annotation: callers may pass
        # an unchecked string (e.g. from config files).
        if layout not in ("flat", "nested"):
            raise ValueError(f"Invalid layout: {layout}. Must be 'flat' or 'nested'.")
        self.layout = layout
        self.delta_write_options = delta_write_options or {}

        root_str = str(root_path)
        self._is_remote = not is_local_path(root_str)

        if self._is_remote:
            # Remote path (S3, Azure, GCS, etc.) — keep the URI as-is, minus a
            # trailing slash so _feature_uri can join with a single "/".
            self._root_uri = root_str.rstrip("/")
        else:
            # Local path (including file:// and local:// URLs)
            if root_str.startswith("file://"):
                # Strip file:// prefix
                root_str = root_str[7:]
            elif root_str.startswith("local://"):
                # Strip local:// prefix
                root_str = root_str[8:]
            # Normalize to an absolute path so relative roots behave the same
            # regardless of the process working directory.
            local_path = Path(root_str).expanduser().resolve()
            self._root_uri = str(local_path)

        # This backend is Polars-only: provenance/versioning always runs on the
        # Polars engine regardless of the caller's frame implementation.
        super().__init__(
            fallback_stores=fallback_stores,
            versioning_engine_cls=PolarsVersioningEngine,
            versioning_engine="polars",
            **kwargs,
        )

    # ===== MetadataStore abstract methods =====

    def _has_feature_impl(self, feature: CoercibleToFeatureKey) -> bool:
        """Check if feature exists in Delta store.

        Existence is defined as "a Delta table is present at the feature's URI".

        Args:
            feature: Feature to check

        Returns:
            True if feature exists, False otherwise
        """
        feature_key = self._resolve_feature_key(feature)
        return self._table_exists(self._feature_uri(feature_key))

    def _get_default_hash_algorithm(self) -> HashAlgorithm:
        """Use XXHASH64 by default to match other non-SQL stores."""
        return HashAlgorithm.XXHASH64

    @contextmanager
    def _create_versioning_engine(
        self, plan: FeaturePlan
    ) -> Iterator[PolarsVersioningEngine]:
        """Create Polars versioning engine for Delta store.

        Thin delegation to the base-class Polars engine factory; exists to
        satisfy the MetadataStore abstract interface.
        """
        with self._create_polars_versioning_engine(plan) as engine:
            yield engine

    @contextmanager
    def open(self, mode: AccessMode = "read") -> Iterator[Self]:  # noqa: ARG002
        """Open the Delta Lake store.

        Delta-rs opens connections lazily per operation, so no connection state management needed.
        Re-entrant: nested ``with store.open()`` blocks share one logical session
        via a depth counter; only the outermost enter/exit flips ``_is_open``.

        Args:
            mode: Access mode for this connection session (accepted for consistency but not used).

        Yields:
            Self: The store instance with connection open
        """
        # Increment context depth to support nested contexts
        self._context_depth += 1

        try:
            # Only perform actual open on first entry
            if self._context_depth == 1:
                # Mark store as open and validate
                # Note: Delta auto-creates tables on first write, no need to pre-create them
                self._is_open = True
                self._validate_after_open()

            yield self
        finally:
            # Decrement context depth
            self._context_depth -= 1

            # Only perform actual close on last exit
            if self._context_depth == 0:
                self._is_open = False

    @cached_property
    def default_delta_write_options(self) -> dict[str, Any]:
        """Default write options for Delta Lake operations.

        Merges base defaults with user-provided delta_write_options.
        Base defaults: mode="append", schema_mode="merge", storage_options.

        NOTE: cached — callers that mutate the result must copy it first
        (see write_metadata_to_store).
        """
        write_kwargs: dict[str, Any] = {
            "mode": "append",
            "schema_mode": "merge",  # Allow schema evolution
            "storage_options": self.storage_options or None,
        }
        # Override with custom options from constructor
        write_kwargs.update(self.delta_write_options)
        return write_kwargs

    # ===== Internal helpers =====

    def _feature_uri(self, feature_key: FeatureKey) -> str:
        """Return the URI/path used by deltalake for this feature.

        The table directory always carries a ``.delta`` suffix; its position
        under ``root_path`` depends on ``self.layout``.
        """
        if self.layout == "nested":
            # Nested layout: store in directories like "part1/part2/part3"
            # Filter out empty parts to avoid creating absolute paths that would
            # cause os.path.join to discard the root_uri
            table_path = "/".join(part for part in feature_key.parts if part)
        else:
            # Flat layout: store in directories like "part1__part2__part3"
            # table_name already handles this correctly via __join
            table_path = feature_key.table_name
        return f"{self._root_uri}/{table_path}.delta"

    def _table_exists(self, table_uri: str) -> bool:
        """Check whether the provided URI already contains a Delta table.

        Works for both local and remote (object store) paths.
        """
        # for weird reasons deltalake.DeltaTable.is_deltatable() sometimes hangs in multi-threading settings
        # but a deltalake.DeltaTable can be constructed just fine
        # so we are relying on DeltaTableNotFoundError to check for existence
        from deltalake.exceptions import TableNotFoundError as DeltaTableNotFoundError

        # NOTE(review): passes self.storage_options directly (possibly {}),
        # whereas other call sites pass `self.storage_options or None` —
        # confirm delta-rs treats an empty dict the same as None.
        try:
            # without_files=True skips loading the file manifest — we only need
            # the table's transaction log to prove existence.
            _ = deltalake.DeltaTable(
                table_uri, storage_options=self.storage_options, without_files=True
            )
        except DeltaTableNotFoundError:
            return False
        return True

    # ===== Storage operations =====

    def write_metadata_to_store(
        self,
        feature_key: FeatureKey,
        df: Frame,
        **kwargs: Any,
    ) -> None:
        """Append metadata to the Delta table for a feature.

        Args:
            feature_key: Feature key to write to
            df: DataFrame with metadata (already validated)
            **kwargs: Backend-specific parameters (currently unused)
        """
        table_uri = self._feature_uri(feature_key)

        # Delta Lake auto-creates tables on first write, no need to check existence
        # Convert to Polars and collect lazy frames
        df_polars = switch_implementation_to_polars(df)

        # Collect lazy frames, keep eager frames as-is
        if isinstance(df_polars, nw.LazyFrame):
            df_native = df_polars.collect().to_native()
        else:
            df_native = df_polars.to_native()

        assert isinstance(df_native, pl.DataFrame)

        # Cast Enum columns to String to avoid delta-rs Utf8View incompatibility
        # (delta-rs parquet writer cannot handle Utf8View dictionary values)
        df_native = df_native.with_columns(pl.selectors.by_dtype(pl.Enum).cast(pl.Utf8))

        # Prepare write parameters for Polars write_delta
        # Extract mode and storage_options as top-level parameters
        # (copy first: default_delta_write_options is a cached dict shared
        # across calls, and pop() below would otherwise mutate it).
        write_opts = self.default_delta_write_options.copy()
        mode = write_opts.pop("mode", "append")
        storage_options = write_opts.pop("storage_options", None)

        # Write using Polars DataFrame.write_delta
        df_native.write_delta(
            table_uri,
            mode=mode,
            storage_options=storage_options,
            delta_write_options=write_opts or None,
        )

    def _drop_feature_metadata_impl(self, feature_key: FeatureKey) -> None:
        """Drop Delta table for the specified feature using soft delete.

        Uses Delta's delete operation which marks rows as deleted in the transaction log
        rather than physically removing files.
        """
        table_uri = self._feature_uri(feature_key)

        # Check if table exists first — dropping a non-existent feature is a no-op.
        if not self._table_exists(table_uri):
            return

        # Load the Delta table
        delta_table = deltalake.DeltaTable(
            table_uri,
            storage_options=self.storage_options or None,
            without_files=True,  # Don't track files for this operation
        )

        # Use Delta's delete operation - soft delete all rows
        # This marks rows as deleted in transaction log without physically removing files
        delta_table.delete()

    def read_metadata_in_store(
        self,
        feature: CoercibleToFeatureKey,
        *,
        filters: Sequence[nw.Expr] | None = None,
        columns: Sequence[str] | None = None,
        **kwargs: Any,
    ) -> nw.LazyFrame[Any] | None:
        """Read metadata stored in Delta for a single feature using lazy evaluation.

        Args:
            feature: Feature to read metadata for
            filters: List of Narwhals filter expressions
            columns: Subset of columns to return
            **kwargs: Backend-specific parameters (currently unused)

        Returns:
            A Narwhals LazyFrame over the feature's Delta table, or None when
            the table does not exist.
        """
        self._check_open()

        feature_key = self._resolve_feature_key(feature)
        table_uri = self._feature_uri(feature_key)
        if not self._table_exists(table_uri):
            return None

        # Use scan_delta for lazy evaluation
        lf = pl.scan_delta(
            table_uri,
            storage_options=self.storage_options or None,
        )

        # Convert to Narwhals
        nw_lazy = nw.from_native(lf)

        # Apply filters (unpack list, skip if empty)
        if filters:
            nw_lazy = nw_lazy.filter(*filters)

        # Apply column selection
        if columns is not None:
            nw_lazy = nw_lazy.select(columns)

        return nw_lazy

    def display(self) -> str:
        """Return human-readable representation of the store."""
        details = [f"path={self._root_uri}"]
        details.append(f"layout={self.layout}")
        return f"DeltaMetadataStore({', '.join(details)})"

    def get_store_metadata(self, feature_key: CoercibleToFeatureKey) -> dict[str, Any]:
        # Expose the resolved table URI so callers can locate the backing table.
        return {"uri": self._feature_uri(self._resolve_feature_key(feature_key))}

    @classmethod
    def config_model(cls) -> type[DeltaMetadataStoreConfig]:  # pyright: ignore[reportIncompatibleMethodOverride]
        # Binds this store to its declarative config counterpart (from_config).
        return DeltaMetadataStoreConfig
|