metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,542 @@
|
|
|
1
|
+
"""Ibis-based metadata store for SQL databases.
|
|
2
|
+
|
|
3
|
+
Supports any SQL database that Ibis supports:
|
|
4
|
+
- DuckDB, PostgreSQL, MySQL (local/embedded)
|
|
5
|
+
- ClickHouse, Snowflake, BigQuery (cloud analytical)
|
|
6
|
+
- And 20+ other backends
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from collections.abc import Iterator, Sequence
|
|
11
|
+
from contextlib import contextmanager
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
import narwhals as nw
|
|
15
|
+
from narwhals.typing import Frame
|
|
16
|
+
from pydantic import Field
|
|
17
|
+
from typing_extensions import Self
|
|
18
|
+
|
|
19
|
+
from metaxy.metadata_store.base import (
|
|
20
|
+
MetadataStore,
|
|
21
|
+
MetadataStoreConfig,
|
|
22
|
+
VersioningEngineOptions,
|
|
23
|
+
)
|
|
24
|
+
from metaxy.metadata_store.exceptions import (
|
|
25
|
+
HashAlgorithmNotSupportedError,
|
|
26
|
+
TableNotFoundError,
|
|
27
|
+
)
|
|
28
|
+
from metaxy.metadata_store.types import AccessMode
|
|
29
|
+
from metaxy.models.plan import FeaturePlan
|
|
30
|
+
from metaxy.models.types import CoercibleToFeatureKey, FeatureKey
|
|
31
|
+
from metaxy.versioning.ibis import IbisVersioningEngine
|
|
32
|
+
from metaxy.versioning.types import HashAlgorithm
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
import ibis
|
|
36
|
+
import ibis.expr.types
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class IbisMetadataStoreConfig(MetadataStoreConfig):
    """Configuration for IbisMetadataStore.

    Either ``connection_string`` or ``backend`` (optionally with
    ``connection_params``) should be provided; when both are set,
    ``connection_string`` takes precedence (see ``IbisMetadataStore.open``).

    Example:
        ```python
        config = IbisMetadataStoreConfig(
            connection_string="postgresql://user:pass@host:5432/db",
            table_prefix="prod_",
        )

        # Note: IbisMetadataStore is abstract, use a concrete implementation
        ```
    """

    # Full Ibis URL; preferred over backend + connection_params when present.
    connection_string: str | None = Field(
        default=None,
        description="Ibis connection string (e.g., 'clickhouse://host:9000/db').",
    )

    # Backend name, used together with connection_params when no URL is given.
    backend: str | None = Field(
        default=None,
        description="Ibis backend name (e.g., 'clickhouse', 'postgres', 'duckdb').",
        json_schema_extra={"mkdocs_metaxy_hide": True},
    )

    # Keyword arguments forwarded to the backend's connect() call.
    connection_params: dict[str, Any] | None = Field(
        default=None,
        description="Backend-specific connection parameters.",
    )

    # Prepended verbatim to every feature/system table name (e.g. "prod_").
    table_prefix: str | None = Field(
        default=None,
        description="Optional prefix for all table names.",
    )

    # When True, missing tables are auto-created on write. Development/testing only.
    auto_create_tables: bool | None = Field(
        default=None,
        description="If True, create tables on open. For development/testing only.",
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class IbisMetadataStore(MetadataStore, ABC):
|
|
81
|
+
"""
|
|
82
|
+
Generic SQL metadata store using Ibis.
|
|
83
|
+
|
|
84
|
+
Supports any Ibis backend that supports struct types, such as: DuckDB, PostgreSQL, ClickHouse, and others.
|
|
85
|
+
|
|
86
|
+
Warning:
|
|
87
|
+
Backends without native struct support (e.g., SQLite) are NOT supported.
|
|
88
|
+
|
|
89
|
+
Storage layout:
|
|
90
|
+
- Each feature gets its own table: {feature}__{key}
|
|
91
|
+
- System tables: metaxy__system__feature_versions, metaxy__system__migrations
|
|
92
|
+
- Uses Ibis for cross-database compatibility
|
|
93
|
+
|
|
94
|
+
Note: Uses MD5 hash by default for cross-database compatibility.
|
|
95
|
+
DuckDBMetadataStore overrides this with dynamic algorithm detection.
|
|
96
|
+
For other backends, override the calculator instance variable with backend-specific implementations.
|
|
97
|
+
|
|
98
|
+
Example:
|
|
99
|
+
```py
|
|
100
|
+
# ClickHouse
|
|
101
|
+
store = IbisMetadataStore("clickhouse://user:pass@host:9000/db")
|
|
102
|
+
|
|
103
|
+
# PostgreSQL
|
|
104
|
+
store = IbisMetadataStore("postgresql://user:pass@host:5432/db")
|
|
105
|
+
|
|
106
|
+
# DuckDB (use DuckDBMetadataStore instead for better hash support)
|
|
107
|
+
store = IbisMetadataStore("duckdb:///metadata.db")
|
|
108
|
+
|
|
109
|
+
with store:
|
|
110
|
+
store.write_metadata(MyFeature, df)
|
|
111
|
+
```
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
def __init__(
    self,
    versioning_engine: VersioningEngineOptions = "auto",
    connection_string: str | None = None,
    *,
    backend: str | None = None,
    connection_params: dict[str, Any] | None = None,
    table_prefix: str | None = None,
    **kwargs: Any,
):
    """Initialize an Ibis-backed metadata store.

    Connection parameters are only recorded here; the actual database
    connection is established lazily in ``open()``.

    Args:
        versioning_engine: Versioning engine selection strategy.
            - "auto": prefer the store's native engine, falling back to Polars
            - "native": always use the native engine; incompatible dataframes
              raise `VersioningEngineMismatchError`
            - "polars": always use the Polars engine
        connection_string: Ibis connection string such as
            "clickhouse://host:9000/db". When given, `backend` and
            `connection_params` are ignored.
        backend: Ibis backend name ("clickhouse", "postgres", "duckdb", ...),
            used together with `connection_params` for finer control.
        connection_params: Backend-specific keyword arguments, e.g.
            {"host": "localhost", "port": 9000, "database": "default"}.
        table_prefix: Optional prefix applied to every feature and system
            table name (e.g. "prod_"); the combination must still be a valid
            SQL identifier.
        **kwargs: Forwarded to MetadataStore.__init__ (e.g. fallback_stores,
            hash_algorithm).

    Raises:
        ValueError: If neither connection_string nor backend is provided
        ImportError: If Ibis or the required backend driver is not installed

    Example:
        ```py
        # Connection string form
        store = IbisMetadataStore("clickhouse://user:pass@host:9000/db")

        # Backend + params form
        store = IbisMetadataStore(
            backend="clickhouse",
            connection_params={"host": "localhost", "port": 9000}
        )
        ```
    """
    import ibis

    # Connection is opened later; keep everything needed to connect.
    self._conn: ibis.BaseBackend | None = None
    self._table_prefix = table_prefix or ""
    self.connection_string = connection_string
    self.backend = backend
    self.connection_params = connection_params or {}

    super().__init__(
        versioning_engine=versioning_engine,
        versioning_engine_cls=IbisVersioningEngine,
        **kwargs,
    )
|
|
173
|
+
|
|
174
|
+
def _has_feature_impl(self, feature: CoercibleToFeatureKey) -> bool:
    """Return True when a physical table exists for the given feature."""
    storage_name = self.get_table_name(self._resolve_feature_key(feature))
    return storage_name in self.conn.list_tables()
|
|
178
|
+
|
|
179
|
+
def get_table_name(
    self,
    key: FeatureKey,
) -> str:
    """Generate the storage table name for a feature or system table.

    Prepends the configured table_prefix (if any) to the key's table name.
    Subclasses may override to implement custom naming schemes.

    Args:
        key: Feature key to convert to a storage table name.

    Returns:
        Storage table name with the optional prefix applied.
    """
    # Fast path: no prefix configured.
    if not self._table_prefix:
        return key.table_name
    return f"{self._table_prefix}{key.table_name}"
|
|
197
|
+
|
|
198
|
+
def _get_default_hash_algorithm(self) -> HashAlgorithm:
    """Default hash algorithm for generic Ibis stores.

    MD5 is chosen because it is available in virtually every SQL database;
    subclasses (e.g. DuckDBMetadataStore) may override with stronger options.
    """
    return HashAlgorithm.MD5
|
|
205
|
+
|
|
206
|
+
@contextmanager
def _create_versioning_engine(
    self, plan: FeaturePlan
) -> Iterator[IbisVersioningEngine]:
    """Yield a provenance engine for the Ibis backend.

    Args:
        plan: Feature plan describing the feature whose provenance is tracked.

    Yields:
        IbisVersioningEngine wired with backend-specific hash functions.

    Raises:
        RuntimeError: If the store has not been opened.

    Note:
        Hash support depends on ``_create_hash_functions()``; subclasses
        override it to add backend-specific algorithms.
    """
    # The engine needs a live connection; fail loudly if the store is closed.
    if self._conn is None:
        raise RuntimeError(
            "Cannot create provenance engine: store is not open. "
            "Ensure store is used as context manager."
        )

    # No teardown is required for the Ibis engine, so a bare yield suffices.
    yield IbisVersioningEngine(
        plan=plan,
        hash_functions=self._create_hash_functions(),
    )
|
|
242
|
+
|
|
243
|
+
@abstractmethod
def _create_hash_functions(self):
    """Create hash functions for Ibis expressions.

    Declared abstract: every concrete subclass must override this with
    backend-specific hash function implementations. The empty-dict body
    only serves as a neutral default for subclasses that choose to start
    from ``super()._create_hash_functions()``.

    Returns:
        Dictionary mapping HashAlgorithm to Ibis expression functions
    """
    return {}
|
|
254
|
+
|
|
255
|
+
def _validate_hash_algorithm_support(self) -> None:
    """Validate that the configured hash algorithm is supported by Ibis backend.

    Raises:
        HashAlgorithmNotSupportedError: If ``self.hash_algorithm`` is not among
            the algorithms provided by ``_create_hash_functions()``.
    """
    # Build the hash-function map to discover what this backend supports.
    hash_functions = self._create_hash_functions()

    if self.hash_algorithm not in hash_functions:
        supported = [algo.value for algo in hash_functions.keys()]
        raise HashAlgorithmNotSupportedError(
            f"Hash algorithm '{self.hash_algorithm.value}' not supported. "
            f"Supported algorithms: {', '.join(supported)}"
        )
|
|
270
|
+
|
|
271
|
+
@property
def ibis_conn(self) -> "ibis.BaseBackend":
    """Active Ibis backend connection.

    Returns:
        The live Ibis backend connection.

    Raises:
        StoreNotOpenError: If the store has not been opened.
    """
    from metaxy.metadata_store.exceptions import StoreNotOpenError

    # Bind locally so the narrowed (non-None) value is returned.
    connection = self._conn
    if connection is None:
        raise StoreNotOpenError(
            "Ibis connection is not open. Store must be used as a context manager."
        )
    return connection
|
|
288
|
+
|
|
289
|
+
@property
def conn(self) -> "ibis.BaseBackend":
    """Alias for :attr:`ibis_conn`, kept for naming consistency.

    Returns:
        The live Ibis backend connection.

    Raises:
        StoreNotOpenError: If the store has not been opened.
    """
    return self.ibis_conn
|
|
300
|
+
|
|
301
|
+
@contextmanager
def open(self, mode: AccessMode = "read") -> Iterator[Self]:
    """Open connection to database via Ibis.

    Subclasses should override this to add backend-specific initialization
    (e.g., loading extensions) and must call this method via super().open(mode).

    Re-entrant: nested ``open()`` contexts share one underlying connection.
    Only the outermost entry connects and only the outermost exit
    disconnects, tracked via ``self._context_depth``.

    Args:
        mode: Access mode. Unused by this base implementation; subclasses may
            use it to set backend-specific connection parameters
            (e.g., `read_only` for DuckDB).

    Yields:
        Self: The store instance with connection open
    """
    import ibis

    # Count nesting so inner `with` blocks reuse the existing connection.
    self._context_depth += 1

    try:
        # Only the first (outermost) entry performs the actual connect.
        if self._context_depth == 1:
            if self.connection_string:
                # A full connection string wins over backend + params.
                self._conn = ibis.connect(self.connection_string)
            else:
                # Backend + params form: resolve the backend's connect()
                # from the ibis namespace (e.g. ibis.clickhouse.connect).
                assert self.backend is not None, (
                    "backend must be set if connection_string is None"
                )
                backend_module = getattr(ibis, self.backend)
                self._conn = backend_module.connect(**self.connection_params)

            # Mark store as open, then run post-open validation hooks.
            self._is_open = True
            self._validate_after_open()

        yield self
    finally:
        # Unwind one nesting level on every exit, even after errors.
        self._context_depth -= 1

        # Only the last (outermost) exit tears the connection down.
        if self._context_depth == 0:
            if self._conn is not None:
                # Ibis connections may not have explicit close method
                # but setting to None releases resources
                self._conn = None
            self._is_open = False
|
|
353
|
+
|
|
354
|
+
@property
def sqlalchemy_url(self) -> str:
    """SQLAlchemy-compatible connection URL for tools such as Alembic.

    Only available when the store was constructed from a connection string;
    reconstructing a URL from backend + connection_params is backend-specific
    and not attempted here.

    Returns:
        SQLAlchemy-compatible URL string

    Raises:
        ValueError: If no connection string was provided at construction time.

    Example:
        ```python
        store = IbisMetadataStore("postgresql://user:pass@host:5432/db")
        print(store.sqlalchemy_url)  # postgresql://user:pass@host:5432/db
        ```
    """
    # Guard clause: without a stored URL there is nothing safe to return.
    if not self.connection_string:
        raise ValueError(
            "SQLAlchemy URL not available. Store was initialized with backend + connection_params "
            "instead of a connection string. To use Alembic, initialize with a connection string: "
            f"IbisMetadataStore('postgresql://user:pass@host:5432/db') instead of "
            f"IbisMetadataStore(backend='{self.backend}', connection_params={{...}})"
        )
    return self.connection_string
|
|
383
|
+
|
|
384
|
+
def write_metadata_to_store(
    self,
    feature_key: FeatureKey,
    df: Frame,
    **kwargs: Any,
) -> None:
    """Internal write implementation using Ibis.

    Inserts into the feature's table; when the table is missing and
    ``auto_create_tables`` is enabled, the table is created from the data
    instead (with a one-time warning).

    Args:
        feature_key: Feature key to write to
        df: DataFrame with metadata (already validated)
        **kwargs: Backend-specific parameters (currently unused)

    Raises:
        TableNotFoundError: If table doesn't exist and auto_create_tables is False
    """
    # Ibis-backed frames can be handed over natively; anything else is
    # materialized to Polars first.
    if df.implementation == nw.Implementation.IBIS:
        payload = df.to_native()
    else:
        from metaxy._utils import collect_to_polars

        payload = collect_to_polars(df)

    table_name = self.get_table_name(feature_key)

    try:
        self.conn.insert(table_name, obj=payload)  # type: ignore[attr-defined] # pyright: ignore[reportAttributeAccessIssue]
    except Exception as exc:
        import ibis.common.exceptions

        # Anything other than a missing table is not ours to handle.
        if not isinstance(exc, ibis.common.exceptions.TableNotFound):
            raise

        if not self.auto_create_tables:
            raise TableNotFoundError(
                f"Table '{table_name}' does not exist for feature {feature_key.to_string()}. "
                f"Enable auto_create_tables=True to automatically create tables, "
                f"or use proper database migration tools like Alembic to create the table first."
            ) from exc

        # Warn about auto-creation once per store instance.
        if self._should_warn_auto_create_tables:
            import warnings

            warnings.warn(
                f"AUTO_CREATE_TABLES is enabled - automatically creating table '{table_name}'. "
                "Do not use in production! "
                "Use proper database migration tools like Alembic for production deployments.",
                UserWarning,
                stacklevel=4,
            )

        # create_table(name, obj=...) both creates the table and writes the
        # data, so no follow-up insert is necessary.
        self.conn.create_table(table_name, obj=payload)
|
|
439
|
+
|
|
440
|
+
def _drop_feature_metadata_impl(self, feature_key: FeatureKey) -> None:
    """Drop the backing table for a feature, if it exists.

    Args:
        feature_key: Feature key whose metadata table should be dropped.
    """
    name = self.get_table_name(feature_key)

    # Dropping a non-existent table is a no-op.
    if name not in self.conn.list_tables():
        return
    self.conn.drop_table(name)
|
|
451
|
+
|
|
452
|
+
def read_metadata_in_store(
    self,
    feature: CoercibleToFeatureKey,
    *,
    feature_version: str | None = None,
    filters: Sequence[nw.Expr] | None = None,
    columns: Sequence[str] | None = None,
    **kwargs: Any,
) -> nw.LazyFrame[Any] | None:
    """Read metadata from this store only (no fallback).

    All filtering and projection is expressed through Narwhals over the
    Ibis table, so it is pushed down to SQL and stays lazy.

    Args:
        feature: Feature to read
        feature_version: Filter by specific feature_version (applied as SQL WHERE clause)
        filters: List of Narwhals filter expressions (converted to SQL WHERE clauses)
        columns: Optional list of columns to select
        **kwargs: Backend-specific parameters (currently unused)

    Returns:
        Narwhals LazyFrame with metadata, or None if not found
    """
    resolved = self._resolve_feature_key(feature)
    table_name = self.get_table_name(resolved)

    # Missing table means no metadata for this feature in this store.
    if table_name not in self.conn.list_tables():
        return None

    # Wrap the Ibis table with Narwhals; everything below stays lazy SQL.
    frame: nw.LazyFrame[Any] = nw.from_native(
        self.conn.table(table_name), eager_only=False
    )

    if feature_version is not None:
        frame = frame.filter(nw.col("metaxy_feature_version") == feature_version)

    # Apply caller-supplied predicates, if any.
    for predicate in filters or ():
        frame = frame.filter(predicate)

    if columns is not None:
        frame = frame.select(columns)

    return frame
|
|
505
|
+
|
|
506
|
+
def _can_compute_native(self) -> bool:
    """Whether native (Narwhals-based) provenance calculation is available.

    Returns:
        True: all Ibis stores use Narwhals components (NarwhalsJoiner,
        PolarsProvenanceByFieldCalculator, NarwhalsDiffResolver), which
        operate efficiently on Ibis-backed tables.
    """
    return True
|
|
518
|
+
|
|
519
|
+
def display(self) -> str:
    """Human-readable identifier for this store, with credentials scrubbed."""
    from metaxy.metadata_store.utils import sanitize_uri

    # Prefer the connection string; fall back to the backend name.
    raw = self.connection_string or f"{self.backend}"
    return f"{self.__class__.__name__}(backend={sanitize_uri(raw)})"
|
|
527
|
+
|
|
528
|
+
def get_store_metadata(self, feature_key: CoercibleToFeatureKey) -> dict[str, Any]:
    """Return store metadata for a feature, including its table name.

    Args:
        feature_key: Feature key to get metadata for.

    Returns:
        Dictionary with a `table_name` key.
    """
    return {
        "table_name": self.get_table_name(self._resolve_feature_key(feature_key))
    }
|
|
539
|
+
|
|
540
|
+
@classmethod
def config_model(cls) -> type[IbisMetadataStoreConfig]:  # pyright: ignore[reportIncompatibleMethodOverride]
    """Return the pydantic configuration model for this store type."""
    return IbisMetadataStoreConfig
|