metaxy-0.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of metaxy might be problematic.

Files changed (75)
  1. metaxy/__init__.py +61 -0
  2. metaxy/_testing.py +542 -0
  3. metaxy/_utils.py +16 -0
  4. metaxy/_version.py +1 -0
  5. metaxy/cli/app.py +76 -0
  6. metaxy/cli/context.py +71 -0
  7. metaxy/cli/graph.py +576 -0
  8. metaxy/cli/graph_diff.py +290 -0
  9. metaxy/cli/list.py +42 -0
  10. metaxy/cli/metadata.py +271 -0
  11. metaxy/cli/migrations.py +862 -0
  12. metaxy/cli/push.py +55 -0
  13. metaxy/config.py +450 -0
  14. metaxy/data_versioning/__init__.py +24 -0
  15. metaxy/data_versioning/calculators/__init__.py +13 -0
  16. metaxy/data_versioning/calculators/base.py +97 -0
  17. metaxy/data_versioning/calculators/duckdb.py +186 -0
  18. metaxy/data_versioning/calculators/ibis.py +225 -0
  19. metaxy/data_versioning/calculators/polars.py +135 -0
  20. metaxy/data_versioning/diff/__init__.py +15 -0
  21. metaxy/data_versioning/diff/base.py +150 -0
  22. metaxy/data_versioning/diff/narwhals.py +108 -0
  23. metaxy/data_versioning/hash_algorithms.py +19 -0
  24. metaxy/data_versioning/joiners/__init__.py +9 -0
  25. metaxy/data_versioning/joiners/base.py +70 -0
  26. metaxy/data_versioning/joiners/narwhals.py +235 -0
  27. metaxy/entrypoints.py +309 -0
  28. metaxy/ext/__init__.py +1 -0
  29. metaxy/ext/alembic.py +326 -0
  30. metaxy/ext/sqlmodel.py +172 -0
  31. metaxy/ext/sqlmodel_system_tables.py +139 -0
  32. metaxy/graph/__init__.py +21 -0
  33. metaxy/graph/diff/__init__.py +21 -0
  34. metaxy/graph/diff/diff_models.py +399 -0
  35. metaxy/graph/diff/differ.py +740 -0
  36. metaxy/graph/diff/models.py +418 -0
  37. metaxy/graph/diff/rendering/__init__.py +18 -0
  38. metaxy/graph/diff/rendering/base.py +274 -0
  39. metaxy/graph/diff/rendering/cards.py +188 -0
  40. metaxy/graph/diff/rendering/formatter.py +805 -0
  41. metaxy/graph/diff/rendering/graphviz.py +246 -0
  42. metaxy/graph/diff/rendering/mermaid.py +320 -0
  43. metaxy/graph/diff/rendering/rich.py +165 -0
  44. metaxy/graph/diff/rendering/theme.py +48 -0
  45. metaxy/graph/diff/traversal.py +247 -0
  46. metaxy/graph/utils.py +58 -0
  47. metaxy/metadata_store/__init__.py +31 -0
  48. metaxy/metadata_store/_protocols.py +38 -0
  49. metaxy/metadata_store/base.py +1676 -0
  50. metaxy/metadata_store/clickhouse.py +161 -0
  51. metaxy/metadata_store/duckdb.py +167 -0
  52. metaxy/metadata_store/exceptions.py +43 -0
  53. metaxy/metadata_store/ibis.py +451 -0
  54. metaxy/metadata_store/memory.py +228 -0
  55. metaxy/metadata_store/sqlite.py +187 -0
  56. metaxy/metadata_store/system_tables.py +257 -0
  57. metaxy/migrations/__init__.py +34 -0
  58. metaxy/migrations/detector.py +153 -0
  59. metaxy/migrations/executor.py +208 -0
  60. metaxy/migrations/loader.py +260 -0
  61. metaxy/migrations/models.py +718 -0
  62. metaxy/migrations/ops.py +390 -0
  63. metaxy/models/__init__.py +0 -0
  64. metaxy/models/bases.py +6 -0
  65. metaxy/models/constants.py +24 -0
  66. metaxy/models/feature.py +665 -0
  67. metaxy/models/feature_spec.py +105 -0
  68. metaxy/models/field.py +25 -0
  69. metaxy/models/plan.py +155 -0
  70. metaxy/models/types.py +157 -0
  71. metaxy/py.typed +0 -0
  72. metaxy-0.0.0.dist-info/METADATA +247 -0
  73. metaxy-0.0.0.dist-info/RECORD +75 -0
  74. metaxy-0.0.0.dist-info/WHEEL +4 -0
  75. metaxy-0.0.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,451 @@
+ """Ibis-based metadata store for SQL databases.
+
+ Supports any SQL database that Ibis supports:
+ - DuckDB, PostgreSQL, MySQL, SQLite (local/embedded)
+ - ClickHouse, Snowflake, BigQuery (cloud analytical)
+ - And 20+ other backends
+ """
+
+ from collections.abc import Sequence
+ from typing import TYPE_CHECKING, Any
+
+ import narwhals as nw
+ import polars as pl
+
+ from metaxy.data_versioning.hash_algorithms import HashAlgorithm
+ from metaxy.metadata_store.base import MetadataStore
+ from metaxy.models.feature import Feature
+ from metaxy.models.types import FeatureKey
+
+ if TYPE_CHECKING:
+     import ibis
+     import ibis.expr.types
+
+     from metaxy.data_versioning.calculators.ibis import HashSQLGenerator
+
+
+ class IbisMetadataStore(MetadataStore):
+     """
+     Generic SQL metadata store using Ibis.
+
+     Supports any Ibis backend including:
+     - DuckDB: Fast local analytical database
+     - PostgreSQL: Production-grade RDBMS
+     - MySQL: Popular RDBMS
+     - ClickHouse: High-performance analytical database
+     - SQLite: Embedded database
+     - And 20+ other backends
+
+     Storage layout:
+     - Each feature gets its own table: {namespace}__{feature_name}
+     - System tables: __metaxy__feature_versions, __metaxy__migrations
+     - Uses Ibis for cross-database compatibility
+
+     Note: Uses MD5 hashing by default for cross-database compatibility.
+     DuckDBMetadataStore overrides this with dynamic algorithm detection; other
+     backends can override _get_hash_sql_generators() with backend-specific hash functions.
+
+     Example:
+         >>> # ClickHouse
+         >>> store = IbisMetadataStore("clickhouse://user:pass@host:9000/db")
+         >>>
+         >>> # PostgreSQL
+         >>> store = IbisMetadataStore("postgresql://user:pass@host:5432/db")
+         >>>
+         >>> # DuckDB (use DuckDBMetadataStore instead for better hash support)
+         >>> store = IbisMetadataStore("duckdb:///metadata.db")
+         >>>
+         >>> with store:
+         ...     store.write_metadata(MyFeature, df)
+     """
+
+     @classmethod
+     def supports_structs(cls) -> bool:
+         """Check if backend supports struct types natively.
+
+         Subclasses should override this for backends that don't support structs.
+         Default implementation returns True (most SQL databases support structs).
+
+         Returns:
+             True if backend supports structs, False if needs JSON serialization
+         """
+         return True
+
+     def __init__(
+         self,
+         connection_string: str | None = None,
+         *,
+         backend: str | None = None,
+         connection_params: dict[str, Any] | None = None,
+         **kwargs,
+     ):
+         """
+         Initialize Ibis metadata store.
+
+         Args:
+             connection_string: Ibis connection string (e.g., "clickhouse://host:9000/db")
+                 If provided, backend and connection_params are ignored.
+             backend: Ibis backend name (e.g., "clickhouse", "postgres", "duckdb")
+                 Used with connection_params for more control.
+             connection_params: Backend-specific connection parameters
+                 e.g., {"host": "localhost", "port": 9000, "database": "default"}
+             **kwargs: Passed to MetadataStore.__init__ (e.g., fallback_stores, hash_algorithm)
+
+         Raises:
+             ValueError: If neither connection_string nor backend is provided
+             ImportError: If Ibis or required backend driver not installed
+
+         Example:
+             >>> # Using connection string
+             >>> store = IbisMetadataStore("clickhouse://user:pass@host:9000/db")
+             >>>
+             >>> # Using backend + params
+             >>> store = IbisMetadataStore(
+             ...     backend="clickhouse",
+             ...     connection_params={"host": "localhost", "port": 9000}
+             ... )
+         """
+         try:
+             import ibis
+
+             self._ibis = ibis
+         except ImportError as e:
+             raise ImportError(
+                 "Ibis is required for IbisMetadataStore. "
+                 "Install with: pip install ibis-framework[BACKEND] "
+                 "where BACKEND is one of: duckdb, postgres, clickhouse, mysql, etc."
+             ) from e
+
+         if connection_string is None and backend is None:
+             raise ValueError(
+                 "Must provide either connection_string or backend. "
+                 "Example: connection_string='clickhouse://host:9000/db' "
+                 "or backend='clickhouse' with connection_params"
+             )
+
+         self.connection_string = connection_string
+         self.backend = backend
+         self.connection_params = connection_params or {}
+         self._conn: ibis.BaseBackend | None = None
+
+         super().__init__(**kwargs)
+
+     def _get_default_hash_algorithm(self) -> HashAlgorithm:
+         """Get default hash algorithm for Ibis stores.
+
+         Uses MD5 as it's universally supported across SQL databases.
+         Subclasses like DuckDBMetadataStore can override for better algorithms.
+         """
+         return HashAlgorithm.MD5
+
+     def _supports_native_components(self) -> bool:
+         """Ibis stores support native (Ibis-based) components when connection is open."""
+         return self._conn is not None
+
+     def _create_native_components(self):
+         """Create components for native SQL execution via Ibis."""
+         from metaxy.data_versioning.calculators.ibis import IbisDataVersionCalculator
+         from metaxy.data_versioning.diff.narwhals import NarwhalsDiffResolver
+         from metaxy.data_versioning.joiners.narwhals import NarwhalsJoiner
+
+         if self._conn is None:
+             raise RuntimeError(
+                 "Cannot create native data version calculations: store is not open. "
+                 "Ensure store is used as context manager."
+             )
+
+         # All components accept/return Narwhals LazyFrames
+         # IbisDataVersionCalculator converts to Ibis internally for SQL hash generation
+         joiner = NarwhalsJoiner()
+         calculator = IbisDataVersionCalculator(
+             backend=self._conn,
+             hash_sql_generators=self._get_hash_sql_generators(),
+         )
+         diff_resolver = NarwhalsDiffResolver()
+
+         return joiner, calculator, diff_resolver
+
+     def _get_hash_sql_generators(self) -> dict[HashAlgorithm, "HashSQLGenerator"]:
+         """Get hash SQL generators for this backend.
+
+         Base implementation only supports MD5 (universally available in SQL).
+         Subclasses override to add backend-specific hash functions.
+
+         Returns:
+             Dictionary mapping HashAlgorithm to SQL generator functions
+         """
+
+         def md5_generator(table, concat_columns: dict[str, str]) -> str:
+             """Generate SQL to compute MD5 hashes (universal SQL support).
+
+             Note: This generic implementation assumes MD5() returns a hex string.
+             Subclasses should override if their backend returns binary or a different format.
+             For example, ClickHouse returns binary and needs lower(hex(MD5(...))).
+             """
+             # Build SELECT clause with hash columns
+             hash_selects: list[str] = []
+             for field_key, concat_col in concat_columns.items():
+                 hash_col = f"__hash_{field_key}"
+                 # Use MD5 function (universally available in SQL databases)
+                 # WARNING: Different databases return different formats (hex string vs binary)
+                 # This generic version assumes hex string output
+                 hash_expr = f"MD5({concat_col})"
+                 hash_selects.append(f"{hash_expr} as {hash_col}")
+
+             hash_clause = ", ".join(hash_selects)
+             table_sql = table.compile()
+             return f"SELECT *, {hash_clause} FROM ({table_sql}) AS __metaxy_temp"
+
+         return {HashAlgorithm.MD5: md5_generator}
+
+     @property
+     def ibis_conn(self) -> "ibis.BaseBackend":
+         """Get Ibis backend connection.
+
+         Returns:
+             Active Ibis backend connection
+
+         Raises:
+             StoreNotOpenError: If store is not open
+         """
+         from metaxy.metadata_store.exceptions import StoreNotOpenError
+
+         if self._conn is None:
+             raise StoreNotOpenError(
+                 "Ibis connection is not open. Store must be used as a context manager."
+             )
+         return self._conn
+
+     @property
+     def conn(self) -> "ibis.BaseBackend":
+         """Get connection (alias for ibis_conn for consistency).
+
+         Returns:
+             Active Ibis backend connection
+
+         Raises:
+             StoreNotOpenError: If store is not open
+         """
+         return self.ibis_conn
+
+     def open(self) -> None:
+         """Open connection to database via Ibis.
+
+         Subclasses should override this to add backend-specific initialization
+         (e.g., loading extensions) and should call super().open() first.
+         """
+         if self.connection_string:
+             # Use connection string
+             self._conn = self._ibis.connect(self.connection_string)
+         else:
+             # Use backend + params
+             # Get backend-specific connect function
+             assert self.backend is not None, (
+                 "backend must be set if connection_string is None"
+             )
+             backend_module = getattr(self._ibis, self.backend)
+             self._conn = backend_module.connect(**self.connection_params)
+
+     def close(self) -> None:
+         """Close the Ibis connection."""
+         if self._conn is not None:
+             # Ibis connections may not have explicit close method
+             # but setting to None releases resources
+             self._conn = None
+
+     def _table_name_to_feature_key(self, table_name: str) -> FeatureKey:
+         """Convert table name back to feature key."""
+         return FeatureKey(table_name.split("__"))
+
+     def _serialize_for_storage(self, df: pl.DataFrame) -> pl.DataFrame:
+         """Serialize DataFrame for storage (e.g., convert structs to JSON).
+
+         Base implementation does nothing - backends that don't support structs
+         should override this method.
+
+         Args:
+             df: DataFrame to serialize
+
+         Returns:
+             Serialized DataFrame
+         """
+         return df
+
+     def _deserialize_from_storage(self, df: pl.DataFrame) -> pl.DataFrame:
+         """Deserialize DataFrame from storage (e.g., convert JSON back to structs).
+
+         Base implementation does nothing - backends that don't support structs
+         should override this method.
+
+         Args:
+             df: DataFrame to deserialize
+
+         Returns:
+             Deserialized DataFrame
+         """
+         return df
+
+     def _write_metadata_impl(
+         self,
+         feature_key: FeatureKey,
+         df: pl.DataFrame,
+     ) -> None:
+         """
+         Internal write implementation using Ibis.
+
+         Args:
+             feature_key: Feature key to write to
+             df: DataFrame with metadata (already validated)
+         """
+         table_name = feature_key.table_name
+
+         # Serialize for storage (e.g., convert structs to JSON for SQLite)
+         df = self._serialize_for_storage(df)
+
+         # Check if table exists
+         existing_tables = self.conn.list_tables()
+
+         if table_name not in existing_tables:
+             # Create table from DataFrame
+             # Ensure NULL columns have proper types by casting them to a concrete dtype
+             # This handles cases like snapshot_version which can be entirely NULL
+             df_typed = df
+             for col in df.columns:
+                 if df[col].dtype == pl.Null:
+                     # Cast NULL columns to String
+                     df_typed = df_typed.with_columns(pl.col(col).cast(pl.Utf8))
+
+             self.conn.create_table(table_name, obj=df_typed)
+         else:
+             # Append to existing table
+             self.conn.insert(table_name, obj=df)  # type: ignore[attr-defined] # pyright: ignore[reportAttributeAccessIssue]
+
+     def _drop_feature_metadata_impl(self, feature_key: FeatureKey) -> None:
+         """Drop the table for a feature.
+
+         Args:
+             feature_key: Feature key to drop metadata for
+         """
+         table_name = feature_key.table_name
+
+         # Check if table exists
+         if table_name in self.conn.list_tables():
+             self.conn.drop_table(table_name)
+
+     def _read_metadata_native(
+         self,
+         feature: FeatureKey | type[Feature],
+         *,
+         feature_version: str | None = None,
+         filters: Sequence[nw.Expr] | None = None,
+         columns: Sequence[str] | None = None,
+     ) -> nw.LazyFrame[Any] | None:
+         """
+         Read metadata from this store only (no fallback).
+
+         Args:
+             feature: Feature to read
+             feature_version: Filter by specific feature_version (applied as SQL WHERE clause)
+             filters: List of Narwhals filter expressions (converted to SQL WHERE clauses)
+             columns: Optional list of columns to select
+
+         Returns:
+             Narwhals LazyFrame with metadata, or None if not found
+         """
+         feature_key = self._resolve_feature_key(feature)
+         table_name = feature_key.table_name
+
+         # Check if table exists
+         existing_tables = self.conn.list_tables()
+         if table_name not in existing_tables:
+             return None
+
+         # Get Ibis table reference
+         table = self.conn.table(table_name)
+
+         # Wrap Ibis table with Narwhals (stays lazy in SQL)
+         nw_lazy: nw.LazyFrame[Any] = nw.from_native(table, eager_only=False)
+
+         # Apply feature_version filter (stays in SQL via Narwhals)
+         if feature_version is not None:
+             nw_lazy = nw_lazy.filter(nw.col("feature_version") == feature_version)
+
+         # Apply generic Narwhals filters (stays in SQL)
+         if filters is not None:
+             for filter_expr in filters:
+                 nw_lazy = nw_lazy.filter(filter_expr)
+
+         # Select columns (stays in SQL)
+         if columns is not None:
+             nw_lazy = nw_lazy.select(columns)
+
+         # For backends that don't support structs (e.g., SQLite),
+         # we need to deserialize JSON strings to structs
+         if not self.supports_structs():
+             # Convert to Polars, deserialize, then wrap back as Narwhals lazy
+             table_native = nw_lazy.to_native()
+             if hasattr(table_native, "to_polars"):
+                 # Ibis table
+                 df_polars = table_native.to_polars()
+             else:
+                 # Already Polars
+                 df_polars = (
+                     table_native
+                     if isinstance(table_native, pl.DataFrame)
+                     else table_native.collect()
+                 )
+
+             # Deserialize JSON → structs
+             df_polars = self._deserialize_from_storage(df_polars)
+
+             # Make lazy and wrap in Narwhals
+             return nw.from_native(df_polars.lazy())
+
+         # Return Narwhals LazyFrame wrapping Ibis table (stays lazy in SQL)
+         return nw_lazy
+
+     def _list_features_local(self) -> list[FeatureKey]:
+         """
+         List all features in this store.
+
+         Returns:
+             List of FeatureKey objects (excluding system tables)
+         """
+         # Query all table names
+         table_names = self.conn.list_tables()
+
+         features = []
+         for table_name in table_names:
+             # Skip Ibis internal tables (start with "ibis_")
+             if table_name.startswith("ibis_"):
+                 continue
+
+             feature_key = self._table_name_to_feature_key(table_name)
+
+             # Skip system tables
+             if not self._is_system_table(feature_key):
+                 features.append(feature_key)
+
+         return features
+
+     def _can_compute_native(self) -> bool:
+         """
+         Ibis backends support native data version calculations (Narwhals-based).
+
+         Returns:
+             True (use Narwhals components with Ibis-backed tables)
+
+         Note: All Ibis stores now use Narwhals-based components (NarwhalsJoiner,
+         IbisDataVersionCalculator, NarwhalsDiffResolver) which work efficiently
+         with Ibis-backed tables.
+         """
+         return True
+
+     def display(self) -> str:
+         """Display string for this store."""
+         backend_info = self.connection_string or f"{self.backend}"
+         if self._is_open:
+             num_features = len(self._list_features_local())
+             return f"IbisMetadataStore(backend={backend_info}, features={num_features})"
+         else:
+             return f"IbisMetadataStore(backend={backend_info})"
@@ -0,0 +1,228 @@
+ """In-memory metadata store implementation."""
+
+ from collections.abc import Sequence
+ from typing import Any
+
+ import narwhals as nw
+ import polars as pl
+
+ from metaxy.data_versioning.calculators.base import DataVersionCalculator
+ from metaxy.data_versioning.diff.base import MetadataDiffResolver
+ from metaxy.data_versioning.hash_algorithms import HashAlgorithm
+ from metaxy.data_versioning.joiners.base import UpstreamJoiner
+ from metaxy.metadata_store.base import MetadataStore
+ from metaxy.models.feature import Feature
+ from metaxy.models.types import FeatureKey
+
+
+ class InMemoryMetadataStore(MetadataStore):
+     """
+     In-memory metadata store using dict-based storage.
+
+     Features:
+     - Simple dict storage: {FeatureKey: pl.DataFrame}
+     - Fast for testing and prototyping
+     - No persistence (data lost when process exits)
+     - Schema validation on write
+     - Uses Polars components for all operations
+
+     Limitations:
+     - Not suitable for production
+     - Data lost on process exit
+     - No concurrency support across processes
+     - Memory-bound (all data in RAM)
+
+     Type Parameters:
+         TRef = nw.LazyFrame (uses Narwhals LazyFrames)
+
+     Components:
+         Components are created on-demand in resolve_update().
+         Uses Polars internally but exposes Narwhals interface.
+         Only supports Polars components (no native backend).
+     """
+
+     def __init__(self, **kwargs):
+         """
+         Initialize in-memory store.
+
+         Args:
+             **kwargs: Passed to MetadataStore.__init__ (e.g., fallback_stores, hash_algorithm)
+         """
+         # Use tuple as key (hashable) instead of string to avoid parsing issues
+         self._storage: dict[tuple[str, ...], pl.DataFrame] = {}
+         super().__init__(**kwargs)
+
+     def _get_default_hash_algorithm(self) -> HashAlgorithm:
+         """Get default hash algorithm for in-memory store."""
+         return HashAlgorithm.XXHASH64
+
+     def _get_storage_key(self, feature_key: FeatureKey) -> tuple[str, ...]:
+         """Convert feature key to storage key (tuple for hashability)."""
+         return tuple(feature_key)
+
+     def _supports_native_components(self) -> bool:
+         """In-memory store only supports Polars components."""
+         return False
+
+     def _create_native_components(
+         self,
+     ) -> tuple[
+         UpstreamJoiner,
+         DataVersionCalculator,
+         MetadataDiffResolver,
+     ]:
+         """Not supported - in-memory store only uses Polars components."""
+         raise NotImplementedError(
+             "InMemoryMetadataStore does not support native data version calculations"
+         )
+
+     def _write_metadata_impl(
+         self,
+         feature_key: FeatureKey,
+         df: pl.DataFrame,
+     ) -> None:
+         """
+         Internal write implementation for in-memory storage.
+
+         Args:
+             feature_key: Feature key to write to
+             df: DataFrame with metadata (already validated)
+         """
+         storage_key = self._get_storage_key(feature_key)
+
+         # Append or create
+         if storage_key in self._storage:
+             # Append to existing
+             self._storage[storage_key] = pl.concat(
+                 [self._storage[storage_key], df],
+                 how="vertical",
+             )
+         else:
+             # Create new
+             self._storage[storage_key] = df
+
+     def _drop_feature_metadata_impl(self, feature_key: FeatureKey) -> None:
+         """Drop all metadata for a feature from in-memory storage.
+
+         Args:
+             feature_key: Feature key to drop metadata for
+         """
+         storage_key = self._get_storage_key(feature_key)
+
+         # Remove from storage if it exists
+         if storage_key in self._storage:
+             del self._storage[storage_key]
+
+     def _read_metadata_native(
+         self,
+         feature: FeatureKey | type[Feature],
+         *,
+         feature_version: str | None = None,
+         filters: Sequence[nw.Expr] | None = None,
+         columns: Sequence[str] | None = None,
+     ) -> nw.LazyFrame[Any] | None:
+         """
+         Read metadata from this store only (no fallback).
+
+         Args:
+             feature: Feature to read
+             feature_version: Filter by specific feature_version
+             filters: List of Narwhals filter expressions
+             columns: Optional list of columns to select
+
+         Returns:
+             Narwhals LazyFrame with metadata, or None if not found
+
+         Raises:
+             StoreNotOpenError: If store is not open
+         """
+         self._check_open()
+
+         feature_key = self._resolve_feature_key(feature)
+         storage_key = self._get_storage_key(feature_key)
+
+         if storage_key not in self._storage:
+             return None
+
+         # Start with lazy Polars DataFrame, wrap with Narwhals
+         df_lazy = self._storage[storage_key].lazy()
+         nw_lazy = nw.from_native(df_lazy)
+
+         # Apply feature_version filter
+         if feature_version is not None:
+             nw_lazy = nw_lazy.filter(nw.col("feature_version") == feature_version)
+
+         # Apply generic Narwhals filters
+         if filters is not None:
+             for filter_expr in filters:
+                 nw_lazy = nw_lazy.filter(filter_expr)
+
+         # Select columns
+         if columns is not None:
+             nw_lazy = nw_lazy.select(columns)
+
+ # Check if result would be empty (we need to check the underlying frame)
165
+ # For now, return the lazy frame - emptiness check happens when materializing
166
+ return nw_lazy
167
+
168
+ def _list_features_local(self) -> list[FeatureKey]:
169
+ """
170
+ List all features in this store.
171
+
172
+ Returns:
173
+ List of FeatureKey objects (excluding system tables)
174
+ """
175
+ features = []
176
+ for key_tuple in self._storage.keys():
177
+ # Convert tuple back to FeatureKey
178
+ feature_key = FeatureKey(list(key_tuple))
179
+
180
+ # Skip system tables
181
+ if not self._is_system_table(feature_key):
182
+ features.append(feature_key)
183
+
184
+ return features
185
+
186
+ def clear(self) -> None:
187
+ """
188
+ Clear all metadata from store.
189
+
190
+ Useful for testing.
191
+ """
192
+ self._storage.clear()
193
+
194
+ # ========== Context Manager Implementation ==========
195
+
196
+ def open(self) -> None:
197
+ """Open the in-memory store.
198
+
199
+ For InMemoryMetadataStore, this is a no-op since no external
200
+ resources need initialization.
201
+ """
202
+ pass # No resources to initialize for in-memory storage
203
+
204
+ def close(self) -> None:
205
+ """Close the in-memory store.
206
+
207
+ For InMemoryMetadataStore, this is a no-op since no external
208
+ resources need cleanup.
209
+ """
210
+ pass # No resources to cleanup for in-memory storage
211
+
212
+ def __repr__(self) -> str:
213
+ """String representation."""
214
+ num_features = len(self._storage)
215
+ num_fallbacks = len(self.fallback_stores)
216
+ return (
217
+ f"InMemoryMetadataStore("
218
+ f"features={num_features}, "
219
+ f"fallback_stores={num_fallbacks})"
220
+ )
221
+
222
+ def display(self) -> str:
223
+ """Display string for this store."""
224
+ if self._is_open:
225
+ num_features = len(self._storage)
226
+ return f"InMemoryMetadataStore(features={num_features})"
227
+ else:
228
+ return "InMemoryMetadataStore()"