metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/graph/status.py ADDED
@@ -0,0 +1,329 @@
1
+ """Feature metadata status inspection utilities.
2
+
3
+ This module provides reusable SDK functions for inspecting feature metadata status,
4
+ useful for both CLI commands and programmatic usage.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import Sequence
10
+ from typing import TYPE_CHECKING, Any, Literal, NamedTuple
11
+
12
+ import narwhals as nw
13
+ from pydantic import BaseModel, Field
14
+
15
+ from metaxy.models.types import (
16
+ CoercibleToFeatureKey,
17
+ FeatureKey,
18
+ ValidatedFeatureKeyAdapter,
19
+ )
20
+
21
+ if TYPE_CHECKING:
22
+ from metaxy import BaseFeature
23
+ from metaxy.metadata_store.base import MetadataStore
24
+ from metaxy.versioning.types import LazyIncrement
25
+
26
+
27
class FullFeatureMetadataRepresentation(BaseModel):
    """Full JSON-safe representation of feature metadata status.

    Every field is a plain built-in type (str, bool, int, list of str) so the
    model can be serialized without custom encoders.
    """

    # Feature key rendered as a string.
    feature_key: str
    # One of the four status labels.
    status: Literal["missing", "needs_update", "up_to_date", "root_feature"]
    needs_update: bool
    metadata_exists: bool
    # Number of metadata rows.
    rows: int
    # Added/changed sample counts; None is allowed for cases where the counts
    # are not meaningful (the code in this module passes None for root features).
    added: int | None
    changed: int | None
    # Feature version the status was computed against.
    target_version: str
    is_root_feature: bool = False
    # Optional preview lines; None when no preview was requested/produced.
    sample_details: list[str] | None = None
40
+
41
+
42
# Closed set of labels used to classify a feature's metadata status.
StatusCategory = Literal["missing", "needs_update", "up_to_date", "root_feature"]

# Status display configuration
# Icon (with Rich colour markup) shown for each status category.
_STATUS_ICONS: dict[StatusCategory, str] = {
    "missing": "[red]✗[/red]",
    "root_feature": "[blue]○[/blue]",
    "needs_update": "[yellow]⚠[/yellow]",
    "up_to_date": "[green]✓[/green]",
}

# Human-readable description shown for each status category.
_STATUS_TEXTS: dict[StatusCategory, str] = {
    "missing": "missing metadata",
    "root_feature": "root feature",
    "needs_update": "needs update",
    "up_to_date": "up-to-date",
}
58
+
59
+
60
class FeatureMetadataStatus(BaseModel):
    """Snapshot of a feature's metadata state in a metadata store.

    Records whether metadata exists, how many rows it has, how many samples
    would be added or changed, and whether an update is needed.

    The model holds only plain, validated fields (no arbitrary types). When
    the LazyIncrement object is needed as well, use
    FeatureMetadataStatusWithIncrement.
    """

    feature_key: FeatureKey = Field(description="The feature key being inspected")
    target_version: str = Field(description="The feature version from code")
    metadata_exists: bool = Field(description="Whether metadata exists in the store")
    row_count: int = Field(description="Number of metadata rows (0 if none exist)")
    added_count: int = Field(description="Number of samples that would be added")
    changed_count: int = Field(description="Number of samples that would be changed")
    needs_update: bool = Field(description="Whether updates are needed")
    is_root_feature: bool = Field(
        default=False,
        description="Whether this is a root feature (no upstream dependencies)",
    )

    @property
    def status_category(self) -> StatusCategory:
        """Derive the status label from the stored flags.

        Precedence: missing metadata, then root feature, then needs update,
        otherwise up to date.
        """
        if self.metadata_exists:
            if self.is_root_feature:
                return "root_feature"
            return "needs_update" if self.needs_update else "up_to_date"
        return "missing"

    def format_status_line(self) -> str:
        """Render a one-line, Rich-markup summary of this status."""
        category = self.status_category
        icon = _STATUS_ICONS[category]
        text = _STATUS_TEXTS[category]
        key = self.feature_key.to_string()

        counts = [f"rows: {self.row_count}"]
        # Added/changed counts are not meaningful for root features, so they
        # are only reported for features with upstream dependencies.
        if not self.is_root_feature:
            counts.append(f"added: {self.added_count}")
            counts.append(f"changed: {self.changed_count}")

        return f"{icon} {key} ({', '.join(counts)}) — {text}"
109
+
110
+
111
class FeatureMetadataStatusWithIncrement(NamedTuple):
    """Feature metadata status paired with its LazyIncrement data.

    Bundles the pure Pydantic status model with the (optional) LazyIncrement
    that sample-level operations, such as generating previews, need.

    Attributes:
        status: The computed status model.
        lazy_increment: The increment backing the status, or None when no
            increment is available.
    """

    status: FeatureMetadataStatus
    lazy_increment: LazyIncrement | None

    @property
    def status_category(self) -> StatusCategory:
        """Delegate to the status model's category."""
        return self.status.status_category

    def sample_details(
        self,
        feature_cls: type[BaseFeature],
        *,
        limit: int = 5,
    ) -> list[str]:
        """Return formatted sample preview lines for verbose output.

        Args:
            feature_cls: Feature class whose spec supplies the ID columns
                shown in the preview.
            limit: Maximum number of samples to preview per category.

        Returns:
            Stripped preview lines; an empty list when there is no increment.
        """
        if self.lazy_increment is None:
            return []

        id_columns_spec = feature_cls.spec().id_columns  # type: ignore[attr-defined]
        id_columns_seq = tuple(id_columns_spec) if id_columns_spec is not None else None

        previews = format_sample_previews(
            self.lazy_increment,
            self.status.added_count,
            self.status.changed_count,
            id_columns_seq,
            limit=limit,
        )
        return [line.strip() for line in previews]

    def to_representation(
        self,
        feature_cls: type[BaseFeature],
        *,
        verbose: bool,
    ) -> FullFeatureMetadataRepresentation:
        """Convert status to the full JSON representation used by the CLI.

        Args:
            feature_cls: Feature class used to build sample previews.
            verbose: When True and an increment is available, include sample
                preview lines; otherwise ``sample_details`` is None.

        Returns:
            The JSON-safe representation of this status.
        """
        status = self.status

        # Test for presence explicitly (matching ``sample_details``) rather
        # than relying on the truthiness of the increment object.
        sample_details = None
        if verbose and self.lazy_increment is not None:
            sample_details = self.sample_details(feature_cls)

        # For root features, added/changed are not meaningful
        added = None if status.is_root_feature else status.added_count
        changed = None if status.is_root_feature else status.changed_count

        return FullFeatureMetadataRepresentation(
            feature_key=status.feature_key.to_string(),
            status=self.status_category,
            needs_update=status.needs_update,
            metadata_exists=status.metadata_exists,
            rows=status.row_count,
            added=added,
            changed=changed,
            target_version=status.target_version,
            is_root_feature=status.is_root_feature,
            sample_details=sample_details,
        )
178
+
179
+
180
def format_sample_previews(
    lazy_increment: LazyIncrement,
    added_count: int,
    changed_count: int,
    id_columns: Sequence[str] | None = None,
    limit: int = 5,
) -> list[str]:
    """Build preview lines for the added and changed samples of an increment.

    Args:
        lazy_increment: The LazyIncrement containing added/changed samples
        added_count: Number of added samples (to avoid re-counting)
        changed_count: Number of changed samples (to avoid re-counting)
        id_columns: Columns to include in previews (defaults to ["sample_uid"])
        limit: Maximum number of samples to preview per category

    Returns:
        At most two lines: one for added samples, then one for changed
        samples. A category is skipped when its count is not positive or its
        preview turns out to be empty.
    """
    cols = list(id_columns or ["sample_uid"])

    def _render(label: str, frame: Any) -> str | None:
        # Materialize only the first `limit` rows of the selected columns.
        preview = frame.select(cols).head(limit).collect().to_polars()
        if preview.height <= 0:
            return None
        rendered = []
        for record in preview.to_dicts():
            pairs = (f"{name}={record[name]}" for name in preview.columns)
            rendered.append(", ".join(pairs))
        return f" {label} samples: " + "; ".join(rendered)

    lines: list[str] = []

    # Each frame is only touched when the caller reported a positive count.
    if added_count > 0:
        added_line = _render("Added", lazy_increment.added)
        if added_line is not None:
            lines.append(added_line)

    if changed_count > 0:
        changed_line = _render("Changed", lazy_increment.changed)
        if changed_line is not None:
            lines.append(changed_line)

    return lines
225
+
226
+
227
def count_lazy_rows(lazy_frame: nw.LazyFrame[Any]) -> int:
    """Count the rows of a Narwhals LazyFrame.

    Args:
        lazy_frame: The LazyFrame to count rows from

    Returns:
        Number of rows in the LazyFrame
    """
    # Collect a single-cell frame holding the length, then read that cell
    # through its Polars representation.
    length_frame = lazy_frame.select(nw.len()).collect()
    polars_frame = length_frame.to_polars()
    return polars_frame["len"].item()
237
+
238
+
239
def get_feature_metadata_status(
    feature_key: CoercibleToFeatureKey,
    metadata_store: MetadataStore,
    *,
    use_fallback: bool = False,
) -> FeatureMetadataStatusWithIncrement:
    """Compute the metadata status of one feature in the active graph.

    Args:
        feature_key: The feature key or feature class to check.
            Accepts a string ("a/b/c"), sequence of strings (["a", "b", "c"]),
            FeatureKey instance, or BaseFeature class.
        metadata_store: The metadata store to query
        use_fallback: Whether to read metadata row counts from fallback stores.
            When True, checks fallback stores if metadata is missing in the primary store.
            When False (default), only checks the primary store.
            Note: resolve_update always uses the primary store only.

    Returns:
        FeatureMetadataStatusWithIncrement containing status and lazy increment.
        For root features the increment is None and the added/changed counts
        are reported as 0.

    Raises:
        ValueError: If the resolved key is not registered in the active graph.
    """
    from metaxy.metadata_store.exceptions import FeatureNotFoundError
    from metaxy.models.feature import FeatureGraph

    # The type adapter normalizes every accepted input form to a FeatureKey.
    key = ValidatedFeatureKeyAdapter.validate_python(feature_key)

    # Resolve the feature class through the active graph.
    graph = FeatureGraph.get_active()
    if key not in graph.features_by_key:
        raise ValueError(f"Feature {key.to_string()} not found in active graph")
    feature_cls = graph.features_by_key[key]

    target_version = feature_cls.feature_version()

    # A feature whose plan has no deps is a root feature.
    is_root_feature = not graph.get_feature_plan(key).deps

    # Only the ID columns are read: they are enough to count rows.
    id_columns = feature_cls.spec().id_columns  # type: ignore[attr-defined]
    columns = None if id_columns is None else list(id_columns)

    try:
        row_count = count_lazy_rows(
            metadata_store.read_metadata(
                key,
                columns=columns,
                allow_fallback=use_fallback,
            )
        )
    except FeatureNotFoundError:
        row_count = 0
        metadata_exists = False
    else:
        metadata_exists = True

    # For root features, we can't determine added/changed without samples
    if is_root_feature:
        root_status = FeatureMetadataStatus(
            feature_key=key,
            target_version=target_version,
            metadata_exists=metadata_exists,
            row_count=row_count,
            added_count=0,
            changed_count=0,
            needs_update=False,
            is_root_feature=True,
        )
        return FeatureMetadataStatusWithIncrement(
            status=root_status, lazy_increment=None
        )

    # Non-root features: resolve the update lazily and count both sides.
    lazy_increment = metadata_store.resolve_update(feature_cls, lazy=True)
    added_count = count_lazy_rows(lazy_increment.added)
    changed_count = count_lazy_rows(lazy_increment.changed)

    status = FeatureMetadataStatus(
        feature_key=key,
        target_version=target_version,
        metadata_exists=metadata_exists,
        row_count=row_count,
        added_count=added_count,
        changed_count=changed_count,
        needs_update=added_count > 0 or changed_count > 0,
    )
    return FeatureMetadataStatusWithIncrement(
        status=status, lazy_increment=lazy_increment
    )
metaxy/graph/utils.py ADDED
@@ -0,0 +1,58 @@
1
+ """Shared utilities for graph rendering and formatting."""
2
+
3
+ from metaxy.models.types import FeatureKey, FieldKey
4
+
5
+
6
def sanitize_mermaid_id(s: str) -> str:
    """Turn a string into one usable as a Mermaid node ID.

    ``/`` and ``-`` are each replaced with ``_``; afterwards a single pass
    replaces every non-overlapping ``__`` with ``_``. Because only one pass is
    made, runs of three or more underscores shrink but are not necessarily
    reduced to a single underscore.

    Args:
        s: String to sanitize

    Returns:
        Sanitized string safe for use as Mermaid node ID
    """
    separators_to_underscore = str.maketrans("/-", "__")
    underscored = s.translate(separators_to_underscore)
    return underscored.replace("__", "_")
18
+
19
+
20
def format_hash(hash_str: str, length: int = 8) -> str:
    """Return a hash string, shortened to its leading characters if requested.

    Args:
        hash_str: Full hash string
        length: Number of characters to show (0 for full hash)

    Returns:
        Truncated or full hash string
    """
    # Slicing clamps to the string's length, so only the "0 means full hash"
    # case needs special handling.
    return hash_str if length == 0 else hash_str[:length]
33
+
34
+
35
def format_feature_key(key: FeatureKey) -> str:
    """Render a feature key as a single display string.

    The parts obtained by iterating over the key are joined with ``/`` for
    readability.

    Args:
        key: Feature key

    Returns:
        Formatted string like "my/feature/key"
    """
    separator = "/"
    return separator.join(key)
47
+
48
+
49
def format_field_key(key: FieldKey) -> str:
    """Render a field key as a single display string.

    The parts obtained by iterating over the key are joined with ``/``; a
    one-part key is returned as that part alone.

    Args:
        key: Field key

    Returns:
        Formatted string like "field_name"
    """
    separator = "/"
    return separator.join(key)
metaxy/metadata_store/__init__.py ADDED
@@ -0,0 +1,32 @@
1
+ """Metadata store for feature pipeline management."""
2
+
3
+ from metaxy.metadata_store.base import MetadataStore, allow_feature_version_override
4
+ from metaxy.metadata_store.exceptions import (
5
+ DependencyError,
6
+ FeatureNotFoundError,
7
+ FieldNotFoundError,
8
+ HashAlgorithmNotSupportedError,
9
+ MetadataSchemaError,
10
+ MetadataStoreError,
11
+ StoreNotOpenError,
12
+ )
13
+ from metaxy.metadata_store.memory import InMemoryMetadataStore
14
+ from metaxy.metadata_store.system import (
15
+ FEATURE_VERSIONS_KEY,
16
+ )
17
+ from metaxy.metadata_store.types import AccessMode
18
+
19
# Names exported by `from metaxy.metadata_store import *`; each entry is
# imported above in this module.
__all__ = [
    "MetadataStore",
    "InMemoryMetadataStore",
    "MetadataStoreError",
    "FeatureNotFoundError",
    "FieldNotFoundError",
    "MetadataSchemaError",
    "DependencyError",
    "StoreNotOpenError",
    "HashAlgorithmNotSupportedError",
    "FEATURE_VERSIONS_KEY",
    "allow_feature_version_override",
    "AccessMode",
]