metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,443 @@
1
+ """Core data models for graph rendering."""
2
+
3
+ from enum import Enum
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ from pydantic import Field
7
+ from typing_extensions import Self
8
+
9
+ from metaxy.models.bases import FrozenBaseModel
10
+ from metaxy.models.types import FeatureKey, FieldKey
11
+ from metaxy.utils.constants import DEFAULT_CODE_VERSION
12
+ from metaxy.utils.exceptions import MetaxyEmptyCodeVersionError
13
+
14
+ if TYPE_CHECKING:
15
+ from metaxy.models.feature import FeatureGraph
16
+
17
+
18
class NodeStatus(str, Enum):
    """Rendering status attached to nodes and fields.

    Members:
        NORMAL: plain node, used when no diff is being shown
        UNCHANGED: present in the diff but identical on both sides
        ADDED: introduced by the diff
        REMOVED: dropped by the diff
        CHANGED: present on both sides of the diff with differences
    """

    NORMAL = "normal"
    UNCHANGED = "unchanged"
    ADDED = "added"
    REMOVED = "removed"
    CHANGED = "changed"
26
+
27
+
28
class FieldNode(FrozenBaseModel):
    """A single field belonging to a feature node in the rendered graph."""

    # Identifier of the field.
    key: FieldKey
    # Current field version hash; None if the field was removed.
    version: str | None = None
    # Previous field version hash; only meaningful in diff mode.
    old_version: str | None = None
    # Code version of the field, when available.
    code_version: str | None = None
    # Status used for diff rendering; NORMAL outside of diff mode.
    status: NodeStatus = NodeStatus.NORMAL
44
+
45
+
46
class GraphNode(FrozenBaseModel):
    """A feature rendered as one node of the graph."""

    # Identifier of the feature.
    key: FeatureKey
    # Current feature version hash; None if the feature was removed.
    version: str | None = None
    # Previous feature version hash; only meaningful in diff mode.
    old_version: str | None = None
    # Code version of the feature, when available.
    code_version: str | None = None
    # Field nodes that belong to this feature.
    fields: list[FieldNode] = Field(default_factory=list)
    # Keys of the features this node depends on.
    dependencies: list[FeatureKey] = Field(default_factory=list)
    # Status used for diff rendering; NORMAL outside of diff mode.
    status: NodeStatus = NodeStatus.NORMAL
    # Name of the owning project (None for legacy features).
    project: str | None = None
    # Additional custom metadata.
    metadata: dict[str, Any] = Field(default_factory=dict)
70
+
71
+
72
class EdgeData(FrozenBaseModel):
    """A directed edge of the graph, pointing from a dependency to its dependent."""

    # Source feature key (the dependency).
    from_key: FeatureKey
    # Target feature key (the dependent).
    to_key: FeatureKey
82
+
83
+
84
class GraphData(FrozenBaseModel):
    """Container for complete graph structure.

    This is the unified data model used by all renderers.

    Attributes:
        nodes: Map from feature key string to GraphNode
        edges: List of edges
        snapshot_version: Optional snapshot version
        old_snapshot_version: Optional old snapshot version (for diffs)
    """

    nodes: dict[str, GraphNode]  # Key is feature_key.to_string()
    edges: list[EdgeData] = Field(default_factory=list)
    snapshot_version: str | None = None
    old_snapshot_version: str | None = None  # For diff mode

    def get_node(self, key: FeatureKey) -> GraphNode | None:
        """Get node by feature key.

        Args:
            key: Feature key to lookup

        Returns:
            GraphNode if found, None otherwise
        """
        return self.nodes.get(key.to_string())

    def get_nodes_by_status(self, status: NodeStatus) -> list[GraphNode]:
        """Get all nodes with a specific status.

        Args:
            status: Status to filter by

        Returns:
            List of nodes with matching status
        """
        return [node for node in self.nodes.values() if node.status == status]

    def to_struct(self) -> dict[str, Any]:
        """Serialize to struct (native Python types for storage).

        Note: This uses custom serialization instead of Pydantic's model_dump() because:
        1. Polars struct columns require specific type conversions (e.g., None → "" for strings, None → 0 for ints)
        2. Custom types (FeatureKey, FieldKey) need explicit string conversion for storage
        3. The storage schema is a separate concern from the domain model's Python representation
        4. Different storage backends may need different serialization formats in the future

        Only key, version, code_version, fields, dependencies and project are
        written per node; status, old_version and metadata are not serialized.

        Returns:
            Dict with structure compatible with Polars struct type

        Raises:
            MetaxyEmptyCodeVersionError: If any node or field has ``code_version`` set to None.
        """
        # NOTE(review): from_struct() additionally rejects "" and
        # DEFAULT_CODE_VERSION, which this method lets through — confirm
        # whether the two checks are meant to differ.
        nodes_list: list[dict[str, Any]] = []
        for node in self.nodes.values():
            fields_list: list[dict[str, Any]] = []
            for field in node.fields:
                if field.code_version is None:
                    raise MetaxyEmptyCodeVersionError(
                        f"Field {field.key.to_string()} in feature {node.key.to_string()} has empty code_version."
                    )
                fields_list.append(
                    {
                        "key": field.key.to_string(),
                        # None is stored as "" (see docstring note 1).
                        "version": field.version if field.version is not None else "",
                        "code_version": field.code_version,
                    }
                )

            if node.code_version is None:
                raise MetaxyEmptyCodeVersionError(
                    f"Feature {node.key.to_string()} has empty code_version."
                )
            nodes_list.append(
                {
                    "key": node.key.to_string(),
                    "version": node.version if node.version is not None else "",
                    "code_version": node.code_version,
                    "fields": fields_list,
                    "dependencies": [dep.to_string() for dep in node.dependencies],
                    "project": node.project if node.project is not None else "",
                }
            )

        edges_list = [
            {
                "from_key": edge.from_key.to_string(),
                "to_key": edge.to_key.to_string(),
            }
            for edge in self.edges
        ]

        result: dict[str, Any] = {
            "nodes": nodes_list,
            "edges": edges_list,
        }

        # The snapshot versions are optional keys: they are only emitted when set.
        if self.snapshot_version is not None:
            result["metaxy_snapshot_version"] = self.snapshot_version
        if self.old_snapshot_version is not None:
            result["old_snapshot_version"] = self.old_snapshot_version

        return result

    @classmethod
    def from_struct(cls, struct_data: dict[str, Any]) -> Self:
        """Deserialize from struct.

        Empty-string ``version`` and ``project`` values (as written by
        to_struct() for None) are converted back to None.

        Args:
            struct_data: Dict with structure from to_struct()

        Returns:
            GraphData instance

        Raises:
            MetaxyEmptyCodeVersionError: If any node or field has a ``code_version``
                that is None, "" or equal to DEFAULT_CODE_VERSION.
        """

        def _is_empty_code_version(code_version: Any) -> bool:
            # Single definition of "empty" shared by the field and node checks.
            return (
                code_version == ""
                or code_version is None
                or code_version == DEFAULT_CODE_VERSION
            )

        nodes: dict[str, GraphNode] = {}
        for node_data in struct_data["nodes"]:
            fields: list[FieldNode] = []
            for field_data in node_data["fields"]:
                if _is_empty_code_version(field_data["code_version"]):
                    raise MetaxyEmptyCodeVersionError(
                        f"Field {field_data['key']} in feature {node_data['key']} has empty code_version."
                    )
                fields.append(
                    FieldNode(
                        key=FieldKey(field_data["key"].split("/")),
                        version=field_data["version"] or None,
                        code_version=field_data["code_version"],
                    )
                )

            # Fields are validated before the node itself, so a bad field is
            # reported first when both are invalid.
            if _is_empty_code_version(node_data["code_version"]):
                raise MetaxyEmptyCodeVersionError(
                    f"Feature {node_data['key']} has empty code_version."
                )
            nodes[node_data["key"]] = GraphNode(
                key=FeatureKey(node_data["key"].split("/")),
                version=node_data["version"] or None,
                code_version=node_data["code_version"],
                fields=fields,
                dependencies=[
                    FeatureKey(dep.split("/")) for dep in node_data["dependencies"]
                ],
                # "project" may be absent from the struct; missing and "" both map to None.
                project=node_data.get("project") or None,
            )

        edges = [
            EdgeData(
                from_key=FeatureKey(edge_data["from_key"].split("/")),
                to_key=FeatureKey(edge_data["to_key"].split("/")),
            )
            for edge_data in struct_data["edges"]
        ]

        return cls(
            nodes=nodes,
            edges=edges,
            # Both snapshot versions are optional keys in the struct.
            snapshot_version=struct_data.get("metaxy_snapshot_version"),
            old_snapshot_version=struct_data.get("old_snapshot_version"),
        )

    @classmethod
    def from_feature_graph(cls, graph: "FeatureGraph") -> "GraphData":
        """Convert a FeatureGraph to GraphData.

        Every node and field is created with status NORMAL. One edge is added
        per dependency, pointing from the dependency to the dependent feature.

        Args:
            graph: FeatureGraph instance

        Returns:
            GraphData with all nodes and edges
        """
        from metaxy.models.plan import FQFieldKey

        nodes: dict[str, GraphNode] = {}
        edges: list[EdgeData] = []

        for feature_key, feature_cls in graph.features_by_key.items():
            spec = feature_cls.spec()
            feature_version = graph.get_feature_version(feature_key)

            # One FieldNode per field spec, carrying the graph-computed field version.
            field_nodes: list[FieldNode] = []
            if spec.fields:
                for field_spec in spec.fields:
                    fq_field_key = FQFieldKey(feature=feature_key, field=field_spec.key)
                    field_nodes.append(
                        FieldNode(
                            key=field_spec.key,
                            version=graph.get_field_version(fq_field_key),
                            code_version=field_spec.code_version,
                            status=NodeStatus.NORMAL,
                        )
                    )

            dependencies: list[FeatureKey] = (
                [dep.feature for dep in spec.deps] if spec.deps else []
            )

            feature_project = feature_cls.project  # type: ignore[attr-defined]

            # NOTE(review): the node's code_version is not populated here, so
            # to_struct() raises MetaxyEmptyCodeVersionError for a graph built
            # by this method — confirm that is intended.
            nodes[feature_key.to_string()] = GraphNode(
                key=feature_key,
                version=feature_version,
                fields=field_nodes,
                dependencies=dependencies,
                status=NodeStatus.NORMAL,
                project=feature_project,
            )

            edges.extend(
                EdgeData(from_key=dep_key, to_key=feature_key)
                for dep_key in dependencies
            )

        return cls(
            nodes=nodes,
            edges=edges,
            snapshot_version=graph.snapshot_version,
        )

    @classmethod
    def from_merged_diff(cls, merged_data: dict[str, Any]) -> "GraphData":
        """Convert merged diff data to GraphData.

        Field nodes are emitted in a stable order: keys from the node's
        ``fields`` mapping first, followed by keys that only appear in
        ``field_changes`` (e.g. removed fields).

        Args:
            merged_data: Merged diff data from GraphDiffer.create_merged_graph_data()

        Returns:
            GraphData with status annotations
        """
        from metaxy.graph.diff.diff_models import FieldChange

        nodes: dict[str, GraphNode] = {}

        for feature_key_str, node_data in merged_data["nodes"].items():
            feature_key = FeatureKey(feature_key_str.split("/"))

            # The status strings are exactly the NodeStatus values; anything
            # unrecognized falls back to NORMAL.
            try:
                status = NodeStatus(node_data["status"])
            except ValueError:
                status = NodeStatus.NORMAL

            fields_dict = node_data.get("fields", {})

            # Index field changes by key string; entries that are not
            # FieldChange instances are ignored.
            field_change_map: dict[str, FieldChange] = {
                fc.field_key.to_string(): fc
                for fc in node_data.get("field_changes", [])
                if isinstance(fc, FieldChange)
            }

            # dict.fromkeys de-duplicates while keeping first-seen order, so the
            # resulting field order is reproducible across runs (iterating a set
            # of strings is not, because of hash randomization).
            all_field_keys = list(dict.fromkeys([*fields_dict, *field_change_map]))

            field_nodes: list[FieldNode] = []
            for field_key_str in all_field_keys:
                fc = field_change_map.get(field_key_str)
                if fc is None:
                    # No recorded change for this field: treat it as unchanged.
                    field_status = NodeStatus.UNCHANGED
                    field_version = fields_dict.get(field_key_str)
                    old_field_version = None
                elif fc.is_added:
                    field_status = NodeStatus.ADDED
                    field_version = fc.new_version
                    old_field_version = None
                elif fc.is_removed:
                    field_status = NodeStatus.REMOVED
                    field_version = None
                    old_field_version = fc.old_version
                elif fc.is_changed:
                    field_status = NodeStatus.CHANGED
                    field_version = fc.new_version
                    old_field_version = fc.old_version
                else:
                    # A change record that is neither added, removed nor changed.
                    field_status = NodeStatus.UNCHANGED
                    field_version = fc.new_version or fc.old_version
                    old_field_version = None

                field_nodes.append(
                    FieldNode(
                        key=FieldKey(field_key_str.split("/")),
                        version=field_version,
                        old_version=old_field_version,
                        status=field_status,
                    )
                )

            nodes[feature_key_str] = GraphNode(
                key=feature_key,
                version=node_data.get("new_version"),
                old_version=node_data.get("old_version"),
                fields=field_nodes,
                dependencies=[
                    FeatureKey(dep_str.split("/"))
                    for dep_str in node_data.get("dependencies", [])
                ],
                status=status,
            )

        # Merged diff edges use "from"/"to" keys rather than "from_key"/"to_key".
        edges = [
            EdgeData(
                from_key=FeatureKey(edge_dict["from"].split("/")),
                to_key=FeatureKey(edge_dict["to"].split("/")),
            )
            for edge_dict in merged_data["edges"]
        ]

        return cls(
            nodes=nodes,
            edges=edges,
        )
@@ -0,0 +1,18 @@
1
+ """Graph rendering - visualization backends for graphs and diffs."""
2
+
3
+ from metaxy.graph.diff.rendering.base import BaseRenderer, RenderConfig
4
+ from metaxy.graph.diff.rendering.cards import CardsRenderer
5
+ from metaxy.graph.diff.rendering.graphviz import GraphvizRenderer
6
+ from metaxy.graph.diff.rendering.mermaid import MermaidRenderer
7
+ from metaxy.graph.diff.rendering.rich import TerminalRenderer
8
+ from metaxy.graph.diff.rendering.theme import Theme
9
+
10
# Public API of the rendering package.
__all__ = [
    # Shared renderer base class and configuration
    "BaseRenderer",
    "RenderConfig",
    # Concrete renderers
    "TerminalRenderer",
    "CardsRenderer",
    "MermaidRenderer",
    "GraphvizRenderer",
    # Theming
    "Theme",
]