metaxy 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of metaxy might be problematic. Click here for more details.
- metaxy/__init__.py +61 -0
- metaxy/_testing.py +542 -0
- metaxy/_utils.py +16 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +76 -0
- metaxy/cli/context.py +71 -0
- metaxy/cli/graph.py +576 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +42 -0
- metaxy/cli/metadata.py +271 -0
- metaxy/cli/migrations.py +862 -0
- metaxy/cli/push.py +55 -0
- metaxy/config.py +450 -0
- metaxy/data_versioning/__init__.py +24 -0
- metaxy/data_versioning/calculators/__init__.py +13 -0
- metaxy/data_versioning/calculators/base.py +97 -0
- metaxy/data_versioning/calculators/duckdb.py +186 -0
- metaxy/data_versioning/calculators/ibis.py +225 -0
- metaxy/data_versioning/calculators/polars.py +135 -0
- metaxy/data_versioning/diff/__init__.py +15 -0
- metaxy/data_versioning/diff/base.py +150 -0
- metaxy/data_versioning/diff/narwhals.py +108 -0
- metaxy/data_versioning/hash_algorithms.py +19 -0
- metaxy/data_versioning/joiners/__init__.py +9 -0
- metaxy/data_versioning/joiners/base.py +70 -0
- metaxy/data_versioning/joiners/narwhals.py +235 -0
- metaxy/entrypoints.py +309 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/alembic.py +326 -0
- metaxy/ext/sqlmodel.py +172 -0
- metaxy/ext/sqlmodel_system_tables.py +139 -0
- metaxy/graph/__init__.py +21 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +399 -0
- metaxy/graph/diff/differ.py +740 -0
- metaxy/graph/diff/models.py +418 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +274 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +320 -0
- metaxy/graph/diff/rendering/rich.py +165 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +31 -0
- metaxy/metadata_store/_protocols.py +38 -0
- metaxy/metadata_store/base.py +1676 -0
- metaxy/metadata_store/clickhouse.py +161 -0
- metaxy/metadata_store/duckdb.py +167 -0
- metaxy/metadata_store/exceptions.py +43 -0
- metaxy/metadata_store/ibis.py +451 -0
- metaxy/metadata_store/memory.py +228 -0
- metaxy/metadata_store/sqlite.py +187 -0
- metaxy/metadata_store/system_tables.py +257 -0
- metaxy/migrations/__init__.py +34 -0
- metaxy/migrations/detector.py +153 -0
- metaxy/migrations/executor.py +208 -0
- metaxy/migrations/loader.py +260 -0
- metaxy/migrations/models.py +718 -0
- metaxy/migrations/ops.py +390 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +6 -0
- metaxy/models/constants.py +24 -0
- metaxy/models/feature.py +665 -0
- metaxy/models/feature_spec.py +105 -0
- metaxy/models/field.py +25 -0
- metaxy/models/plan.py +155 -0
- metaxy/models/types.py +157 -0
- metaxy/py.typed +0 -0
- metaxy-0.0.0.dist-info/METADATA +247 -0
- metaxy-0.0.0.dist-info/RECORD +75 -0
- metaxy-0.0.0.dist-info/WHEEL +4 -0
- metaxy-0.0.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Terminal renderer using Rich Tree for hierarchical display.
|
|
2
|
+
|
|
3
|
+
Requires rich library to be installed.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from metaxy.graph.diff.models import NodeStatus
|
|
7
|
+
from metaxy.graph.diff.rendering.base import BaseRenderer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TerminalRenderer(BaseRenderer):
    """Render a feature graph as a Rich ``Tree`` for terminal output.

    Every feature becomes a branch under a single root node; its fields and
    dependencies are nested beneath it. Node status selects the markup
    colour and the optional badge, so the same renderer is used for plain
    graphs and for graph diffs.
    """

    def render(self) -> str:
        """Build the tree and return it as captured console output.

        Returns:
            The text captured from a Rich console while printing the tree.
        """
        # Imported lazily: rich is only needed when this renderer is used.
        from rich.console import Console
        from rich.tree import Tree

        console = Console()

        # Graph data after the renderer's configured filtering.
        graph = self._get_filtered_graph_data()

        # Root label, optionally annotated with the snapshot version.
        if self.config.show_snapshot_version:
            snapshot_version = self._format_hash(graph.snapshot_version)
            root_label = (
                f"📊 [bold]Graph[/bold] [dim](snapshot: {snapshot_version})[/dim]"
            )
        else:
            root_label = "📊 [bold]Graph[/bold]"
        root = Tree(root_label)

        # Features are added in topological order over the filtered graph.
        from metaxy.graph.diff.traversal import GraphWalker

        for graph_node in GraphWalker(graph).topological_sort():
            self._render_feature_node(root, graph_node)

        # Print into a capture buffer instead of the real terminal.
        with console.capture() as captured:
            console.print(root)
        return captured.get()

    def _rich_label_for(self, item, format_key, show_versions):
        """Compose the Rich markup label shared by feature and field nodes.

        Args:
            item: Node exposing ``status``, ``key``, ``version``,
                ``old_version`` and ``code_version``.
            format_key: Callable turning ``item.key`` into display text.
            show_versions: Whether to append the version segment.

        Returns:
            Tuple of ``(label, status_color)``.
        """
        color = self._get_status_color(item.status)
        pieces = [f"[{color}]{format_key(item.key)}[/{color}]"]

        if show_versions:
            has_transition = (
                item.status == NodeStatus.CHANGED and item.old_version is not None
            )
            if has_transition:
                # Changed node with a known previous version: old -> new.
                pieces.append(
                    self._format_version_transition(item.old_version, item.version)
                )
            else:
                version = self._format_hash(item.version)
                pieces.append(f"[yellow](v: {version})[/yellow]")

        if self.config.show_code_versions and item.code_version is not None:
            pieces.append(f"[dim](cv: {item.code_version})[/dim]")

        # Any non-NORMAL status gets a badge (which may be an empty string).
        if item.status != NodeStatus.NORMAL:
            pieces.append(self._get_status_badge(item.status))

        return " ".join(pieces), color

    def _render_feature_node(self, parent, node):
        """Attach a feature, its fields and its dependencies to the tree.

        Args:
            parent: Parent tree node
            node: GraphNode
        """
        label, status_color = self._rich_label_for(
            node, self._format_feature_key, self.config.show_feature_versions
        )
        feature_branch = parent.add(label)

        # Fields sub-branch, only when enabled and the feature has fields.
        if self.config.show_fields and node.fields:
            fields_branch = feature_branch.add("🔧 [green]fields[/green]")
            for field_node in node.fields:
                self._render_field_node(fields_branch, field_node)

        # Dependencies sub-branch; entries reuse the feature's own colour.
        if node.dependencies:
            deps_branch = feature_branch.add("⬅️ [blue]depends on[/blue]")
            for dep_key in node.dependencies:
                dep_text = self._format_feature_key(dep_key)
                deps_branch.add(f"[{status_color}]{dep_text}[/{status_color}]")

    def _render_field_node(self, parent, field_node):
        """Attach a single field entry to the tree.

        Args:
            parent: Parent tree node
            field_node: FieldNode
        """
        label, _ = self._rich_label_for(
            field_node, self._format_field_key, self.config.show_field_versions
        )
        parent.add(label)

    def _get_status_badge(self, status: NodeStatus) -> str:
        """Return the coloured badge for a diff status.

        Args:
            status: Node status

        Returns:
            Rich-formatted ``[+]``, ``[-]`` or ``[~]`` badge for added,
            removed or changed nodes; an empty string for any other status.
        """
        if status == NodeStatus.ADDED:
            color, symbol = self.theme.added_color, "+"
        elif status == NodeStatus.REMOVED:
            color, symbol = self.theme.removed_color, "-"
        elif status == NodeStatus.CHANGED:
            color, symbol = self.theme.changed_color, "~"
        else:
            # Unchanged and normal nodes carry no badge.
            return ""
        return f"[{color}][{symbol}][/{color}]"
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Theme system for graph rendering."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Theme(BaseModel):
    """Colour palette shared by the graph rendering backends.

    One theme is used for every backend (terminal, Mermaid, Graphviz).
    The defaults are hex strings (e.g., "#FF5733"); Rich accepts hex
    colours directly in markup: [#FF0000]text[/#FF0000]
    """

    # --- Normal mode ---------------------------------------------------
    feature_color: str = Field(
        default="#00FFFF",
        description="Feature node color (cyan)",
    )
    field_color: str = Field(
        default="#5F87AF",
        description="Field color (steel blue)",
    )
    version_color: str = Field(
        default="#FFFF00",
        description="Version info color (yellow)",
    )
    edge_color: str = Field(
        default="#808080",
        description="Edge/dependency color (gray)",
    )
    snapshot_color: str = Field(
        default="#FF00FF",
        description="Snapshot info color (magenta)",
    )

    # --- Diff mode: node/edge status -----------------------------------
    added_color: str = Field(
        default="#00FF00",
        description="Added items (green)",
    )
    removed_color: str = Field(
        default="#FF0000",
        description="Removed items (red)",
    )
    changed_color: str = Field(
        default="#FFAA00",
        description="Changed items (orange)",
    )
    unchanged_color: str = Field(
        default="#808080",
        description="Unchanged items (gray)",
    )

    # --- Diff mode: old -> new version transitions ---------------------
    old_version_color: str = Field(
        default="#FF0000",
        description="Old version color (red)",
    )
    new_version_color: str = Field(
        default="#00FF00",
        description="New version color (green)",
    )

    @classmethod
    def default(cls) -> "Theme":
        """Return a theme built entirely from the field defaults."""
        return cls()
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"""Graph traversal utilities."""
|
|
2
|
+
|
|
3
|
+
from collections import deque
|
|
4
|
+
|
|
5
|
+
from metaxy.graph.diff.models import GraphData, GraphNode
|
|
6
|
+
from metaxy.models.types import FeatureKey
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GraphWalker:
    """Traverses and filters graph data structures.

    Provides various traversal strategies:
    - Topological sort (dependencies first)
    - BFS from starting node
    - Subgraph extraction with up/down filtering
    """

    def __init__(self, graph_data: GraphData):
        """Initialize walker with graph data.

        Args:
            graph_data: Graph structure to traverse
        """
        self.graph_data = graph_data

    def topological_sort(
        self, nodes_to_include: set[str] | None = None
    ) -> list[GraphNode]:
        """Get nodes in topological order (dependencies first).

        Keys are visited in case-insensitive sorted order, both at the top
        level and among each node's dependencies, so the output is
        deterministic for a given graph.

        Args:
            nodes_to_include: Optional set of feature key strings to include.
                If None, includes all nodes.

        Returns:
            List of nodes sorted so dependencies appear before dependents
        """
        if nodes_to_include is None:
            nodes_to_include = set(self.graph_data.nodes.keys())

        visited: set[str] = set()
        result: list[GraphNode] = []

        def visit(key_str: str) -> None:
            # Skips keys already emitted and keys outside the requested set.
            if key_str in visited or key_str not in nodes_to_include:
                return
            visited.add(key_str)

            node = self.graph_data.nodes[key_str]

            # Dependencies first, in sorted order for determinism.
            sorted_deps = sorted(
                (dep_key.to_string() for dep_key in node.dependencies),
                key=str.lower,  # Case-insensitive sort
            )
            for dep_key_str in sorted_deps:
                visit(dep_key_str)

            result.append(node)

        for key_str in sorted(nodes_to_include, key=str.lower):
            visit(key_str)

        return result

    def bfs_from(
        self, start_key: FeatureKey, max_depth: int | None = None
    ) -> list[GraphNode]:
        """BFS traversal starting from a node, following dependencies.

        Args:
            start_key: Feature key to start from
            max_depth: Maximum depth to traverse (None = unlimited)

        Returns:
            List of nodes in BFS order; empty if ``start_key`` is not in
            the graph
        """
        start_key_str = start_key.to_string()
        if start_key_str not in self.graph_data.nodes:
            return []

        visited: set[str] = set()
        result: list[GraphNode] = []
        queue = deque([(start_key_str, 0)])  # (key_str, depth)

        while queue:
            key_str, depth = queue.popleft()

            if key_str in visited:
                continue

            if max_depth is not None and depth > max_depth:
                continue

            visited.add(key_str)
            node = self.graph_data.nodes[key_str]
            result.append(node)

            # Enqueue dependencies that exist in the graph.
            for dep_key in node.dependencies:
                dep_key_str = dep_key.to_string()
                if dep_key_str not in visited and dep_key_str in self.graph_data.nodes:
                    queue.append((dep_key_str, depth + 1))

        return result

    def extract_subgraph(
        self,
        focus_key: FeatureKey,
        up: int | None = None,
        down: int | None = None,
    ) -> GraphData:
        """Extract a subgraph centered on a focus node.

        Args:
            focus_key: Feature to focus on
            up: Number of upstream levels (dependencies) to include.
                None or negative = all, 0 = none
            down: Number of downstream levels (dependents) to include.
                None or negative = all, 0 = none

        Returns:
            New GraphData with filtered nodes and edges

        Raises:
            ValueError: If focus_key not found in graph
        """
        focus_key_str = focus_key.to_string()
        if focus_key_str not in self.graph_data.nodes:
            raise ValueError(f"Feature '{focus_key_str}' not found in graph")

        # Start with focus node
        nodes_to_include = {focus_key_str}

        # Add upstream (dependencies)
        if up != 0:
            max_up = None if up is None or up < 0 else up
            nodes_to_include.update(
                self._get_upstream(focus_key_str, max_levels=max_up)
            )

        # Add downstream (dependents)
        if down != 0:
            max_down = None if down is None or down < 0 else down
            nodes_to_include.update(
                self._get_downstream(focus_key_str, max_levels=max_down)
            )

        # Keep only the selected nodes, and edges with both ends selected.
        filtered_nodes = {
            k: v for k, v in self.graph_data.nodes.items() if k in nodes_to_include
        }

        filtered_edges = [
            edge
            for edge in self.graph_data.edges
            if edge.from_key.to_string() in nodes_to_include
            and edge.to_key.to_string() in nodes_to_include
        ]

        return GraphData(
            nodes=filtered_nodes,
            edges=filtered_edges,
            snapshot_version=self.graph_data.snapshot_version,
            old_snapshot_version=self.graph_data.old_snapshot_version,
        )

    @staticmethod
    def _collect_within_levels(start_key_str, neighbors, max_levels):
        """Collect keys reachable from a start key within a level limit.

        The walk is breadth-first so every key is first reached at its
        minimum distance from the start. A depth-first walk with a shared
        visited set can reach a key at the level limit via a long path,
        mark it seen, and then never expand it even though a shorter path
        would leave levels to spare.

        Args:
            start_key_str: Key to start from (never part of the result)
            neighbors: Callable returning the adjacent key strings of a key
            max_levels: Maximum distance from the start (None = unlimited,
                0 or negative = nothing)

        Returns:
            Set of reachable key strings, excluding the start key
        """
        found: set[str] = set()
        if max_levels is not None and max_levels <= 0:
            return found

        seen = {start_key_str}
        queue = deque([(start_key_str, 0)])  # (key_str, level)

        while queue:
            key_str, level = queue.popleft()

            # Keys sitting at the limit are included but not expanded.
            if max_levels is not None and level >= max_levels:
                continue

            for neighbor in neighbors(key_str):
                if neighbor in seen:
                    continue
                seen.add(neighbor)
                found.add(neighbor)
                queue.append((neighbor, level + 1))

        return found

    def _get_upstream(
        self, start_key_str: str, max_levels: int | None = None
    ) -> set[str]:
        """Get upstream features (dependencies) transitively.

        Args:
            start_key_str: Feature key string to start from
            max_levels: Maximum levels to traverse (None = unlimited)

        Returns:
            Set of upstream feature key strings that exist in the graph
        """
        nodes = self.graph_data.nodes

        def dependencies_of(key_str: str) -> list[str]:
            if key_str not in nodes:
                return []
            dep_strs = (dep_key.to_string() for dep_key in nodes[key_str].dependencies)
            # Dependencies pointing outside the graph are ignored.
            return [dep_str for dep_str in dep_strs if dep_str in nodes]

        return self._collect_within_levels(start_key_str, dependencies_of, max_levels)

    def _get_downstream(
        self, start_key_str: str, max_levels: int | None = None
    ) -> set[str]:
        """Get downstream features (dependents) transitively.

        Args:
            start_key_str: Feature key string to start from
            max_levels: Maximum levels to traverse (None = unlimited)

        Returns:
            Set of downstream feature key strings
        """
        # Build reverse dependency map (feature -> dependents)
        dependents_map: dict[str, list[str]] = {}
        for node in self.graph_data.nodes.values():
            node_key_str = node.key.to_string()
            for dep_key in node.dependencies:
                dependents_map.setdefault(dep_key.to_string(), []).append(node_key_str)

        def dependents_of(key_str: str) -> list[str]:
            return dependents_map.get(key_str, [])

        return self._collect_within_levels(start_key_str, dependents_of, max_levels)

    def get_root_nodes(self) -> list[GraphNode]:
        """Get all root nodes (nodes with no dependencies).

        Returns:
            List of root nodes
        """
        return [
            node for node in self.graph_data.nodes.values() if not node.dependencies
        ]
|
metaxy/graph/utils.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Shared utilities for graph rendering and formatting."""
|
|
2
|
+
|
|
3
|
+
from metaxy.models.types import FeatureKey, FieldKey
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def sanitize_mermaid_id(s: str) -> str:
    """Turn a string into an identifier usable as a Mermaid node ID.

    Slashes and hyphens become underscores, then a single left-to-right
    pass replaces each ``__`` pair with ``_``. Runs of three or more
    underscores are therefore shortened but not fully collapsed.

    Args:
        s: String to sanitize

    Returns:
        The sanitized string
    """
    separators_to_underscore = str.maketrans({"/": "_", "-": "_"})
    underscored = s.translate(separators_to_underscore)
    return underscored.replace("__", "_")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def format_hash(hash_str: str, length: int = 8) -> str:
    """Format hash string with optional truncation.

    Args:
        hash_str: Full hash string
        length: Number of leading characters to show. ``0`` (or any
            non-positive value) returns the full hash.

    Returns:
        Truncated or full hash string
    """
    # A negative length would otherwise slice characters off the END of the
    # hash (hash_str[:-n]), which is never what a display length means.
    if length <= 0 or length >= len(hash_str):
        return hash_str
    return hash_str[:length]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def format_feature_key(key: FeatureKey) -> str:
    """Render a feature key as a single display string.

    The parts obtained by iterating the key are joined with ``/``.

    Args:
        key: Feature key

    Returns:
        Formatted string like "my/feature/key"
    """
    separator = "/"
    return separator.join(key)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def format_field_key(key: FieldKey) -> str:
    """Render a field key as a single display string.

    The parts obtained by iterating the key are joined with ``/``, so a
    one-part key is shown as-is.

    Args:
        key: Field key

    Returns:
        Formatted string like "field_name"
    """
    separator = "/"
    return separator.join(key)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Metadata store for feature pipeline management."""
|
|
2
|
+
|
|
3
|
+
from metaxy.metadata_store.base import MetadataStore
|
|
4
|
+
from metaxy.metadata_store.exceptions import (
|
|
5
|
+
DependencyError,
|
|
6
|
+
FeatureNotFoundError,
|
|
7
|
+
FieldNotFoundError,
|
|
8
|
+
HashAlgorithmNotSupportedError,
|
|
9
|
+
MetadataSchemaError,
|
|
10
|
+
MetadataStoreError,
|
|
11
|
+
StoreNotOpenError,
|
|
12
|
+
)
|
|
13
|
+
from metaxy.metadata_store.memory import InMemoryMetadataStore
|
|
14
|
+
from metaxy.metadata_store.system_tables import (
|
|
15
|
+
FEATURE_VERSIONS_KEY,
|
|
16
|
+
allow_feature_version_override,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Names re-exported as the public API of this package.
__all__ = [
    # Store implementations
    "MetadataStore",
    "InMemoryMetadataStore",
    # Exceptions (from metaxy.metadata_store.exceptions)
    "MetadataStoreError",
    "FeatureNotFoundError",
    "FieldNotFoundError",
    "MetadataSchemaError",
    "DependencyError",
    "StoreNotOpenError",
    "HashAlgorithmNotSupportedError",
    # System table exports (from metaxy.metadata_store.system_tables)
    "FEATURE_VERSIONS_KEY",
    "allow_feature_version_override",
]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Internal protocols for metadata store components."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from typing import Any, Protocol
|
|
7
|
+
|
|
8
|
+
import narwhals as nw
|
|
9
|
+
import polars as pl
|
|
10
|
+
|
|
11
|
+
from metaxy.models.types import FeatureKey
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MetadataStoreProtocol(Protocol):
    """Structural interface that SystemTableStorage expects from a store.

    Declaring only the methods SystemTableStorage actually uses lets
    system_tables.py avoid importing base.py, which breaks the circular
    dependency between the two modules.
    """

    def _write_metadata_impl(self, feature_key: FeatureKey, df: pl.DataFrame) -> None:
        """Write metadata for a feature key.

        Args:
            feature_key: Feature the metadata belongs to
            df: Metadata to write
        """
        ...

    def _read_metadata_native(
        self,
        feature: FeatureKey,
        *,
        feature_version: str | None = None,
        filters: Sequence[nw.Expr] | None = None,
        columns: Sequence[str] | None = None,
    ) -> nw.LazyFrame[Any] | None:
        """Read metadata from this store only (no fallback).

        Args:
            feature: Feature to read
            feature_version: Optional feature version (keyword-only)
            filters: Optional filter expressions (keyword-only)
            columns: Optional column names (keyword-only)

        Returns:
            A lazy frame, or None
        """
        ...
|