metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/graph/describe.py
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
"""Graph description utilities for analyzing feature graphs."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from metaxy.models.feature import FeatureGraph
|
|
6
|
+
from metaxy.models.types import FeatureKey
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def describe_graph(
    graph: FeatureGraph,
    project: str | None = None,
) -> dict[str, Any]:
    """Generate comprehensive description of a feature graph.

    Analyzes the graph structure and provides metrics including:
    - Feature count (optionally filtered by project)
    - Graph depth (longest dependency chain)
    - Root features (features with no dependencies)
    - Leaf features (features with no dependents)
    - Feature breakdown by project (if multi-project)

    Args:
        graph: The FeatureGraph to analyze
        project: Optional project filter to analyze only features from a specific project

    Returns:
        Dictionary containing graph metrics and analysis:
        {
            "metaxy_snapshot_version": str,
            "total_features": int,  # Always counts the whole graph
            "graph_depth": int,  # 0 when no feature matches
            "root_features": list[str],  # Sorted
            "leaf_features": list[str],  # Sorted
            "projects": dict[str, int],  # Project -> feature count (whole graph)
            "filtered_features": int,  # Only if project filter applied
            "filter_project": str,  # Only if project filter applied
        }

        The depth of a filtered feature is measured through the whole graph,
        so upstream features from other projects still contribute to it.
        Leaf detection, in contrast, only looks at dependents inside the
        filtered set.

    Example:
        ```py
        graph = FeatureGraph.get_active()
        info = describe_graph(graph, project="my_project")
        print(f"Graph has {info['filtered_features']} features from my_project")
        ```
    """
    all_features = graph.features_by_key

    # Get all features, optionally filtered by project
    if project is not None:
        filtered_features = {
            key: cls
            for key, cls in all_features.items()
            if cls.project == project  # type: ignore[attr-defined]
        }
    else:
        filtered_features = all_features

    # Depth of every feature already resolved. Without this cache a shared
    # upstream feature is re-walked once per path leading to it, which is
    # exponential on diamond-shaped graphs.
    depth_cache: dict[FeatureKey, int] = {}
    # Features on the current recursion path; only used to stop on a cycle.
    # NOTE(review): feature graphs are presumably acyclic; for acyclic graphs
    # the cached walk returns exactly the same depths as an uncached one.
    in_progress: set[FeatureKey] = set()

    def get_feature_depth(feature_key: FeatureKey) -> int:
        """Calculate the depth of a feature in the dependency tree."""
        cached = depth_cache.get(feature_key)
        if cached is not None:
            return cached

        if feature_key in in_progress:
            return 0  # Avoid cycles

        feature_cls = all_features.get(feature_key)
        # Unknown features and features without dependencies both count as
        # a chain of length one.
        deps = feature_cls.spec().deps if feature_cls is not None else None
        if not deps:
            depth_cache[feature_key] = 1
            return 1

        in_progress.add(feature_key)
        try:
            depth = 1 + max(get_feature_depth(dep.feature) for dep in deps)
        finally:
            in_progress.discard(feature_key)

        depth_cache[feature_key] = depth
        return depth

    # Longest dependency chain ending at any filtered feature
    max_depth = max(
        (get_feature_depth(feature_key) for feature_key in filtered_features),
        default=0,
    )

    # Find root features (no dependencies) in filtered set
    root_features = [
        key.to_string() for key, cls in filtered_features.items() if not cls.spec().deps
    ]

    # Find leaf features (no dependents) in filtered set: gather, in a single
    # pass, every key that some *other* filtered feature depends on.
    depended_on = {
        dep.feature
        for other_key, other_cls in filtered_features.items()
        for dep in (other_cls.spec().deps or ())
        if dep.feature != other_key  # a self-reference does not make a dependent
    }
    leaf_features = [
        feature_key.to_string()
        for feature_key in filtered_features
        if feature_key not in depended_on
    ]

    # Calculate project breakdown (always over the whole graph)
    projects: dict[str, int] = {}
    for cls in all_features.values():
        project_name = cls.project  # type: ignore[attr-defined]
        projects[project_name] = projects.get(project_name, 0) + 1

    # Build result
    result: dict[str, Any] = {
        "metaxy_snapshot_version": graph.snapshot_version,
        "total_features": len(all_features),
        "graph_depth": max_depth,
        "root_features": sorted(root_features),
        "leaf_features": sorted(leaf_features),
        "projects": projects,
    }

    # Add filtered count if project filter was applied
    if project is not None:
        result["filtered_features"] = len(filtered_features)
        result["filter_project"] = project

    return result
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def get_feature_dependencies(
    graph: FeatureGraph,
    feature_key: FeatureKey,
    recursive: bool = False,
    max_depth: int | None = None,
) -> dict[str, Any]:
    """Get dependencies of a specific feature.

    Args:
        graph: The FeatureGraph to analyze
        feature_key: The feature to analyze
        recursive: If True, recursively get all upstream dependencies
        max_depth: Maximum recursion depth (None for unlimited)

    Returns:
        Dictionary containing dependency information:
        {
            "direct_dependencies": list[str],  # In declaration order
            "all_dependencies": list[str],  # Sorted; if recursive=True
            "dependency_tree": dict,  # Nested structure if recursive=True
        }

        Every expanded tree node has the shape
        ``{"key": str, "project": str | None, "dependencies": list[dict]}``;
        ``project`` is None for a key that is not registered in the graph.
        A node that would repeat a key already on its own path is emitted as
        ``{"circular": True, "key": str}``, and a node at ``max_depth`` as
        ``{"truncated": True, "key": str}``. Keys of such marker nodes are
        still included in ``all_dependencies``.

    Raises:
        ValueError: If ``feature_key`` is not present in the graph.
    """
    feature_cls = graph.features_by_key.get(feature_key)
    if feature_cls is None:
        raise ValueError(f"Feature {feature_key.to_string()} not found in graph")

    # Get direct dependencies (``deps`` may be empty or None)
    result: dict[str, Any] = {
        "direct_dependencies": [
            dep.feature.to_string() for dep in feature_cls.spec().deps or ()
        ],
    }

    if not recursive:
        return result

    # Keys on the path from the root to the node being expanded. Adding on
    # entry and discarding on exit tracks the same set a per-child copy would,
    # without copying it for every edge.
    on_path: set[FeatureKey] = set()

    def build_dep_tree(key: FeatureKey, current_depth: int = 0) -> dict[str, Any]:
        """Expand ``key`` into a nested dict of its upstream features."""
        if key in on_path:
            return {"circular": True, "key": key.to_string()}

        if max_depth is not None and current_depth >= max_depth:
            return {"truncated": True, "key": key.to_string()}

        cls = graph.features_by_key.get(key)
        spec_deps = cls.spec().deps if cls is not None else None

        on_path.add(key)
        try:
            children = [
                build_dep_tree(dep.feature, current_depth + 1)
                for dep in spec_deps or ()
            ]
        finally:
            on_path.discard(key)

        # Every expanded node carries "project", including nodes without
        # dependencies and unknown features, so consumers see one node shape.
        return {
            "key": key.to_string(),
            "project": cls.project if cls is not None else None,  # type: ignore[attr-defined]
            "dependencies": children,
        }

    tree = build_dep_tree(feature_key)
    result["dependency_tree"] = tree

    # Collect all unique dependencies: every node below the root, including
    # circular/truncated markers (which have no "dependencies" of their own).
    all_deps: set[str] = set()
    pending = [tree]
    while pending:
        node = pending.pop()
        for child in node.get("dependencies", ()):
            all_deps.add(child["key"])
            pending.append(child)
    result["all_dependencies"] = sorted(all_deps)

    return result
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def get_feature_dependents(
    graph: FeatureGraph,
    feature_key: FeatureKey,
    recursive: bool = False,
    max_depth: int | None = None,
) -> dict[str, Any]:
    """Get features that depend on a specific feature (downstream).

    Args:
        graph: The FeatureGraph to analyze
        feature_key: The feature to analyze; it does not have to be
            registered in the graph (the result is then simply empty)
        recursive: If True, recursively get all downstream dependents
        max_depth: Maximum recursion depth (None for unlimited)

    Returns:
        Dictionary containing dependent information:
        {
            "direct_dependents": list[str],  # Sorted
            "all_dependents": list[str],  # Sorted; if recursive=True
            "dependent_tree": dict,  # Nested structure if recursive=True
        }

        Every expanded tree node has the shape
        ``{"key": str, "project": str | None, "dependents": list[dict]}``.
        A node that would repeat a key already on its own path is emitted as
        ``{"circular": True, "key": str}``, and a node at ``max_depth`` as
        ``{"truncated": True, "key": str}``. Keys of such marker nodes are
        still included in ``all_dependents``.
    """
    # Reverse adjacency index, built once: upstream key -> features that list
    # it as a dependency, in ``features_by_key`` order. Scanning the whole
    # graph again for every tree node would repeat this work per node.
    dependents_of: dict[FeatureKey, list[FeatureKey]] = {}
    for other_key, other_cls in graph.features_by_key.items():
        deps = other_cls.spec().deps
        if not deps:
            continue
        # dict.fromkeys de-duplicates while keeping order, so a feature that
        # names the same upstream twice is recorded as a dependent only once.
        for upstream in dict.fromkeys(dep.feature for dep in deps):
            dependents_of.setdefault(upstream, []).append(other_key)

    # Find direct dependents
    result: dict[str, Any] = {
        "direct_dependents": sorted(
            key.to_string() for key in dependents_of.get(feature_key, ())
        ),
    }

    if not recursive:
        return result

    # Keys on the path from the root to the node being expanded; maintained
    # by add/discard instead of copying a visited set for every child.
    on_path: set[FeatureKey] = set()

    def build_dependent_tree(
        key: FeatureKey,
        current_depth: int = 0,
    ) -> dict[str, Any]:
        """Expand ``key`` into a nested dict of its downstream features."""
        if key in on_path:
            return {"circular": True, "key": key.to_string()}

        if max_depth is not None and current_depth >= max_depth:
            return {"truncated": True, "key": key.to_string()}

        on_path.add(key)
        try:
            dependents = [
                build_dependent_tree(child_key, current_depth + 1)
                for child_key in dependents_of.get(key, ())
            ]
        finally:
            on_path.discard(key)

        cls = graph.features_by_key.get(key)
        return {
            "key": key.to_string(),
            "project": cls.project if cls is not None else None,  # type: ignore[attr-defined]
            "dependents": dependents,
        }

    tree = build_dependent_tree(feature_key)
    result["dependent_tree"] = tree

    # Collect all unique dependents: every node below the root, including
    # circular/truncated markers (which have no "dependents" of their own).
    all_dependents: set[str] = set()
    pending = [tree]
    while pending:
        node = pending.pop()
        for child in node.get("dependents", ()):
            all_dependents.add(child["key"])
            pending.append(child)
    result["all_dependents"] = sorted(all_dependents)

    return result
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Graph diff infrastructure - models and traversal for graph comparison."""
|
|
2
|
+
|
|
3
|
+
from metaxy.graph.diff.models import (
|
|
4
|
+
EdgeData,
|
|
5
|
+
FieldNode,
|
|
6
|
+
GraphData,
|
|
7
|
+
GraphNode,
|
|
8
|
+
NodeStatus,
|
|
9
|
+
)
|
|
10
|
+
from metaxy.graph.diff.traversal import GraphWalker
|
|
11
|
+
|
|
12
|
+
# Names re-exported as the public interface of this package.
__all__ = [
    # Graph data models
    "EdgeData",
    "FieldNode",
    "GraphData",
    "GraphNode",
    "NodeStatus",
    # Graph traversal
    "GraphWalker",
]
|