metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111):
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,325 @@
1
+ """Graph description utilities for analyzing feature graphs."""
2
+
3
+ from typing import Any
4
+
5
+ from metaxy.models.feature import FeatureGraph
6
+ from metaxy.models.types import FeatureKey
7
+
8
+
9
+ def describe_graph(
10
+ graph: FeatureGraph,
11
+ project: str | None = None,
12
+ ) -> dict[str, Any]:
13
+ """Generate comprehensive description of a feature graph.
14
+
15
+ Analyzes the graph structure and provides metrics including:
16
+ - Feature count (optionally filtered by project)
17
+ - Graph depth (longest dependency chain)
18
+ - Root features (features with no dependencies)
19
+ - Leaf features (features with no dependents)
20
+ - Feature breakdown by project (if multi-project)
21
+
22
+ Args:
23
+ graph: The FeatureGraph to analyze
24
+ project: Optional project filter to analyze only features from a specific project
25
+
26
+ Returns:
27
+ Dictionary containing graph metrics and analysis:
28
+ {
29
+ "metaxy_snapshot_version": str,
30
+ "total_features": int,
31
+ "filtered_features": int, # If project filter applied
32
+ "graph_depth": int,
33
+ "root_features": list[str],
34
+ "leaf_features": list[str],
35
+ "projects": dict[str, int], # Project -> feature count
36
+ }
37
+
38
+ Example:
39
+ ```py
40
+ graph = FeatureGraph.get_active()
41
+ info = describe_graph(graph, project="my_project")
42
+ print(f"Graph has {info['filtered_features']} features from my_project")
43
+ ```
44
+ """
45
+ # Get all features, optionally filtered by project
46
+ if project is not None:
47
+ filtered_features = {
48
+ key: cls
49
+ for key, cls in graph.features_by_key.items()
50
+ if cls.project == project # type: ignore[attr-defined]
51
+ }
52
+ else:
53
+ filtered_features = graph.features_by_key
54
+
55
+ # Calculate graph depth (longest dependency chain)
56
+ def get_feature_depth(
57
+ feature_key: FeatureKey,
58
+ visited: set[FeatureKey] | None = None,
59
+ ) -> int:
60
+ """Calculate the depth of a feature in the dependency tree."""
61
+ if visited is None:
62
+ visited = set()
63
+
64
+ if feature_key in visited:
65
+ return 0 # Avoid cycles
66
+
67
+ visited.add(feature_key)
68
+
69
+ feature_cls = graph.features_by_key.get(feature_key)
70
+ if feature_cls is None:
71
+ return 1
72
+
73
+ deps = feature_cls.spec().deps
74
+ if not deps:
75
+ return 1
76
+
77
+ max_dep_depth = 0
78
+ for dep in deps:
79
+ dep_depth = get_feature_depth(dep.feature, visited.copy())
80
+ max_dep_depth = max(max_dep_depth, dep_depth)
81
+
82
+ return max_dep_depth + 1
83
+
84
+ # Calculate metrics for filtered features
85
+ max_depth = 0
86
+ for feature_key in filtered_features:
87
+ depth = get_feature_depth(feature_key)
88
+ max_depth = max(max_depth, depth)
89
+
90
+ # Find root features (no dependencies) in filtered set
91
+ root_features = [
92
+ key.to_string() for key, cls in filtered_features.items() if not cls.spec().deps
93
+ ]
94
+
95
+ # Find leaf features (no dependents) in filtered set
96
+ leaf_features = []
97
+ for feature_key in filtered_features:
98
+ is_leaf = True
99
+ # Check if any other filtered feature depends on this one
100
+ for other_key, other_cls in filtered_features.items():
101
+ if other_key != feature_key:
102
+ deps = other_cls.spec().deps
103
+ if deps:
104
+ for dep in deps:
105
+ if dep.feature == feature_key:
106
+ is_leaf = False
107
+ break
108
+ if not is_leaf:
109
+ break
110
+ if is_leaf:
111
+ leaf_features.append(feature_key.to_string())
112
+
113
+ # Calculate project breakdown
114
+ projects: dict[str, int] = {}
115
+ for cls in graph.features_by_key.values():
116
+ project_name = cls.project # type: ignore[attr-defined]
117
+ projects[project_name] = projects.get(project_name, 0) + 1
118
+
119
+ # Build result
120
+ result: dict[str, Any] = {
121
+ "metaxy_snapshot_version": graph.snapshot_version,
122
+ "total_features": len(graph.features_by_key),
123
+ "graph_depth": max_depth,
124
+ "root_features": sorted(root_features),
125
+ "leaf_features": sorted(leaf_features),
126
+ "projects": projects,
127
+ }
128
+
129
+ # Add filtered count if project filter was applied
130
+ if project is not None:
131
+ result["filtered_features"] = len(filtered_features)
132
+ result["filter_project"] = project
133
+
134
+ return result
135
+
136
+
137
+ def get_feature_dependencies(
138
+ graph: FeatureGraph,
139
+ feature_key: FeatureKey,
140
+ recursive: bool = False,
141
+ max_depth: int | None = None,
142
+ ) -> dict[str, Any]:
143
+ """Get dependencies of a specific feature.
144
+
145
+ Args:
146
+ graph: The FeatureGraph to analyze
147
+ feature_key: The feature to analyze
148
+ recursive: If True, recursively get all upstream dependencies
149
+ max_depth: Maximum recursion depth (None for unlimited)
150
+
151
+ Returns:
152
+ Dictionary containing dependency information:
153
+ {
154
+ "direct_dependencies": list[str],
155
+ "all_dependencies": list[str], # If recursive=True
156
+ "dependency_tree": dict, # Nested structure if recursive=True
157
+ }
158
+ """
159
+ feature_cls = graph.features_by_key.get(feature_key)
160
+ if feature_cls is None:
161
+ raise ValueError(f"Feature {feature_key.to_string()} not found in graph")
162
+
163
+ # Get direct dependencies
164
+ direct_deps = []
165
+ deps = feature_cls.spec().deps
166
+ if deps:
167
+ direct_deps = [dep.feature.to_string() for dep in deps]
168
+
169
+ result: dict[str, Any] = {
170
+ "direct_dependencies": direct_deps,
171
+ }
172
+
173
+ if recursive:
174
+ # Build full dependency tree
175
+ def build_dep_tree(
176
+ key: FeatureKey,
177
+ current_depth: int = 0,
178
+ visited: set[FeatureKey] | None = None,
179
+ ) -> dict[str, Any]:
180
+ if visited is None:
181
+ visited = set()
182
+
183
+ if key in visited:
184
+ return {"circular": True, "key": key.to_string()}
185
+
186
+ if max_depth is not None and current_depth >= max_depth:
187
+ return {"truncated": True, "key": key.to_string()}
188
+
189
+ visited.add(key)
190
+
191
+ cls = graph.features_by_key.get(key)
192
+ if cls is None:
193
+ return {"key": key.to_string(), "dependencies": []}
194
+
195
+ spec_deps = cls.spec().deps
196
+ if not spec_deps:
197
+ return {"key": key.to_string(), "dependencies": []}
198
+
199
+ deps = []
200
+ for dep in spec_deps:
201
+ dep_tree = build_dep_tree(
202
+ dep.feature,
203
+ current_depth + 1,
204
+ visited.copy(),
205
+ )
206
+ deps.append(dep_tree)
207
+
208
+ return {
209
+ "key": key.to_string(),
210
+ "project": cls.project, # type: ignore[attr-defined]
211
+ "dependencies": deps,
212
+ }
213
+
214
+ tree = build_dep_tree(feature_key)
215
+ result["dependency_tree"] = tree
216
+
217
+ # Collect all unique dependencies
218
+ all_deps = set()
219
+
220
+ def collect_deps(node: dict[str, Any]) -> None:
221
+ if "dependencies" in node:
222
+ for dep in node["dependencies"]:
223
+ if "key" in dep:
224
+ all_deps.add(dep["key"])
225
+ collect_deps(dep)
226
+
227
+ collect_deps(tree)
228
+ result["all_dependencies"] = sorted(all_deps)
229
+
230
+ return result
231
+
232
+
233
+ def get_feature_dependents(
234
+ graph: FeatureGraph,
235
+ feature_key: FeatureKey,
236
+ recursive: bool = False,
237
+ max_depth: int | None = None,
238
+ ) -> dict[str, Any]:
239
+ """Get features that depend on a specific feature (downstream).
240
+
241
+ Args:
242
+ graph: The FeatureGraph to analyze
243
+ feature_key: The feature to analyze
244
+ recursive: If True, recursively get all downstream dependents
245
+ max_depth: Maximum recursion depth (None for unlimited)
246
+
247
+ Returns:
248
+ Dictionary containing dependent information:
249
+ {
250
+ "direct_dependents": list[str],
251
+ "all_dependents": list[str], # If recursive=True
252
+ "dependent_tree": dict, # Nested structure if recursive=True
253
+ }
254
+ """
255
+ # Find direct dependents
256
+ direct_dependents = []
257
+ for other_key, other_cls in graph.features_by_key.items():
258
+ deps = other_cls.spec().deps
259
+ if deps:
260
+ for dep in deps:
261
+ if dep.feature == feature_key:
262
+ direct_dependents.append(other_key.to_string())
263
+ break
264
+
265
+ result: dict[str, Any] = {
266
+ "direct_dependents": sorted(direct_dependents),
267
+ }
268
+
269
+ if recursive:
270
+ # Build full dependent tree
271
+ def build_dependent_tree(
272
+ key: FeatureKey,
273
+ current_depth: int = 0,
274
+ visited: set[FeatureKey] | None = None,
275
+ ) -> dict[str, Any]:
276
+ if visited is None:
277
+ visited = set()
278
+
279
+ if key in visited:
280
+ return {"circular": True, "key": key.to_string()}
281
+
282
+ if max_depth is not None and current_depth >= max_depth:
283
+ return {"truncated": True, "key": key.to_string()}
284
+
285
+ visited.add(key)
286
+
287
+ # Find features that depend on this one
288
+ dependents = []
289
+ for other_key, other_cls in graph.features_by_key.items():
290
+ deps = other_cls.spec().deps
291
+ if deps:
292
+ for dep in deps:
293
+ if dep.feature == key:
294
+ dep_tree = build_dependent_tree(
295
+ other_key,
296
+ current_depth + 1,
297
+ visited.copy(),
298
+ )
299
+ dependents.append(dep_tree)
300
+ break
301
+
302
+ cls = graph.features_by_key.get(key)
303
+ return {
304
+ "key": key.to_string(),
305
+ "project": cls.project if cls else None, # type: ignore[attr-defined]
306
+ "dependents": dependents,
307
+ }
308
+
309
+ tree = build_dependent_tree(feature_key)
310
+ result["dependent_tree"] = tree
311
+
312
+ # Collect all unique dependents
313
+ all_dependents = set()
314
+
315
+ def collect_dependents(node: dict[str, Any]) -> None:
316
+ if "dependents" in node:
317
+ for dep in node["dependents"]:
318
+ if "key" in dep:
319
+ all_dependents.add(dep["key"])
320
+ collect_dependents(dep)
321
+
322
+ collect_dependents(tree)
323
+ result["all_dependents"] = sorted(all_dependents)
324
+
325
+ return result
@@ -0,0 +1,21 @@
1
+ """Graph diff infrastructure - models and traversal for graph comparison."""
2
+
3
+ from metaxy.graph.diff.models import (
4
+ EdgeData,
5
+ FieldNode,
6
+ GraphData,
7
+ GraphNode,
8
+ NodeStatus,
9
+ )
10
+ from metaxy.graph.diff.traversal import GraphWalker
11
+
12
+ __all__ = [
13
+ # Core models
14
+ "EdgeData",
15
+ "FieldNode",
16
+ "GraphData",
17
+ "GraphNode",
18
+ "NodeStatus",
19
+ # Traversal
20
+ "GraphWalker",
21
+ ]