metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/migrations/generator.py

@@ -0,0 +1,319 @@
"""Migration generation."""

from datetime import datetime
from typing import TYPE_CHECKING

import narwhals as nw

from metaxy.graph.diff.differ import GraphDiffer
from metaxy.metadata_store.exceptions import FeatureNotFoundError
from metaxy.metadata_store.system import SystemTableStorage
from metaxy.migrations.models import DiffMigration
from metaxy.migrations.ops import DataVersionReconciliation
from metaxy.models.types import FeatureKey

if TYPE_CHECKING:
    from metaxy.metadata_store.base import MetadataStore
    from metaxy.models.feature import FeatureGraph


def _is_upstream_of(
    upstream_key: FeatureKey, downstream_key: FeatureKey, graph: "FeatureGraph"
) -> bool:
    """Check if upstream_key is in the dependency chain of downstream_key.

    Args:
        upstream_key: Potential upstream feature
        downstream_key: Feature to check dependencies for
        graph: Feature graph

    Returns:
        True if upstream_key is a direct or transitive dependency of downstream_key
    """
    plan = graph.get_feature_plan(downstream_key)

    if plan.deps is None:
        return False

    # Check direct dependencies
    for dep in plan.deps:
        if dep.key == upstream_key:
            return True

    # Check transitive dependencies (recursive)
    for dep in plan.deps:
        if _is_upstream_of(upstream_key, dep.key, graph):
            return True

    return False


def generate_migration(
    store: "MetadataStore",
    *,
    project: str,
    from_snapshot_version: str | None = None,
    to_snapshot_version: str | None = None,
    class_path_overrides: dict[str, str] | None = None,
) -> DiffMigration | None:
    """Generate a migration from detected feature changes or between snapshots.

    Two modes of operation:

    1. **Default mode** (both snapshot versions None):
       - Compares the latest recorded snapshot (store) vs the current active graph (code)
       - This is the normal workflow: detect code changes

    2. **Historical mode** (both snapshot versions provided):
       - Reconstructs from_graph from from_snapshot_version
       - Reconstructs to_graph from to_snapshot_version
       - Compares these two historical registries
       - Useful for backfilling migrations, testing, and recovery

    Generates explicit operations for ALL affected features (root + downstream).
    Each downstream feature gets its own DataVersionReconciliation operation.

    Args:
        store: Metadata store to check
        project: Project name for filtering snapshots
        from_snapshot_version: Optional snapshot version to compare from (historical mode)
        to_snapshot_version: Optional snapshot version to compare to (historical mode)
        class_path_overrides: Optional overrides for moved/renamed feature classes

    Returns:
        Migration object, or None if no changes detected

    Raises:
        ValueError: If only one snapshot version is provided, or a snapshot is not found

    Example (default mode):
        ```py
        migration = generate_migration(store, project="my_project")
        if migration:
            migration.to_yaml("migrations/001_update.yaml")
        ```

    Example (historical mode):
        ```py
        migration = generate_migration(
            store,
            project="my_project",
            from_snapshot_version="abc123...",
            to_snapshot_version="def456...",
        )
        ```
    """
    from metaxy.models.feature import FeatureGraph

    # Step 1: Determine from_snapshot_version
    if from_snapshot_version is None:
        # Default mode: get it from the store's latest snapshot
        from metaxy.metadata_store.system.keys import FEATURE_VERSIONS_KEY

        try:
            feature_versions = store.read_metadata(
                FEATURE_VERSIONS_KEY, current_only=False
            )
            # Get the most recent snapshot - only collect the top row
            latest_snapshot = nw.from_native(
                feature_versions.sort("recorded_at", descending=True).head(1).collect()
            )
            if latest_snapshot.shape[0] > 0:
                from_snapshot_version = latest_snapshot["metaxy_snapshot_version"][0]
                print(f"From: latest snapshot {from_snapshot_version}...")
            else:
                raise ValueError(
                    "No feature graph snapshot found in metadata store. "
                    "Run 'metaxy graph push' first to record feature versions before generating migrations."
                )
        except FeatureNotFoundError:
            raise ValueError(
                "No feature versions recorded yet. "
                "Run 'metaxy graph push' first to record the feature graph snapshot."
            )
    else:
        print(f"From: snapshot {from_snapshot_version}...")

    # Step 2: Determine to_graph and to_snapshot_version
    if to_snapshot_version is None:
        # Default mode: record the current active graph and use its snapshot.
        # This ensures the to_snapshot is available in the store for comparison.
        snapshot_result = SystemTableStorage(store).push_graph_snapshot()
        to_snapshot_version = snapshot_result.snapshot_version
        was_already_pushed = snapshot_result.already_pushed
        to_graph = FeatureGraph.get_active()
        if was_already_pushed:
            print(
                f"To: current active graph (snapshot {to_snapshot_version}... already pushed)"
            )
        else:
            print(
                f"To: current active graph (snapshot {to_snapshot_version}... pushed)"
            )
    else:
        # Historical mode: load from the snapshot with force_reload.
        # force_reload ensures we get current code from disk, not cached imports.
        to_graph = SystemTableStorage(store).load_graph_from_snapshot(
            snapshot_version=to_snapshot_version,
            class_path_overrides=class_path_overrides,
            force_reload=True,
        )
        print(f"To: snapshot {to_snapshot_version}...")

    # Step 3: Detect changes by comparing snapshot versions directly.
    # We don't reconstruct from_graph - we just compare snapshot versions from
    # the store. This avoids issues with stale cached imports when files have changed.
    assert from_snapshot_version is not None, "from_snapshot_version must be set by now"
    assert to_snapshot_version is not None, "to_snapshot_version must be set by now"

    # Use GraphDiffer to detect changes
    differ = GraphDiffer()

    # Load snapshot data using GraphDiffer
    try:
        from_snapshot_data = differ.load_snapshot_data(store, from_snapshot_version)
    except ValueError:
        # Snapshot not found - nothing to migrate from
        print("No from_snapshot found in store.")
        return None

    # Build snapshot data for to_snapshot
    to_snapshot_data = to_graph.to_snapshot()

    # Compute the GraphDiff
    graph_diff = differ.diff(
        from_snapshot_data,
        to_snapshot_data,
        from_snapshot_version,
        to_snapshot_version,
    )

    # Check if there are any changes
    if not graph_diff.has_changes:
        print("No feature changes detected. All features up to date!")
        return None

    # Create operations for root changed features. DataVersionReconciliation
    # carries no per-feature state, so track the changed keys alongside the ops.
    root_keys: list[FeatureKey] = [node.feature_key for node in graph_diff.changed_nodes]
    root_operations = [DataVersionReconciliation() for _ in root_keys]

    if not root_operations:
        print("No feature changes detected. All features up to date!")
        return None

    # Generate migration ID and timestamp
    timestamp = datetime.now()
    timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S")
    migration_id = f"migration_{timestamp_str}"

    # Show detected root changes
    print(f"\nDetected {len(root_operations)} root feature change(s):")
    for root_key in root_keys:
        print(f"  ✓ {root_key.to_string()}")

    # Discover downstream features that need reconciliation (use to_graph)
    downstream_keys = to_graph.get_downstream_features(root_keys)

    # Create explicit operations for downstream features
    downstream_operations = []

    if downstream_keys:
        print(
            f"\nGenerating explicit operations for {len(downstream_keys)} downstream feature(s):"
        )

        for downstream_key in downstream_keys:
            feature_key_str = downstream_key.to_string()
            feature_cls = to_graph.features_by_key[downstream_key]

            # Check if the feature exists in from_snapshot (if not, it's new - skip)
            try:
                from_metadata = store.read_metadata(
                    feature_cls,
                    current_only=False,
                    allow_fallback=False,
                    filters=[nw.col("metaxy_snapshot_version") == from_snapshot_version],
                )
                # Only collect head(1) to check existence
                from_metadata_sample = nw.from_native(from_metadata.head(1).collect())
                if from_metadata_sample.shape[0] == 0:
                    # Feature doesn't exist in from_snapshot - it's new, skip
                    print(f"  ⊘ {feature_key_str} (new feature, skipping)")
                    continue
            except FeatureNotFoundError:
                # Feature not materialized yet
                print(f"  ⊘ {feature_key_str} (not materialized yet, skipping)")
                continue

            # Determine which root changes affect this downstream feature
            affected_by = [
                root_key.to_string()
                for root_key in root_keys
                if _is_upstream_of(root_key, downstream_key, to_graph)
            ]
            reason = f"reconcile field_provenance due to changes in: {', '.join(affected_by)}"

            # Create the operation (feature versions are derived from snapshots).
            # DataVersionReconciliation has no id, feature_key, or reason params -
            # it only has a type field, since it applies to all affected features -
            # so the reason is surfaced in the console output only.
            downstream_operations.append(DataVersionReconciliation())

            print(f"  ✓ {feature_key_str} ({reason})")

    # Combine all operations
    all_operations = root_operations + downstream_operations

    print(
        f"\nGenerated {len(all_operations)} total operations "
        f"({len(root_operations)} root + {len(downstream_operations)} downstream)"
    )

    # Find the latest migration to set as parent
    from metaxy.metadata_store.system import EVENTS_KEY

    parent_migration_id = None
    try:
        existing_migrations = store.read_metadata(EVENTS_KEY, current_only=False)
        # Get the most recent migration by timestamp - only collect the top row
        latest = nw.from_native(
            existing_migrations.sort("timestamp", descending=True).head(1).collect()
        )
        if latest.shape[0] > 0:
            parent_migration_id = latest["migration_id"][0]
    except FeatureNotFoundError:
        # No migrations yet
        pass

    # Create the migration. DiffMigration expects 'ops' as a list of dicts with
    # a 'type' field; since all operations are DataVersionReconciliation, a
    # single operation dict suffices.
    ops = [{"type": "metaxy.migrations.ops.DataVersionReconciliation"}]

    migration = DiffMigration(
        migration_id=migration_id,
        parent=parent_migration_id or "initial",
        from_snapshot_version=from_snapshot_version,
        to_snapshot_version=to_snapshot_version,
        created_at=timestamp,
        ops=ops,
    )

    return migration
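A note on `_is_upstream_of` above: the recursive walk re-explores shared upstream nodes, so on diamond-shaped dependency graphs it can repeat work. Below is a minimal iterative sketch of an equivalent check that tracks visited keys. It assumes the same `get_feature_plan(...).deps` interface used in this file and that `FeatureKey` is hashable (its use as a dict key in `features_by_key` suggests it is); `_is_upstream_of_bfs` is a hypothetical name, not part of the package.

from collections import deque

def _is_upstream_of_bfs(upstream_key, downstream_key, graph) -> bool:
    """Breadth-first variant of _is_upstream_of with a visited set (sketch)."""
    seen = {downstream_key}
    queue = deque([downstream_key])
    while queue:
        plan = graph.get_feature_plan(queue.popleft())
        for dep in plan.deps or []:  # plan.deps may be None, as above
            if dep.key == upstream_key:
                return True
            if dep.key not in seen:  # skip already-explored shared dependencies
                seen.add(dep.key)
                queue.append(dep.key)
    return False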
metaxy/migrations/loader.py

@@ -0,0 +1,231 @@
"""Load migrations from YAML files."""

from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from metaxy.migrations.models import Migration


def load_migration_from_yaml(yaml_path: Path) -> "Migration":
    """Load migration from YAML file.

    Uses Pydantic's discriminated unions for automatic polymorphic deserialization
    based on the migration_type field.

    Args:
        yaml_path: Path to migration YAML file

    Returns:
        Migration instance (DiffMigration or FullGraphMigration)

    Raises:
        FileNotFoundError: If YAML file doesn't exist
        ValueError: If YAML is invalid or migration type is not supported
    """
    import yaml

    from metaxy.migrations.models import MigrationAdapter

    if not yaml_path.exists():
        raise FileNotFoundError(f"Migration YAML not found: {yaml_path}")

    with open(yaml_path) as f:
        data = yaml.safe_load(f)

    # Use Pydantic's discriminated union to automatically deserialize
    try:
        migration = MigrationAdapter.validate_python(data)
    except Exception as e:
        raise ValueError(f"Failed to load migration from {yaml_path}: {e}") from e

    return migration


def find_migration_yaml(migration_id: str, migrations_dir: Path | None = None) -> Path:
    """Find YAML file for a migration ID by searching all YAML files.

    Args:
        migration_id: Migration ID (e.g., "20250127_120000" or "20250127_120000_feature_update")
        migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)

    Returns:
        Path to migration YAML file

    Raises:
        FileNotFoundError: If migration YAML not found
    """
    if migrations_dir is None:
        migrations_dir = Path(".metaxy/migrations")

    if not migrations_dir.exists():
        raise FileNotFoundError(
            f"Migration '{migration_id}' not found. "
            f"Migrations directory does not exist: {migrations_dir}"
        )

    # Search through all YAML files to find the one with a matching ID
    for yaml_file in migrations_dir.glob("*.yaml"):
        try:
            migration = load_migration_from_yaml(yaml_file)
            if migration.migration_id == migration_id:
                return yaml_file
        except Exception:
            # Skip files that can't be loaded
            continue

    # Not found - list available migrations
    available = []
    for yaml_file in migrations_dir.glob("*.yaml"):
        try:
            migration = load_migration_from_yaml(yaml_file)
            available.append(migration.migration_id)
        except Exception:
            continue

    raise FileNotFoundError(
        f"Migration '{migration_id}' not found in {migrations_dir}.\n"
        f"Available migrations: {available}"
    )


def list_migrations(migrations_dir: Path | None = None) -> list[str]:
    """List all available migration IDs.

    Args:
        migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)

    Returns:
        List of migration IDs sorted by creation time
    """
    if migrations_dir is None:
        migrations_dir = Path(".metaxy/migrations")

    if not migrations_dir.exists():
        return []

    yaml_files = sorted(migrations_dir.glob("*.yaml"))
    return [f.stem for f in yaml_files]


def find_latest_migration(migrations_dir: Path | None = None) -> str | None:
    """Find the latest migration ID (head of the chain).

    Args:
        migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)

    Returns:
        Migration ID of the head, or None if no migrations exist

    Raises:
        ValueError: If multiple heads detected (conflict)
    """
    from metaxy.migrations.models import Migration

    if migrations_dir is None:
        migrations_dir = Path(".metaxy/migrations")

    if not migrations_dir.exists():
        return None

    # Load all migrations - migrations form chains via parent IDs
    migrations: dict[str, Migration] = {}
    for yaml_file in migrations_dir.glob("*.yaml"):
        migration = load_migration_from_yaml(yaml_file)
        migrations[migration.migration_id] = migration

    if not migrations:
        return None

    # Find migrations that are parents of others
    all_parents = {m.parent for m in migrations.values() if m.parent != "initial"}

    # Find heads (migrations that are not parents of any other migration)
    heads = [mid for mid in migrations.keys() if mid not in all_parents]

    if len(heads) == 0:
        # This means there's a cycle or orphaned migrations
        raise ValueError(
            "No head migration found - possible cycle in migration chain. "
            f"All migrations: {list(migrations.keys())}"
        )

    if len(heads) > 1:
        raise ValueError(
            f"Multiple migration heads detected: {heads}. "
            "This usually means two migrations were created in parallel. "
            "Please merge them by creating a new migration that depends on one head, "
            "or delete one of the conflicting migrations."
        )

    return heads[0]


def build_migration_chain(
    migrations_dir: Path | None = None,
) -> list["Migration"]:
    """Build ordered migration chain from parent IDs.

    Args:
        migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)

    Returns:
        List of migrations in order from oldest to newest

    Raises:
        ValueError: If chain is invalid (cycles, orphans, multiple heads)
    """
    from metaxy.migrations.models import Migration

    if migrations_dir is None:
        migrations_dir = Path(".metaxy/migrations")

    if not migrations_dir.exists():
        return []

    # Load all migrations - migrations form chains via parent IDs
    migrations: dict[str, Migration] = {}
    for yaml_file in sorted(migrations_dir.glob("*.yaml")):
        migration = load_migration_from_yaml(yaml_file)
        migrations[migration.migration_id] = migration

    if not migrations:
        return []

    # Validate single head
    head_id = find_latest_migration(migrations_dir)
    if head_id is None:
        return []

    # Build chain by following parent links backwards
    chain = []
    current_id: str | None = head_id

    visited = set()
    while current_id is not None and current_id != "initial":
        if current_id in visited:
            raise ValueError(f"Cycle detected in migration chain at: {current_id}")

        if current_id not in migrations:
            raise ValueError(
                f"Migration '{current_id}' referenced as parent but YAML not found. "
                f"Available migrations: {list(migrations.keys())}"
            )

        visited.add(current_id)
        migration = migrations[current_id]
        chain.append(migration)
        current_id = migration.parent

    # Reverse to get oldest-first order
    chain.reverse()

    # Validate all migrations are in the chain (no orphans)
    if len(chain) != len(migrations):
        orphans = set(migrations.keys()) - {m.migration_id for m in chain}
        raise ValueError(
            f"Orphaned migrations detected (not in main chain): {orphans}. "
            "Each migration must have a parent pointing to the previous migration or 'initial'."
        )

    return chain
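Taken together, these helpers let a caller enumerate migrations, find the head of the chain, and replay the history oldest-first. A minimal usage sketch follows; the import path mirrors the file layout above (whether these names are also re-exported from metaxy.migrations is not shown here), and the attributes used (migration_id, parent) are the ones the loader itself relies on.

from pathlib import Path

from metaxy.migrations.loader import (
    build_migration_chain,
    find_latest_migration,
    list_migrations,
)

migrations_dir = Path(".metaxy/migrations")

# Filenames double as IDs for a quick listing.
print("available:", list_migrations(migrations_dir))

# The head is the only migration that no other migration names as its parent.
print("head:", find_latest_migration(migrations_dir))

# Oldest-first chain, ready to apply in order; raises ValueError on
# cycles, orphans, or multiple heads.
for migration in build_migration_chain(migrations_dir):
    print(migration.migration_id, "<-", migration.parent)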