metaxy-0.0.1.dev3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/migrations/generator.py
@@ -0,0 +1,319 @@
+ """Migration generation."""
+
+ from datetime import datetime
+ from typing import TYPE_CHECKING
+
+ import narwhals as nw
+
+ from metaxy.graph.diff.differ import GraphDiffer
+ from metaxy.metadata_store.exceptions import FeatureNotFoundError
+ from metaxy.metadata_store.system import SystemTableStorage
+ from metaxy.migrations.models import DiffMigration
+ from metaxy.migrations.ops import DataVersionReconciliation
+ from metaxy.models.types import FeatureKey
+
+ if TYPE_CHECKING:
+     from metaxy.metadata_store.base import MetadataStore
+     from metaxy.models.feature import FeatureGraph
+
+
+ def _is_upstream_of(
+     upstream_key: FeatureKey, downstream_key: FeatureKey, graph: "FeatureGraph"
+ ) -> bool:
+     """Check if upstream_key is in the dependency chain of downstream_key.
+
+     Args:
+         upstream_key: Potential upstream feature
+         downstream_key: Feature to check dependencies for
+         graph: Feature graph
+
+     Returns:
+         True if upstream_key is a direct or transitive dependency of downstream_key
+     """
+     plan = graph.get_feature_plan(downstream_key)
+
+     if plan.deps is None:
+         return False
+
+     # Check direct dependencies
+     for dep in plan.deps:
+         if dep.key == upstream_key:
+             return True
+
+     # Check transitive dependencies (recursive)
+     for dep in plan.deps:
+         if _is_upstream_of(upstream_key, dep.key, graph):
+             return True
+
+     return False
+
+
+ def generate_migration(
+     store: "MetadataStore",
+     *,
+     project: str,
+     from_snapshot_version: str | None = None,
+     to_snapshot_version: str | None = None,
+     class_path_overrides: dict[str, str] | None = None,
+ ) -> DiffMigration | None:
+     """Generate migration from detected feature changes or between snapshots.
+
+     Two modes of operation:
+
+     1. **Default mode** (both snapshot_versions None):
+        - Compares latest recorded snapshot (store) vs current active graph (code)
+        - This is the normal workflow: detect code changes
+
+     2. **Historical mode** (both snapshot_versions provided):
+        - Reconstructs from_graph from from_snapshot_version
+        - Reconstructs to_graph from to_snapshot_version
+        - Compares these two historical registries
+        - Useful for: backfilling migrations, testing, recovery
+
+     Generates explicit operations for ALL affected features (root + downstream).
+     Each downstream feature gets its own DataVersionReconciliation operation.
+
+     Args:
+         store: Metadata store to check
+         project: Project name for filtering snapshots
+         from_snapshot_version: Optional snapshot version to compare from (historical mode)
+         to_snapshot_version: Optional snapshot version to compare to (historical mode)
+         class_path_overrides: Optional overrides for moved/renamed feature classes
+
+     Returns:
+         Migration object, or None if no changes detected
+
+     Raises:
+         ValueError: If only one snapshot_version is provided, or snapshots not found
+
+     Example (default mode):
+         ```py
+         migration = generate_migration(store, project="my_project")
+         if migration:
+             migration.to_yaml("migrations/001_update.yaml")
+         ```
+
+     Example (historical mode):
+         ```py
+         migration = generate_migration(
+             store,
+             project="my_project",
+             from_snapshot_version="abc123...",
+             to_snapshot_version="def456...",
+         )
+         ```
+     """
+     from metaxy.models.feature import FeatureGraph
+
+     if from_snapshot_version is None:
+         # Default mode: get from store's latest snapshot
+         from metaxy.metadata_store.system.keys import FEATURE_VERSIONS_KEY
+
+         try:
+             feature_versions = store.read_metadata(
+                 FEATURE_VERSIONS_KEY, current_only=False
+             )
+             # Get most recent snapshot - only collect the top row
+             latest_snapshot = nw.from_native(
+                 feature_versions.sort("recorded_at", descending=True).head(1).collect()
+             )
+             if latest_snapshot.shape[0] > 0:
+                 from_snapshot_version = latest_snapshot["metaxy_snapshot_version"][0]
+                 print(f"From: latest snapshot {from_snapshot_version}...")
+             else:
+                 raise ValueError(
+                     "No feature graph snapshot found in metadata store. "
+                     "Run 'metaxy graph push' first to record feature versions before generating migrations."
+                 )
+         except FeatureNotFoundError:
+             raise ValueError(
+                 "No feature versions recorded yet. "
+                 "Run 'metaxy graph push' first to record the feature graph snapshot."
+             )
+     else:
+         print(f"From: snapshot {from_snapshot_version}...")
+
+     # Step 2: Determine to_graph and to_snapshot_version
+     if to_snapshot_version is None:
+         # Default mode: record current active graph and use its snapshot
+         # This ensures the to_snapshot is available in the store for comparison
+         snapshot_result = SystemTableStorage(store).push_graph_snapshot()
+         to_snapshot_version = snapshot_result.snapshot_version
+         was_already_pushed = snapshot_result.already_pushed
+         to_graph = FeatureGraph.get_active()
+         if was_already_pushed:
+             print(
+                 f"To: current active graph (snapshot {to_snapshot_version}... already pushed)"
+             )
+         else:
+             print(
+                 f"To: current active graph (snapshot {to_snapshot_version}... pushed)"
+             )
+
+     else:
+         # Historical mode: load from snapshot with force_reload
+         # force_reload ensures we get current code from disk, not cached imports
+         to_graph = SystemTableStorage(store).load_graph_from_snapshot(
+             snapshot_version=to_snapshot_version,
+             class_path_overrides=class_path_overrides,
+             force_reload=True,
+         )
+         print(f"To: snapshot {to_snapshot_version}...")
+
+     # Step 3: Detect changes by comparing snapshot_versions directly
+     # We don't reconstruct from_graph - just compare snapshot_versions from the store
+     # This avoids issues with stale cached imports when files have changed
+     assert from_snapshot_version is not None, "from_snapshot_version must be set by now"
+     assert to_snapshot_version is not None, "to_snapshot_version must be set by now"
+
+     # Use GraphDiffer to detect changes
+     differ = GraphDiffer()
+
+     # Load snapshot data using GraphDiffer
+     try:
+         from_snapshot_data = differ.load_snapshot_data(store, from_snapshot_version)
+     except ValueError:
+         # Snapshot not found - nothing to migrate from
+         print("No from_snapshot found in store.")
+         return None
+
+     # Build snapshot data for to_snapshot
+     to_snapshot_data = to_graph.to_snapshot()
+
+     # Compute GraphDiff using GraphDiffer
+     graph_diff = differ.diff(
+         from_snapshot_data,
+         to_snapshot_data,
+         from_snapshot_version,
+         to_snapshot_version,
+     )
+
+     # Check if there are any changes
+     if not graph_diff.has_changes:
+         print("No feature changes detected. All features up to date!")
+         return None
+
+     # Create operations for root changed features
+     root_operations = []
+     for node in graph_diff.changed_nodes:
+         feature_key_str = node.feature_key.to_string()
+         feature_key_str.replace("/", "_")
+
+         root_operations.append(DataVersionReconciliation())
+
+     if not root_operations:
+         print("No feature changes detected. All features up to date!")
+         return None
+
+     # Generate migration ID and timestamp
+     timestamp = datetime.now()
+     timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S")
+     migration_id = f"migration_{timestamp_str}"
+
+     # Show detected root changes
+     print(f"\nDetected {len(root_operations)} root feature change(s):")
+     for op in root_operations:
+         feature_key_str = FeatureKey(op.feature_key).to_string()
+         print(f"  ✓ {feature_key_str}")
+
+     # Discover downstream features that need reconciliation (use to_graph)
+     root_keys = [FeatureKey(op.feature_key) for op in root_operations]
+     downstream_keys = to_graph.get_downstream_features(root_keys)
+
+     # Create explicit operations for downstream features
+     downstream_operations = []
+
+     if downstream_keys:
+         print(
+             f"\nGenerating explicit operations for {len(downstream_keys)} downstream feature(s):"
+         )
+
+     for downstream_key in downstream_keys:
+         feature_key_str = downstream_key.to_string()
+         feature_cls = to_graph.features_by_key[downstream_key]
+
+         # Check if feature exists in from_snapshot (if not, it's new - skip)
+         try:
+             from_metadata = store.read_metadata(
+                 feature_cls,
+                 current_only=False,
+                 allow_fallback=False,
+                 filters=[nw.col("metaxy_snapshot_version") == from_snapshot_version],
+             )
+             # Only collect head(1) to check existence
+             from_metadata_sample = nw.from_native(from_metadata.head(1).collect())
+             if from_metadata_sample.shape[0] == 0:
+                 # Feature doesn't exist in from_snapshot - it's new, skip
+                 print(f"  ⊘ {feature_key_str} (new feature, skipping)")
+                 continue
+         except FeatureNotFoundError:
+             # Feature not materialized yet
+             print(f"  ⊘ {feature_key_str} (not materialized yet, skipping)")
+             continue
+
+         # Determine which root changes affect this downstream feature
+         to_graph.get_feature_plan(downstream_key)
+         affected_by = []
+
+         for root_op in root_operations:
+             root_key = FeatureKey(root_op.feature_key)
+             # Check if this root is in the upstream dependency chain
+             if _is_upstream_of(root_key, downstream_key, to_graph):
+                 affected_by.append(root_key.to_string())
+
+         # Build informative reason
+         if len(affected_by) == 1:
+             f"Reconcile field_provenance due to changes in: {affected_by[0]}"
+         else:
+             (f"Reconcile field_provenance due to changes in: {', '.join(affected_by)}")
+
+         # Create operation (feature versions derived from snapshots)
+         # DataVersionReconciliation doesn't have id, feature_key, or reason params
+         # It only has a type field since it applies to all affected features
+         downstream_operations.append(DataVersionReconciliation())
+
+         print(f"  ✓ {feature_key_str}")
+
+     # Combine all operations
+     all_operations = root_operations + downstream_operations
+
+     print(
+         f"\nGenerated {len(all_operations)} total operations "
+         f"({len(root_operations)} root + {len(downstream_operations)} downstream)"
+     )
+
+     # Find the latest migration to set as parent
+     from metaxy.metadata_store.system import EVENTS_KEY
+
+     parent_migration_id = None
+     try:
+         existing_migrations = store.read_metadata(EVENTS_KEY, current_only=False)
+         # Get most recent migration by timestamp - only collect the top row
+         latest = nw.from_native(
+             existing_migrations.sort("timestamp", descending=True).head(1).collect()
+         )
+         if latest.shape[0] > 0:
+             parent_migration_id = latest["migration_id"][0]
+     except FeatureNotFoundError:
+         # No migrations yet
+         pass
+
+     # Note: from_snapshot_version and to_snapshot_version were already resolved earlier
+
+     # Create migration (serialize operations to dicts)
+     len(root_operations)
+
+     # DiffMigration expects 'ops' as list of dicts with 'type' field
+     # Since all operations are DataVersionReconciliation, create a single operation dict
+     ops = [{"type": "metaxy.migrations.ops.DataVersionReconciliation"}]
+
+     migration = DiffMigration(
+         migration_id=migration_id,
+         parent=parent_migration_id or "initial",
+         from_snapshot_version=from_snapshot_version,
+         to_snapshot_version=to_snapshot_version,
+         created_at=timestamp,
+         ops=ops,
+     )
+
+     return migration
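Taken together, `generate_migration` resolves both snapshot versions (pushing the current graph when needed), diffs them with `GraphDiffer`, and returns a `DiffMigration` whose `parent` field chains it to the previous migration recorded in the store. A minimal default-mode sketch, assuming `store` is an already-configured `MetadataStore` subclass instance and the project's feature graph has been imported; the output path is illustrative, chosen to match the loader's default directory:

```py
from metaxy.migrations.generator import generate_migration

# Compare the store's latest recorded snapshot against the active graph.
migration = generate_migration(store, project="my_project")

if migration is None:
    print("No feature changes detected.")
else:
    # migration.parent is the previous migration_id, or "initial"
    migration.to_yaml(f".metaxy/migrations/{migration.migration_id}.yaml")
```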
metaxy/migrations/loader.py
@@ -0,0 +1,231 @@
+ """Load migrations from YAML files."""
+
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from metaxy.migrations.models import Migration
+
+
+ def load_migration_from_yaml(yaml_path: Path) -> "Migration":
+     """Load migration from YAML file.
+
+     Uses Pydantic's discriminated unions for automatic polymorphic deserialization
+     based on the migration_type field.
+
+     Args:
+         yaml_path: Path to migration YAML file
+
+     Returns:
+         Migration instance (DiffMigration or FullGraphMigration)
+
+     Raises:
+         FileNotFoundError: If YAML file doesn't exist
+         ValueError: If YAML is invalid or migration type is not supported
+     """
+     import yaml
+
+     from metaxy.migrations.models import MigrationAdapter
+
+     if not yaml_path.exists():
+         raise FileNotFoundError(f"Migration YAML not found: {yaml_path}")
+
+     with open(yaml_path) as f:
+         data = yaml.safe_load(f)
+
+     # Use Pydantic's discriminated union to automatically deserialize
+     try:
+         migration = MigrationAdapter.validate_python(data)
+     except Exception as e:
+         raise ValueError(f"Failed to load migration from {yaml_path}: {e}") from e
+
+     return migration
+
+
+ def find_migration_yaml(migration_id: str, migrations_dir: Path | None = None) -> Path:
+     """Find YAML file for a migration ID by searching all YAML files.
+
+     Args:
+         migration_id: Migration ID (e.g., "20250127_120000" or "20250127_120000_feature_update")
+         migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)
+
+     Returns:
+         Path to migration YAML file
+
+     Raises:
+         FileNotFoundError: If migration YAML not found
+     """
+     if migrations_dir is None:
+         migrations_dir = Path(".metaxy/migrations")
+
+     if not migrations_dir.exists():
+         raise FileNotFoundError(
+             f"Migration '{migration_id}' not found. "
+             f"Migrations directory does not exist: {migrations_dir}"
+         )
+
+     # Search through all YAML files to find the one with matching ID
+     for yaml_file in migrations_dir.glob("*.yaml"):
+         try:
+             migration = load_migration_from_yaml(yaml_file)
+             if migration.migration_id == migration_id:
+                 return yaml_file
+         except Exception:
+             # Skip files that can't be loaded
+             continue
+
+     # Not found - list available migrations
+     available = []
+     for yaml_file in migrations_dir.glob("*.yaml"):
+         try:
+             migration = load_migration_from_yaml(yaml_file)
+             available.append(migration.migration_id)
+         except Exception:
+             continue
+
+     raise FileNotFoundError(
+         f"Migration '{migration_id}' not found in {migrations_dir}.\n"
+         f"Available migrations: {available}"
+     )
+
+
+ def list_migrations(migrations_dir: Path | None = None) -> list[str]:
+     """List all available migration IDs.
+
+     Args:
+         migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)
+
+     Returns:
+         List of migration IDs sorted by creation time
+     """
+     if migrations_dir is None:
+         migrations_dir = Path(".metaxy/migrations")
+
+     if not migrations_dir.exists():
+         return []
+
+     yaml_files = sorted(migrations_dir.glob("*.yaml"))
+     return [f.stem for f in yaml_files]
+
+
+ def find_latest_migration(migrations_dir: Path | None = None) -> str | None:
+     """Find the latest migration ID (head of the chain).
+
+     Args:
+         migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)
+
+     Returns:
+         Migration ID of the head, or None if no migrations exist
+
+     Raises:
+         ValueError: If multiple heads detected (conflict)
+     """
+     from metaxy.migrations.models import Migration
+
+     if migrations_dir is None:
+         migrations_dir = Path(".metaxy/migrations")
+
+     if not migrations_dir.exists():
+         return None
+
+     # Load all migrations - all migrations form chains via parent IDs
+     migrations: dict[str, Migration] = {}
+     for yaml_file in migrations_dir.glob("*.yaml"):
+         migration = load_migration_from_yaml(yaml_file)
+         migrations[migration.migration_id] = migration
+
+     if not migrations:
+         return None
+
+     # Find migrations that are parents of others
+     all_parents = {m.parent for m in migrations.values() if m.parent != "initial"}
+
+     # Find heads (migrations that are not parents of any other migration)
+     heads = [mid for mid in migrations.keys() if mid not in all_parents]
+
+     if len(heads) == 0:
+         # This means there's a cycle or orphaned migrations
+         raise ValueError(
+             "No head migration found - possible cycle in migration chain. "
+             f"All migrations: {list(migrations.keys())}"
+         )
+
+     if len(heads) > 1:
+         raise ValueError(
+             f"Multiple migration heads detected: {heads}. "
+             "This usually means two migrations were created in parallel. "
+             "Please merge them by creating a new migration that depends on one head, "
+             "or delete one of the conflicting migrations."
+         )
+
+     return heads[0]
+
+
+ def build_migration_chain(
+     migrations_dir: Path | None = None,
+ ) -> list["Migration"]:
+     """Build ordered migration chain from parent IDs.
+
+     Args:
+         migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)
+
+     Returns:
+         List of migrations in order from oldest to newest
+
+     Raises:
+         ValueError: If chain is invalid (cycles, orphans, multiple heads)
+     """
+     from metaxy.migrations.models import Migration
+
+     if migrations_dir is None:
+         migrations_dir = Path(".metaxy/migrations")
+
+     if not migrations_dir.exists():
+         return []
+
+     # Load all migrations - all migrations form chains via parent IDs
+     migrations: dict[str, Migration] = {}
+     for yaml_file in sorted(migrations_dir.glob("*.yaml")):
+         migration = load_migration_from_yaml(yaml_file)
+         migrations[migration.migration_id] = migration
+
+     if not migrations:
+         return []
+
+     # Validate single head
+     head_id = find_latest_migration(migrations_dir)
+     if head_id is None:
+         return []
+
+     # Build chain by following parent links backwards
+     chain = []
+     current_id: str | None = head_id
+
+     visited = set()
+     while current_id is not None and current_id != "initial":
+         if current_id in visited:
+             raise ValueError(f"Cycle detected in migration chain at: {current_id}")
+
+         if current_id not in migrations:
+             raise ValueError(
+                 f"Migration '{current_id}' referenced as parent but YAML not found. "
+                 f"Available migrations: {list(migrations.keys())}"
+             )
+
+         visited.add(current_id)
+         migration = migrations[current_id]
+         chain.append(migration)
+         current_id = migration.parent
+
+     # Reverse to get oldest-first order
+     chain.reverse()
+
+     # Validate all migrations are in the chain (no orphans)
+     if len(chain) != len(migrations):
+         orphans = set(migrations.keys()) - set(m.migration_id for m in chain)
+         raise ValueError(
+             f"Orphaned migrations detected (not in main chain): {orphans}. "
+             "Each migration must have parent pointing to previous migration or 'initial'."
+         )
+
+     return chain
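The loader functions compose into a simple inspection workflow: list the YAML files, resolve the single chain head, then walk `parent` links back to `"initial"` to get an oldest-first ordering. A short sketch using only the functions shown above, assuming a populated `.metaxy/migrations/` directory (the loader's documented default; the directory contents here are hypothetical):

```py
from pathlib import Path

from metaxy.migrations.loader import (
    build_migration_chain,
    find_latest_migration,
    list_migrations,
)

migrations_dir = Path(".metaxy/migrations")

print(list_migrations(migrations_dir))        # YAML file stems, sorted by filename
print(find_latest_migration(migrations_dir))  # head migration_id, or None if empty

# Oldest-first chain; raises ValueError on cycles, orphans, or multiple heads.
for m in build_migration_chain(migrations_dir):
    print(m.migration_id, "<-", m.parent)
```

Note that `find_latest_migration` determines the head structurally (the one migration that is no other migration's parent) rather than by timestamp, which is why parallel migration creation surfaces as a multiple-heads error rather than silently picking one.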