metaxy 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (75)
  1. metaxy/__init__.py +61 -0
  2. metaxy/_testing.py +542 -0
  3. metaxy/_utils.py +16 -0
  4. metaxy/_version.py +1 -0
  5. metaxy/cli/app.py +76 -0
  6. metaxy/cli/context.py +71 -0
  7. metaxy/cli/graph.py +576 -0
  8. metaxy/cli/graph_diff.py +290 -0
  9. metaxy/cli/list.py +42 -0
  10. metaxy/cli/metadata.py +271 -0
  11. metaxy/cli/migrations.py +862 -0
  12. metaxy/cli/push.py +55 -0
  13. metaxy/config.py +450 -0
  14. metaxy/data_versioning/__init__.py +24 -0
  15. metaxy/data_versioning/calculators/__init__.py +13 -0
  16. metaxy/data_versioning/calculators/base.py +97 -0
  17. metaxy/data_versioning/calculators/duckdb.py +186 -0
  18. metaxy/data_versioning/calculators/ibis.py +225 -0
  19. metaxy/data_versioning/calculators/polars.py +135 -0
  20. metaxy/data_versioning/diff/__init__.py +15 -0
  21. metaxy/data_versioning/diff/base.py +150 -0
  22. metaxy/data_versioning/diff/narwhals.py +108 -0
  23. metaxy/data_versioning/hash_algorithms.py +19 -0
  24. metaxy/data_versioning/joiners/__init__.py +9 -0
  25. metaxy/data_versioning/joiners/base.py +70 -0
  26. metaxy/data_versioning/joiners/narwhals.py +235 -0
  27. metaxy/entrypoints.py +309 -0
  28. metaxy/ext/__init__.py +1 -0
  29. metaxy/ext/alembic.py +326 -0
  30. metaxy/ext/sqlmodel.py +172 -0
  31. metaxy/ext/sqlmodel_system_tables.py +139 -0
  32. metaxy/graph/__init__.py +21 -0
  33. metaxy/graph/diff/__init__.py +21 -0
  34. metaxy/graph/diff/diff_models.py +399 -0
  35. metaxy/graph/diff/differ.py +740 -0
  36. metaxy/graph/diff/models.py +418 -0
  37. metaxy/graph/diff/rendering/__init__.py +18 -0
  38. metaxy/graph/diff/rendering/base.py +274 -0
  39. metaxy/graph/diff/rendering/cards.py +188 -0
  40. metaxy/graph/diff/rendering/formatter.py +805 -0
  41. metaxy/graph/diff/rendering/graphviz.py +246 -0
  42. metaxy/graph/diff/rendering/mermaid.py +320 -0
  43. metaxy/graph/diff/rendering/rich.py +165 -0
  44. metaxy/graph/diff/rendering/theme.py +48 -0
  45. metaxy/graph/diff/traversal.py +247 -0
  46. metaxy/graph/utils.py +58 -0
  47. metaxy/metadata_store/__init__.py +31 -0
  48. metaxy/metadata_store/_protocols.py +38 -0
  49. metaxy/metadata_store/base.py +1676 -0
  50. metaxy/metadata_store/clickhouse.py +161 -0
  51. metaxy/metadata_store/duckdb.py +167 -0
  52. metaxy/metadata_store/exceptions.py +43 -0
  53. metaxy/metadata_store/ibis.py +451 -0
  54. metaxy/metadata_store/memory.py +228 -0
  55. metaxy/metadata_store/sqlite.py +187 -0
  56. metaxy/metadata_store/system_tables.py +257 -0
  57. metaxy/migrations/__init__.py +34 -0
  58. metaxy/migrations/detector.py +153 -0
  59. metaxy/migrations/executor.py +208 -0
  60. metaxy/migrations/loader.py +260 -0
  61. metaxy/migrations/models.py +718 -0
  62. metaxy/migrations/ops.py +390 -0
  63. metaxy/models/__init__.py +0 -0
  64. metaxy/models/bases.py +6 -0
  65. metaxy/models/constants.py +24 -0
  66. metaxy/models/feature.py +665 -0
  67. metaxy/models/feature_spec.py +105 -0
  68. metaxy/models/field.py +25 -0
  69. metaxy/models/plan.py +155 -0
  70. metaxy/models/types.py +157 -0
  71. metaxy/py.typed +0 -0
  72. metaxy-0.0.0.dist-info/METADATA +247 -0
  73. metaxy-0.0.0.dist-info/RECORD +75 -0
  74. metaxy-0.0.0.dist-info/WHEEL +4 -0
  75. metaxy-0.0.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,187 @@
+"""SQLite metadata store - thin wrapper around IbisMetadataStore."""
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import polars as pl
+
+if TYPE_CHECKING:
+    from metaxy.metadata_store.base import MetadataStore
+
+from metaxy.data_versioning.hash_algorithms import HashAlgorithm
+from metaxy.metadata_store.ibis import IbisMetadataStore
+
+
+class SQLiteMetadataStore(IbisMetadataStore):
+    """
+    SQLite metadata store using Ibis backend.
+
+    Convenience wrapper that configures IbisMetadataStore for SQLite.
+
+    Hash algorithm support:
+        - MD5: available (computed via Polars; SQLite has no built-in hash functions)
+
+    Components:
+        - joiner: NarwhalsJoiner (works with any backend)
+        - calculator: PolarsDataVersionCalculator (SQLite always uses Polars, no native compute)
+        - diff_resolver: NarwhalsDiffResolver
+
+    Examples:
+        >>> # Local file database
+        >>> with SQLiteMetadataStore("metadata.db") as store:
+        ...     store.write_metadata(MyFeature, df)
+
+        >>> # In-memory database
+        >>> with SQLiteMetadataStore(":memory:") as store:
+        ...     store.write_metadata(MyFeature, df)
+
+        >>> # Explicit path
+        >>> store = SQLiteMetadataStore(Path("/path/to/metadata.db"))
+        >>> with store:
+        ...     store.write_metadata(MyFeature, df)
+    """
+
+    def __init__(
+        self,
+        database: str | Path,
+        *,
+        fallback_stores: list["MetadataStore"] | None = None,
+        **kwargs,
+    ):
+        """
+        Initialize SQLite metadata store.
+
+        Args:
+            database: Database connection string or path.
+                - File path: "metadata.db" or Path("metadata.db")
+                - In-memory: ":memory:"
+
+                Note: Parent directories are NOT created automatically. Ensure paths exist
+                before initializing the store.
+            fallback_stores: Ordered list of read-only fallback stores.
+            **kwargs: Passed to IbisMetadataStore (e.g., hash_algorithm, prefer_native)
+        """
+        database_str = str(database)
+
+        # Build connection params for Ibis SQLite backend
+        connection_params = {"database": database_str}
+
+        self.database = database_str
+
+        # Initialize Ibis store with SQLite backend
+        super().__init__(
+            backend="sqlite",
+            connection_params=connection_params,
+            fallback_stores=fallback_stores,
+            **kwargs,
+        )
+
+    def _get_default_hash_algorithm(self) -> HashAlgorithm:
+        """Get default hash algorithm for SQLite stores.
+
+        Uses MD5, computed by the Polars calculator since SQLite has no native hash functions.
+        """
+        return HashAlgorithm.MD5
+
+    @classmethod
+    def supports_structs(cls) -> bool:
+        """SQLite does not support struct types natively.
+
+        Returns:
+            False - SQLite stores structs as JSON strings
+        """
+        return False
+
+    def _supports_native_components(self) -> bool:
+        """SQLite stores do not support native data version calculations.
+
+        SQLite doesn't have built-in hash functions (MD5, SHA256, etc.),
+        so we always use Polars components for data versioning.
+        """
+        return False
+
+    def _serialize_for_storage(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Serialize structs and arrays to JSON strings for SQLite storage.
+
+        SQLite doesn't support struct or array types, so we convert them to JSON strings.
+
+        Args:
+            df: DataFrame with potential struct/array columns
+
+        Returns:
+            DataFrame with struct/array columns converted to JSON strings
+        """
+        # Convert struct and array columns to JSON strings
+        for col_name in df.columns:
+            dtype = df.schema[col_name]
+            if isinstance(dtype, pl.Struct):
+                # Convert struct to JSON string
+                df = df.with_columns(
+                    pl.col(col_name).struct.json_encode().alias(col_name)
+                )
+            elif isinstance(dtype, pl.List):
+                # Convert array/list to JSON string
+                # Note: Polars doesn't have native list.json_encode(), so we use map_elements
+                import json
+
+                df = df.with_columns(
+                    pl.col(col_name)
+                    .map_elements(
+                        lambda x: None if x is None else json.dumps(x),
+                        return_dtype=pl.Utf8,
+                    )
+                    .alias(col_name)
+                )
+
+        return df
+
+    def _deserialize_from_storage(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Deserialize JSON strings back to structs and arrays.
+
+        Converts JSON string columns back to their original struct/array types.
+
+        Args:
+            df: DataFrame with JSON string columns
+
+        Returns:
+            DataFrame with JSON strings converted back to structs/arrays
+        """
+        # Known struct and array columns with their expected dtypes:
+        # data_version is a struct, fields is a list of structs.
+        # Migration system columns: operation_ids, expected_steps (list of strings),
+        # migration_yaml (struct), affected_features (list of strings)
+
+        # Columns that need JSON deserialization with specific dtypes
+        json_columns = {
+            "data_version": None,  # Infer from data
+            "migration_yaml": None,  # Infer from data
+            # "feature_spec": Leave as JSON string - contains enum values that can't be parsed
+            "operation_ids": pl.List(pl.Utf8),  # List of strings
+            "expected_steps": pl.List(pl.Utf8),  # List of strings
+            "affected_features": pl.List(pl.Utf8),  # List of strings
+        }
+
+        # Deserialize JSON columns
+        for col_name, dtype in json_columns.items():
+            if col_name in df.columns and df.schema[col_name] == pl.Utf8:
+                if len(df) > 0:
+                    if dtype is None:
+                        # Infer dtype from sample value
+                        sample_value = df[col_name].drop_nulls().head(1)
+                        if len(sample_value) > 0:
+                            inferred_series = sample_value.str.json_decode()
+                            inferred_dtype = inferred_series.dtype
+                            df = df.with_columns(
+                                pl.col(col_name)
+                                .str.json_decode(dtype=inferred_dtype)
+                                .alias(col_name)
+                            )
+                    else:
+                        # Use provided dtype
+                        df = df.with_columns(
+                            pl.col(col_name)
+                            .str.json_decode(dtype=dtype)
+                            .alias(col_name)
+                        )
+
+        return df
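
The `_serialize_for_storage` / `_deserialize_from_storage` hooks above exist because SQLite has no struct or list column types, so the store round-trips them through JSON strings. A minimal sketch of that round-trip in plain Polars (independent of metaxy; the `data_version` column and its contents are illustrative only):

import polars as pl

# A struct column, like the data_version column named in _deserialize_from_storage
df = pl.DataFrame({"data_version": [{"algo": "md5", "hash": "abc123"}]})

# Store: struct -> JSON string (what _serialize_for_storage does)
stored = df.with_columns(pl.col("data_version").struct.json_encode())
assert stored.schema["data_version"] == pl.String

# Load: infer the dtype from a sample value, then JSON string -> struct
# (mirrors the dtype-inference branch of _deserialize_from_storage)
sample = stored["data_version"].drop_nulls().head(1)
inferred_dtype = sample.str.json_decode().dtype
restored = stored.with_columns(
    pl.col("data_version").str.json_decode(dtype=inferred_dtype)
)
assert isinstance(restored.schema["data_version"], pl.Struct)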
@@ -0,0 +1,257 @@
+"""System table storage layer for metadata store.
+
+Provides type-safe access to migration system tables using struct-based storage.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from contextlib import contextmanager
+from contextvars import ContextVar
+from datetime import datetime, timezone
+from typing import Any
+
+import narwhals as nw
+import polars as pl
+
+from metaxy.metadata_store._protocols import MetadataStoreProtocol
+from metaxy.models.types import FeatureKey
+
+# System namespace
+SYSTEM_NAMESPACE = "metaxy-system"
+
+# System table keys
+FEATURE_VERSIONS_KEY = FeatureKey([SYSTEM_NAMESPACE, "feature_versions"])
+MIGRATION_EVENTS_KEY = FeatureKey([SYSTEM_NAMESPACE, "migration_events"])
+# Note: No migrations table - definitions live in YAML files, only events are stored
+
+# Context variable for suppressing feature_version warning in migrations
+_suppress_feature_version_warning: ContextVar[bool] = ContextVar(
+    "_suppress_feature_version_warning", default=False
+)
+
+
+@contextmanager
+def allow_feature_version_override() -> Iterator[None]:
+    """
+    Context manager to suppress warnings when writing metadata with pre-existing feature_version.
+
+    This should only be used in migration code where writing historical feature versions
+    is intentional and necessary.
+
+    Example:
+        >>> with allow_feature_version_override():
+        ...     # DataFrame already has feature_version column from migration
+        ...     store.write_metadata(MyFeature, df_with_feature_version)
+    """
+    token = _suppress_feature_version_warning.set(True)
+    try:
+        yield
+    finally:
+        _suppress_feature_version_warning.reset(token)
+
+
+# Common Polars schemas for system tables
+# TODO: Migrate to use METAXY_*_COL constants instead of plain names
+FEATURE_VERSIONS_SCHEMA = {
+    "feature_key": pl.String,
+    "feature_version": pl.String,  # TODO: Use METAXY_FEATURE_VERSION_COL
+    "recorded_at": pl.Datetime("us"),
+    "feature_spec": pl.String,
+    "feature_class_path": pl.String,
+    "snapshot_version": pl.String,  # TODO: Use METAXY_SNAPSHOT_ID_COL
+}
+
+MIGRATION_EVENTS_SCHEMA = {
+    "migration_id": pl.String,
+    "event_type": pl.String,  # "started", "feature_started", "feature_completed", "completed", "failed"
+    "timestamp": pl.Datetime("us"),
+    "feature_key": pl.String,  # Empty for migration-level events
+    "rows_affected": pl.Int64,
+    "error_message": pl.String,  # Empty if no error
+}
+
+
+class SystemTableStorage:
+    """Storage layer for migration system tables.
+
+    Provides type-safe access to migration snapshots, migrations, and events.
+    Uses struct-based storage (not JSON/bytes) for efficient queries.
+
+    Status is computed at query-time from events (append-only).
+    """
+
+    def __init__(self, store: MetadataStoreProtocol):
+        """Initialize storage layer.
+
+        Args:
+            store: Metadata store to use for system tables
+        """
+        self.store = store
+
+    # ========== Migrations ==========
+    # Note: Migration definitions are stored in YAML files (git), not in the database.
+    # Only execution events are stored in DB for tracking progress and state.
+
+    def list_executed_migrations(self) -> list[str]:
+        """List all migration IDs that have execution events.
+
+        Returns:
+            List of migration IDs that have been started/executed
+        """
+        lazy = self.store._read_metadata_native(MIGRATION_EVENTS_KEY)
+
+        if lazy is None:
+            return []
+
+        df = lazy.select("migration_id").unique().collect().to_polars()
+        return df["migration_id"].to_list()
+
+    # ========== Events ==========
+
+    def write_event(
+        self,
+        migration_id: str,
+        event_type: str,
+        feature_key: str = "",
+        rows_affected: int = 0,
+        error_message: str = "",
+    ) -> None:
+        """Write migration event to system table (append-only).
+
+        Args:
+            migration_id: Migration this event belongs to
+            event_type: Event type ("started", "feature_started", "feature_completed", "completed", "failed")
+            feature_key: Feature key (empty for migration-level events)
+            rows_affected: Number of rows affected (for feature events)
+            error_message: Error message (empty if no error)
+        """
+        record = pl.DataFrame(
+            {
+                "migration_id": [migration_id],
+                "event_type": [event_type],
+                "timestamp": [datetime.now(timezone.utc)],
+                "feature_key": [feature_key],
+                "rows_affected": [rows_affected],
+                "error_message": [error_message],
+            },
+            schema=MIGRATION_EVENTS_SCHEMA,
+        )
+        self.store._write_metadata_impl(MIGRATION_EVENTS_KEY, record)
+
+    def get_migration_events(self, migration_id: str) -> nw.LazyFrame[Any]:
+        """Get all events for a migration.
+
+        Args:
+            migration_id: Migration ID
+
+        Returns:
+            Lazy frame with events sorted by timestamp
+        """
+        lazy = self.store._read_metadata_native(
+            MIGRATION_EVENTS_KEY,
+            filters=[nw.col("migration_id") == migration_id],
+        )
+
+        if lazy is None:
+            # No events yet
+            return nw.from_native(pl.DataFrame(schema=MIGRATION_EVENTS_SCHEMA).lazy())
+
+        return lazy.sort("timestamp", descending=False)
+
+    def get_migration_status(self, migration_id: str) -> str:
+        """Compute migration status from events at query-time.
+
+        Args:
+            migration_id: Migration ID
+
+        Returns:
+            Status: "not_started", "in_progress", "completed", "failed"
+        """
+        events_lazy = self.get_migration_events(migration_id)
+        events_df = events_lazy.collect().to_polars()
+
+        if events_df.height == 0:
+            return "not_started"
+
+        # Get latest event
+        latest_event = events_df.sort("timestamp", descending=True).head(1)
+        latest_event_type = latest_event["event_type"][0]
+
+        if latest_event_type == "completed":
+            return "completed"
+        elif latest_event_type == "failed":
+            return "failed"
+        elif latest_event_type in ("started", "feature_started", "feature_completed"):
+            return "in_progress"
+
+        return "not_started"
+
+    def is_feature_completed(self, migration_id: str, feature_key: str) -> bool:
+        """Check if a specific feature completed successfully in a migration.
+
+        Args:
+            migration_id: Migration ID
+            feature_key: Feature key to check
+
+        Returns:
+            True if feature completed without errors
+        """
+        events_lazy = self.get_migration_events(migration_id)
+        events_df = (
+            events_lazy.filter(
+                (nw.col("feature_key") == feature_key)
+                & (nw.col("event_type") == "feature_completed")
+                & (nw.col("error_message") == "")
+            )
+            .collect()
+            .to_polars()
+        )
+
+        return events_df.height > 0
+
+    def get_completed_features(self, migration_id: str) -> list[str]:
+        """Get list of features that completed successfully in a migration.
+
+        Args:
+            migration_id: Migration ID
+
+        Returns:
+            List of feature keys
+        """
+        events_lazy = self.get_migration_events(migration_id)
+        events_df = (
+            events_lazy.filter(
+                (nw.col("event_type") == "feature_completed")
+                & (nw.col("error_message") == "")
+            )
+            .collect()
+            .to_polars()
+        )
+
+        return events_df["feature_key"].unique().to_list()
+
+    def get_failed_features(self, migration_id: str) -> dict[str, str]:
+        """Get features that failed in a migration with error messages.
+
+        Args:
+            migration_id: Migration ID
+
+        Returns:
+            Dict mapping feature key to error message
+        """
+        events_lazy = self.get_migration_events(migration_id)
+        events_df = (
+            events_lazy.filter(
+                (nw.col("event_type") == "feature_completed")
+                & (nw.col("error_message") != "")
+            )
+            .collect()
+            .to_polars()
+        )
+
+        result = {}
+        for row in events_df.iter_rows(named=True):
+            result[row["feature_key"]] = row["error_message"]
+
+        return result
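
SystemTableStorage never persists a status column; `get_migration_status` derives it from the newest row of the append-only event log. A self-contained sketch of that derivation with plain Polars (event names come from the MIGRATION_EVENTS_SCHEMA comments; the migration ID and timestamps are made up):

from datetime import datetime, timezone

import polars as pl

# Append-only event log for one migration
events = pl.DataFrame(
    {
        "migration_id": ["20240101_120000"] * 3,
        "event_type": ["started", "feature_completed", "completed"],
        "timestamp": [
            datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc),
            datetime(2024, 1, 1, 12, 5, tzinfo=timezone.utc),
            datetime(2024, 1, 1, 12, 9, tzinfo=timezone.utc),
        ],
    }
)

# Status is a pure function of the latest event, as in get_migration_status
latest = events.sort("timestamp", descending=True).head(1)["event_type"][0]
if latest == "completed":
    status = "completed"
elif latest == "failed":
    status = "failed"
else:
    status = "in_progress"

assert status == "completed"

Because the log is append-only, re-running a failed migration simply appends new events; the derived status moves back to "in_progress" and eventually "completed" without updating any rows.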
@@ -0,0 +1,34 @@
+"""Migration system for metadata version updates."""
+
+from metaxy.metadata_store.system_tables import SystemTableStorage
+from metaxy.migrations.detector import detect_migration
+from metaxy.migrations.executor import MigrationExecutor
+from metaxy.migrations.models import (
+    CustomMigration,
+    DiffMigration,
+    FullGraphMigration,
+    Migration,
+    MigrationResult,
+)
+from metaxy.migrations.ops import (
+    BaseOperation,
+    DataVersionReconciliation,
+    MetadataBackfill,
+)
+
+__all__ = [
+    # Core migration types
+    "Migration",
+    "DiffMigration",
+    "FullGraphMigration",
+    "CustomMigration",
+    "MigrationResult",
+    # Operations (for custom migrations)
+    "BaseOperation",
+    "DataVersionReconciliation",
+    "MetadataBackfill",
+    # Migration workflow
+    "detect_migration",
+    "MigrationExecutor",
+    "SystemTableStorage",
+]
@@ -0,0 +1,153 @@
+"""Feature change detection for automatic migration generation."""
+
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from metaxy.graph.diff.differ import GraphDiffer
+from metaxy.models.feature import FeatureGraph
+
+if TYPE_CHECKING:
+    from metaxy.metadata_store.base import MetadataStore
+    from metaxy.migrations.models import DiffMigration
+
+
+def detect_migration(
+    store: "MetadataStore",
+    from_snapshot_version: str | None = None,
+    ops: list[dict[str, Any]] | None = None,
+    migrations_dir: Path | None = None,
+    name: str | None = None,
+) -> "DiffMigration | None":
+    """Detect migration needed between snapshots and write YAML file.
+
+    Compares the latest snapshot in the store (or specified from_snapshot_version)
+    with the current active graph to detect changes and generate a migration YAML file.
+
+    Args:
+        store: Metadata store containing snapshot metadata
+        from_snapshot_version: Source snapshot version (defaults to latest in store)
+        ops: List of operation dicts with a "type" field. Required when changes are detected, e.g. [{"type": "metaxy.migrations.ops.DataVersionReconciliation"}]
+        migrations_dir: Directory to write migration YAML (defaults to .metaxy/migrations/)
+        name: Migration name (creates {timestamp}_{name} ID and filename)
+
+    Returns:
+        DiffMigration if changes detected and written, None otherwise
+
+    Example:
+        >>> # Compare latest snapshot in store vs current graph
+        >>> with store:
+        ...     migration = detect_migration(store, ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}])
+        ...     if migration:
+        ...         print(f"Migration written to {migration.yaml_path}")
+
+        >>> # Use custom operation
+        >>> migration = detect_migration(store, ops=[{"type": "myproject.ops.CustomOp"}])
+
+        >>> # Use custom name
+        >>> migration = detect_migration(store, ops=[...], name="example_migration")
+    """
+    from metaxy.migrations.models import DiffMigration
+
+    differ = GraphDiffer()
+
+    # Get from_snapshot_version (use latest if not specified)
+    if from_snapshot_version is None:
+        snapshots = store.read_graph_snapshots()
+        if snapshots.height == 0:
+            # No snapshots in store - nothing to migrate from
+            return None
+        from_snapshot_version = snapshots["snapshot_version"][0]
+
+    # At this point, from_snapshot_version is guaranteed to be a str
+    assert from_snapshot_version is not None  # Type narrowing for type checker
+
+    # Get to_snapshot_version from current active graph
+    active_graph = FeatureGraph.get_active()
+    if len(active_graph.features_by_key) == 0:
+        # No features in active graph - nothing to migrate to
+        return None
+
+    to_snapshot_version = active_graph.snapshot_version
+
+    # Check if versions are the same (no changes)
+    if from_snapshot_version == to_snapshot_version:
+        return None
+
+    # Load snapshot data using GraphDiffer
+    try:
+        from_snapshot_data = differ.load_snapshot_data(store, from_snapshot_version)
+    except ValueError:
+        # Snapshot not found - nothing to migrate from
+        return None
+
+    # Build snapshot data for to_snapshot (current graph)
+    to_snapshot_data = active_graph.to_snapshot()
+
+    # Compute GraphDiff using GraphDiffer
+    graph_diff = differ.diff(
+        from_snapshot_data,
+        to_snapshot_data,
+        from_snapshot_version,
+        to_snapshot_version,
+    )
+
+    # Check if there are any changes
+    if not graph_diff.has_changes:
+        return None
+
+    # Generate migration ID (timestamp first for sorting)
+    timestamp = datetime.now(timezone.utc)
+    timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S")
+    if name is not None:
+        migration_id = f"{timestamp_str}_{name}"
+    else:
+        migration_id = timestamp_str
+
+    # ops is required - caller must specify
+    if ops is None:
+        raise ValueError(
+            "ops parameter is required - must explicitly specify migration operations. "
+            "Example: ops=[{'type': 'metaxy.migrations.ops.DataVersionReconciliation'}]"
+        )
+
+    # Default migrations directory
+    if migrations_dir is None:
+        migrations_dir = Path(".metaxy/migrations")
+
+    migrations_dir.mkdir(parents=True, exist_ok=True)
+
+    # Find parent migration (latest migration in chain)
+    from metaxy.migrations.loader import find_latest_migration
+
+    parent = find_latest_migration(migrations_dir)
+    if parent is None:
+        parent = "initial"
+
+    # Create minimal DiffMigration - affected_features and description are computed on-demand
+    migration = DiffMigration(
+        migration_id=migration_id,
+        created_at=timestamp,
+        parent=parent,
+        from_snapshot_version=from_snapshot_version,
+        to_snapshot_version=to_snapshot_version,
+        ops=ops,
+    )
+
+    # Write migration YAML file
+    import yaml
+
+    yaml_path = migrations_dir / f"{migration_id}.yaml"
+    migration_yaml = {
+        "id": migration.migration_id,
+        "created_at": migration.created_at.isoformat(),
+        "parent": migration.parent,
+        "from_snapshot_version": migration.from_snapshot_version,
+        "to_snapshot_version": migration.to_snapshot_version,
+        "ops": migration.ops,
+    }
+
+    with open(yaml_path, "w") as f:
+        yaml.safe_dump(migration_yaml, f, sort_keys=False, default_flow_style=False)
+
+    return migration
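
Taken together with detect_migration above, the intended flow appears to be: open a store, detect drift between the latest stored snapshot and the active feature graph, and commit the generated YAML. A hedged end-to-end sketch (the database path and migration name are invented for illustration, and it assumes the active graph has actually changed since the last snapshot):

from pathlib import Path

from metaxy.metadata_store.sqlite import SQLiteMetadataStore
from metaxy.migrations import detect_migration

store = SQLiteMetadataStore("metadata.db")  # parent directories must already exist
with store:
    migration = detect_migration(
        store,
        ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}],
        migrations_dir=Path(".metaxy/migrations"),
        name="reconcile_features",  # hypothetical name
    )

if migration is None:
    print("No drift detected; nothing to migrate.")
else:
    # The {migration_id}.yaml file is now in .metaxy/migrations/
    print(f"Created migration {migration.migration_id}")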