metaxy-0.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +61 -0
- metaxy/_testing.py +542 -0
- metaxy/_utils.py +16 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +76 -0
- metaxy/cli/context.py +71 -0
- metaxy/cli/graph.py +576 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +42 -0
- metaxy/cli/metadata.py +271 -0
- metaxy/cli/migrations.py +862 -0
- metaxy/cli/push.py +55 -0
- metaxy/config.py +450 -0
- metaxy/data_versioning/__init__.py +24 -0
- metaxy/data_versioning/calculators/__init__.py +13 -0
- metaxy/data_versioning/calculators/base.py +97 -0
- metaxy/data_versioning/calculators/duckdb.py +186 -0
- metaxy/data_versioning/calculators/ibis.py +225 -0
- metaxy/data_versioning/calculators/polars.py +135 -0
- metaxy/data_versioning/diff/__init__.py +15 -0
- metaxy/data_versioning/diff/base.py +150 -0
- metaxy/data_versioning/diff/narwhals.py +108 -0
- metaxy/data_versioning/hash_algorithms.py +19 -0
- metaxy/data_versioning/joiners/__init__.py +9 -0
- metaxy/data_versioning/joiners/base.py +70 -0
- metaxy/data_versioning/joiners/narwhals.py +235 -0
- metaxy/entrypoints.py +309 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/alembic.py +326 -0
- metaxy/ext/sqlmodel.py +172 -0
- metaxy/ext/sqlmodel_system_tables.py +139 -0
- metaxy/graph/__init__.py +21 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +399 -0
- metaxy/graph/diff/differ.py +740 -0
- metaxy/graph/diff/models.py +418 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +274 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +320 -0
- metaxy/graph/diff/rendering/rich.py +165 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +31 -0
- metaxy/metadata_store/_protocols.py +38 -0
- metaxy/metadata_store/base.py +1676 -0
- metaxy/metadata_store/clickhouse.py +161 -0
- metaxy/metadata_store/duckdb.py +167 -0
- metaxy/metadata_store/exceptions.py +43 -0
- metaxy/metadata_store/ibis.py +451 -0
- metaxy/metadata_store/memory.py +228 -0
- metaxy/metadata_store/sqlite.py +187 -0
- metaxy/metadata_store/system_tables.py +257 -0
- metaxy/migrations/__init__.py +34 -0
- metaxy/migrations/detector.py +153 -0
- metaxy/migrations/executor.py +208 -0
- metaxy/migrations/loader.py +260 -0
- metaxy/migrations/models.py +718 -0
- metaxy/migrations/ops.py +390 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +6 -0
- metaxy/models/constants.py +24 -0
- metaxy/models/feature.py +665 -0
- metaxy/models/feature_spec.py +105 -0
- metaxy/models/field.py +25 -0
- metaxy/models/plan.py +155 -0
- metaxy/models/types.py +157 -0
- metaxy/py.typed +0 -0
- metaxy-0.0.0.dist-info/METADATA +247 -0
- metaxy-0.0.0.dist-info/RECORD +75 -0
- metaxy-0.0.0.dist-info/WHEEL +4 -0
- metaxy-0.0.0.dist-info/entry_points.txt +3 -0
metaxy/migrations/models.py
@@ -0,0 +1,718 @@
"""Type-safe migration models with Python class paths.

Refactored migration system using:
- Python class paths for polymorphic deserialization
- Struct-based storage for graph data
- Event-based status tracking
"""

from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any

import pydantic
from pydantic.types import AwareDatetime

if TYPE_CHECKING:
    from metaxy.graph.diff.diff_models import GraphDiff
    from metaxy.metadata_store.base import MetadataStore


class Migration(pydantic.BaseModel, ABC):  # pyright: ignore[reportUnsafeMultipleInheritance]
    """Abstract base class for all migrations.

    Subclasses must define:
    - migration_type: Python class path used for polymorphic deserialization
    - execute(): Migration logic

    The migration_type field is used for storage and deserialization.
    """

    migration_id: str
    created_at: AwareDatetime

    @property
    @abstractmethod
    def migration_type(self) -> str:
        """Get migration type (Python class path).

        Returns:
            Full Python class path (e.g., "metaxy.migrations.models.DiffMigration")
        """
        pass

    @abstractmethod
    def execute(
        self,
        store: "MetadataStore",
        *,
        dry_run: bool = False,
    ) -> "MigrationResult":
        """Execute the migration.

        Args:
            store: Metadata store to operate on
            dry_run: If True, only validate without executing

        Returns:
            MigrationResult with execution details

        Raises:
            Exception: If migration fails
        """
        pass

    @abstractmethod
    def get_affected_features(self, store: "MetadataStore") -> list[str]:
        """Get list of affected feature keys in topological order.

        Args:
            store: Metadata store for computing affected features

        Returns:
            List of feature key strings
        """
        pass

    def to_storage_dict(self) -> dict[str, Any]:
        """Convert to dict for storage.

        Returns:
            Dict with all fields including migration_type
        """
        data = self.model_dump(mode="python")
        data["migration_type"] = self.migration_type
        return data

    @staticmethod
    def from_storage_dict(data: dict[str, Any]) -> "Migration":
        """Deserialize migration from storage dict.

        Args:
            data: Dict with migration_type and other fields

        Returns:
            Migration instance of appropriate subclass

        Raises:
            ValueError: If migration_type is invalid or class not found
        """
        migration_type = data.get("migration_type")
        if not migration_type:
            raise ValueError("Missing migration_type field")

        # Dynamically import the class
        try:
            module_path, class_name = migration_type.rsplit(".", 1)
            module = __import__(module_path, fromlist=[class_name])
            cls = getattr(module, class_name)

            if not issubclass(cls, Migration):
                raise TypeError(
                    f"{migration_type} must be a subclass of Migration, got {cls}"
                )

            return cls.model_validate(data)
        except Exception as e:
            raise ValueError(
                f"Failed to load migration class {migration_type}: {e}"
            ) from e
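
    # Round-trip sketch (illustrative only; assumes a concrete subclass such
    # as DiffMigration, defined below). to_storage_dict() stamps the class
    # path, and from_storage_dict() resolves it back to the right subclass:
    #
    #     m = DiffMigration(
    #         migration_id="20250113_120000",
    #         created_at=datetime.now(timezone.utc),
    #         parent="initial",
    #         from_snapshot_version="abc123",
    #         to_snapshot_version="def456",
    #         ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}],
    #     )
    #     stored = m.to_storage_dict()  # includes "migration_type"
    #     restored = Migration.from_storage_dict(stored)
    #     assert isinstance(restored, DiffMigration)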


class DiffMigration(Migration):
    """Migration based on graph diff between two snapshots.

    Migrations form a chain via parent IDs (like git commits):
    - migration_id: Unique identifier for this migration
    - parent: ID of parent migration ("initial" for first migration)
    - from_snapshot_version: Source snapshot version
    - to_snapshot_version: Target snapshot version
    - ops: List of operation dicts with "type" field

    The parent chain ensures migrations are applied in the correct order.
    A chain with multiple heads (two or more migrations with no children) is
    an error.

    All other information is computed on demand:
    - affected_features: Computed from GraphDiff when accessed
    - operations: Instantiated from ops
    - description: Auto-generated from affected features count

    The graph diff is computed on demand when needed using GraphDiffer.

    Examples:
        First migration:
            DiffMigration(
                migration_id="20250113_120000",
                parent="initial",
                from_snapshot_version="abc123...",
                to_snapshot_version="def456...",
                ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}],
                created_at=datetime.now(timezone.utc),
            )

        Subsequent migration:
            DiffMigration(
                migration_id="20250113_130000",
                parent="20250113_120000",
                from_snapshot_version="def456...",
                to_snapshot_version="ghi789...",
                ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}],
                created_at=datetime.now(timezone.utc),
            )
    """

    # Stored fields - persisted to YAML in git
    parent: str  # Parent migration ID or "initial"
    from_snapshot_version: str
    to_snapshot_version: str
    ops: list[dict[str, Any]]  # Required - must explicitly specify operations

    # Private attribute for caching the computed graph diff
    _graph_diff_cache: "GraphDiff | None" = pydantic.PrivateAttr(default=None)

    @pydantic.model_validator(mode="before")
    @classmethod
    def deserialize_json_fields(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Deserialize JSON strings for ops (from storage).

        Args:
            data: Raw migration data

        Returns:
            Data with deserialized JSON fields
        """
        import json

        data = dict(data)

        # Deserialize ops from a JSON string (as stored)
        if isinstance(data.get("ops"), str):
            data["ops"] = json.loads(data["ops"])

        return data
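
    # Storage sketch (illustrative): a row read back from the metadata store
    # may carry "ops" as a JSON-encoded string rather than a list; the
    # before-validator above parses it back into a list of dicts:
    #
    #     DiffMigration.model_validate({
    #         "migration_id": "20250113_120000",
    #         "created_at": "2025-01-13T12:00:00+00:00",
    #         "parent": "initial",
    #         "from_snapshot_version": "abc123",
    #         "to_snapshot_version": "def456",
    #         "ops": '[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}]',
    #     })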

    @property
    def migration_type(self) -> str:
        """Get migration type."""
        return "metaxy.migrations.models.DiffMigration"

    def _get_graph_diff(self, store: "MetadataStore") -> "GraphDiff":
        """Get or compute graph diff (cached).

        Args:
            store: Metadata store containing snapshots

        Returns:
            GraphDiff between snapshots
        """
        if self._graph_diff_cache is None:
            self._graph_diff_cache = self.compute_graph_diff(store)
        return self._graph_diff_cache

    @property
    def operations(self) -> list[Any]:
        """Get operations for this migration.

        Instantiates operations from stored ops (list of dicts with "type" field).

        Returns:
            List of operation instances
        """
        operations = []
        for op_dict in self.ops:
            op_type = op_dict.get("type")
            if not op_type:
                raise ValueError(f"Operation dict missing 'type' field: {op_dict}")
            try:
                # Dynamically import and instantiate the operation class
                module_path, class_name = op_type.rsplit(".", 1)
                module = __import__(module_path, fromlist=[class_name])
                op_cls = getattr(module, class_name)
                operations.append(op_cls())
            except Exception as e:
                raise ValueError(
                    f"Failed to instantiate operation {op_type}: {e}"
                ) from e

        return operations
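
    # Ops format sketch (illustrative): each entry names an operation class by
    # its import path. Note that op_cls() above is instantiated without
    # arguments, so any extra keys in the dict are currently ignored:
    #
    #     ops = [{"type": "metaxy.migrations.ops.DataVersionReconciliation"}]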

    @property
    def description(self) -> str:
        """Get auto-generated description for migration.

        Returns:
            Human-readable description based on affected features count
        """
        # Computing affected features requires store access, which a property
        # cannot take; callers that have a store should prefer
        # get_description(store).
        return self.auto_description

    @property
    def auto_description(self) -> str:
        """Generate automatic description (no store context available).

        Returns:
            Generic human-readable description
        """
        # This is used internally - callers should use get_description(store)
        return "Migration: snapshot reconciliation"

    def get_description(self, store: "MetadataStore") -> str:
        """Get description for migration.

        Args:
            store: Metadata store for computing affected features

        Returns:
            Description string
        """
        affected = self.get_affected_features(store)
        num_features = len(affected)
        if num_features == 0:
            return "No features affected"
        elif num_features == 1:
            return f"Migration: {affected[0]}"
        else:
            return f"Migration: {num_features} features affected"

    def get_affected_features(self, store: "MetadataStore") -> list[str]:
        """Get affected features in topological order (computed on demand).

        Args:
            store: Metadata store containing snapshots (required for computation)

        Returns:
            List of feature key strings in topological order
        """
        graph_diff = self._get_graph_diff(store)

        # Get changed feature keys (root changes)
        changed_keys = {
            node.feature_key.to_string() for node in graph_diff.changed_nodes
        }

        # Also include added nodes (though they typically don't have existing data to migrate)
        for node in graph_diff.added_nodes:
            changed_keys.add(node.feature_key.to_string())

        # Build a dependency map from the active graph so we can compute
        # downstream dependencies and find all affected features
        from metaxy.graph.diff.models import GraphData, GraphNode
        from metaxy.graph.diff.traversal import GraphWalker
        from metaxy.models.feature import FeatureGraph

        # Get the active graph to extract dependencies
        active_graph = FeatureGraph.get_active()

        # Build GraphData from the active graph for dependency analysis
        nodes_dict = {}
        for feature_key, feature_cls in active_graph.features_by_key.items():
            plan = active_graph.get_feature_plan(feature_key)

            # Extract dependencies from the plan
            dependencies = []
            if plan.deps:
                for dep in plan.deps:
                    dependencies.append(dep.key)

            nodes_dict[feature_key.to_string()] = GraphNode(
                key=feature_key,
                version=feature_cls.feature_version(),
                dependencies=dependencies,
            )

        to_graph_data = GraphData(
            nodes=nodes_dict, snapshot_version=self.to_snapshot_version
        )

        # Build reverse dependency map (feature -> dependents)
        dependents_map: dict[str, set[str]] = {}
        for node in to_graph_data.nodes.values():
            for dep_key in node.dependencies:
                dep_key_str = dep_key.to_string()
                if dep_key_str not in dependents_map:
                    dependents_map[dep_key_str] = set()
                dependents_map[dep_key_str].add(node.key.to_string())

        # Find all affected features (changed + their downstream) via BFS
        affected = set(changed_keys)
        queue = list(changed_keys)
        while queue:
            key_str = queue.pop(0)
            if key_str in dependents_map:
                for dependent in dependents_map[key_str]:
                    if dependent not in affected:
                        affected.add(dependent)
                        queue.append(dependent)

        # Get topological order for affected features
        walker = GraphWalker(to_graph_data)
        sorted_nodes = walker.topological_sort(nodes_to_include=affected)

        return [node.key.to_string() for node in sorted_nodes]
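
    # Worked example (illustrative): with features a -> b -> c (b depends on
    # a, c depends on b) and a change detected only on "a", the reverse map is
    # dependents_map == {"a": {"b"}, "b": {"c"}}. The BFS expands the affected
    # set from {"a"} to {"a", "b", "c"}, and the topological sort returns
    # ["a", "b", "c"], so upstream features are always migrated before their
    # dependents.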

    def compute_graph_diff(self, store: "MetadataStore") -> "GraphDiff":
        """Compute GraphDiff on demand from snapshot versions.

        Args:
            store: Metadata store containing snapshots

        Returns:
            GraphDiff between from_snapshot_version and to_snapshot_version

        Raises:
            ValueError: If snapshots cannot be loaded
        """
        from metaxy.graph.diff.differ import GraphDiffer
        from metaxy.models.feature import FeatureGraph

        differ = GraphDiffer()

        # Load from_snapshot data from the store
        from_snapshot_data = differ.load_snapshot_data(
            store, self.from_snapshot_version
        )

        # Try to load to_snapshot from the store; if it doesn't exist, use the
        # active graph
        try:
            to_snapshot_data = differ.load_snapshot_data(
                store, self.to_snapshot_version
            )
        except ValueError:
            # Snapshot not recorded yet, use the active graph
            active_graph = FeatureGraph.get_active()
            if active_graph.snapshot_version != self.to_snapshot_version:
                raise ValueError(
                    f"to_snapshot {self.to_snapshot_version} not found in store "
                    f"and doesn't match active graph ({active_graph.snapshot_version})"
                )
            to_snapshot_data = active_graph.to_snapshot()

        # Compute diff
        return differ.diff(
            from_snapshot_data,
            to_snapshot_data,
            self.from_snapshot_version,
            self.to_snapshot_version,
        )

    def execute(
        self,
        store: "MetadataStore",
        *,
        dry_run: bool = False,
    ) -> "MigrationResult":
        """Execute diff-based migration.

        Process:
        1. Resolve operations and affected features
        2. For each affected feature:
           - Skip if already completed (resume support)
           - Execute the operation
           - Record an event
        3. Return the result

        Args:
            store: Metadata store
            dry_run: If True, only validate

        Returns:
            MigrationResult
        """
        from metaxy.metadata_store.system_tables import SystemTableStorage

        storage = SystemTableStorage(store)
        start_time = datetime.now(timezone.utc)

        if not dry_run:
            # Write started event
            storage.write_event(self.migration_id, "started")

        affected_features_list = []
        errors = {}
        rows_affected_total = 0

        # Execute operations (currently only DataVersionReconciliation is supported)
        from metaxy.migrations.ops import DataVersionReconciliation

        # Get affected features (computed on demand)
        affected_features_to_process = self.get_affected_features(store)

        if len(self.operations) == 1 and isinstance(
            self.operations[0], DataVersionReconciliation
        ):
            # DataVersionReconciliation applies to all affected features
            op = self.operations[0]

            for feature_key_str in affected_features_to_process:
                # Check if already completed (resume support)
                if not dry_run and storage.is_feature_completed(
                    self.migration_id, feature_key_str
                ):
                    affected_features_list.append(feature_key_str)
                    continue

                # Log feature started
                if not dry_run:
                    storage.write_event(
                        self.migration_id,
                        "feature_started",
                        feature_key=feature_key_str,
                    )

                try:
                    # Execute the operation for this feature
                    rows_affected = op.execute_for_feature(
                        store,
                        feature_key_str,
                        from_snapshot_version=self.from_snapshot_version,
                        to_snapshot_version=self.to_snapshot_version,
                        dry_run=dry_run,
                    )

                    # Log feature completed
                    if not dry_run:
                        storage.write_event(
                            self.migration_id,
                            "feature_completed",
                            feature_key=feature_key_str,
                            rows_affected=rows_affected,
                        )

                    affected_features_list.append(feature_key_str)
                    rows_affected_total += rows_affected

                except Exception as e:
                    error_msg = str(e)
                    errors[feature_key_str] = error_msg

                    # Log feature failure (recorded as a "feature_completed"
                    # event carrying an error_message)
                    if not dry_run:
                        storage.write_event(
                            self.migration_id,
                            "feature_completed",
                            feature_key=feature_key_str,
                            error_message=error_msg,
                        )

                    continue
        else:
            # Future: Support other operation types here
            raise NotImplementedError(
                "Only DataVersionReconciliation is currently supported"
            )

        # Determine status
        if dry_run:
            status = "skipped"
        elif len(errors) == 0:
            status = "completed"
            if not dry_run:
                storage.write_event(self.migration_id, "completed")
        else:
            status = "failed"
            if not dry_run:
                storage.write_event(self.migration_id, "failed")

        duration = (datetime.now(timezone.utc) - start_time).total_seconds()

        return MigrationResult(
            migration_id=self.migration_id,
            status=status,
            features_completed=len(affected_features_list),
            features_failed=len(errors),
            affected_features=affected_features_list,
            errors=errors,
            rows_affected=rows_affected_total,
            duration_seconds=duration,
            timestamp=start_time,
        )
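
    # Usage sketch (illustrative; `migration` is a DiffMigration instance and
    # `store` is any concrete MetadataStore, both assumed here):
    #
    #     result = migration.execute(store, dry_run=True)  # validate only
    #     if result.status != "failed":
    #         result = migration.execute(store)            # apply + log events
    #     print(result.summary())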


class FullGraphMigration(Migration):
    """Migration that operates within a single snapshot.

    Used for operations that don't involve graph structure changes,
    such as backfills or custom transformations on existing features.
    """

    snapshot_version: str
    affected_features: list[str] = pydantic.Field(
        default_factory=list
    )  # Features to process
    operations: list[Any] = pydantic.Field(default_factory=list)  # Custom operations
    description: str | None = None
    metadata: dict[str, Any] = pydantic.Field(default_factory=dict)

    @pydantic.model_validator(mode="before")
    @classmethod
    def deserialize_json_fields(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Deserialize JSON strings for operations and metadata (from storage).

        Args:
            data: Raw migration data

        Returns:
            Data with deserialized JSON fields
        """
        import json

        data = dict(data)

        # Deserialize JSON strings (from storage)
        if isinstance(data.get("operations"), str):
            data["operations"] = json.loads(data["operations"])

        if isinstance(data.get("metadata"), str):
            data["metadata"] = json.loads(data["metadata"])

        return data

    @property
    def migration_type(self) -> str:
        """Get migration type."""
        return "metaxy.migrations.models.FullGraphMigration"

    def get_affected_features(self, store: "MetadataStore") -> list[str]:
        """Get affected features.

        Args:
            store: Metadata store (not used for FullGraphMigration)

        Returns:
            List of feature key strings
        """
        return self.affected_features

    def execute(
        self,
        store: "MetadataStore",
        *,
        dry_run: bool = False,
    ) -> "MigrationResult":
        """Execute full graph migration.

        Subclasses should implement custom logic here.

        Args:
            store: Metadata store
            dry_run: If True, only validate

        Returns:
            MigrationResult
        """
        # Base implementation: no-op
        return MigrationResult(
            migration_id=self.migration_id,
            status="completed",
            features_completed=0,
            features_failed=0,
            affected_features=[],
            errors={},
            rows_affected=0,
            duration_seconds=0.0,
            timestamp=datetime.now(timezone.utc),
        )
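
# Construction sketch (illustrative; the feature key "my_feature" and the
# snapshot version are placeholders) for a single-snapshot migration:
#
#     FullGraphMigration(
#         migration_id="20250114_090000",
#         created_at=datetime.now(timezone.utc),
#         snapshot_version="abc123",
#         affected_features=["my_feature"],
#         description="Backfill metadata for my_feature",
#     )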


class CustomMigration(Migration):
    """Base class for user-defined custom migrations.

    Users can subclass this to implement completely custom migration logic.

    Example:
        class S3BackfillMigration(CustomMigration):
            s3_bucket: str
            s3_prefix: str

            @property
            def migration_type(self) -> str:
                return "myproject.migrations.S3BackfillMigration"

            def execute(self, store, *, dry_run=False):
                # Custom logic here
                ...
    """

    @property
    def migration_type(self) -> str:
        """Get migration type.

        Defaults to the subclass's fully qualified class path; override only
        if the stored path needs to differ.
        """
        return f"{self.__class__.__module__}.{self.__class__.__name__}"

    def get_affected_features(self, store: "MetadataStore") -> list[str]:
        """Get affected features.

        Args:
            store: Metadata store (not used by the CustomMigration base class)

        Returns:
            Empty list (subclasses should override)
        """
        return []

    def execute(
        self,
        store: "MetadataStore",
        *,
        dry_run: bool = False,
    ) -> "MigrationResult":
        """Execute custom migration.

        Subclasses must override this to implement custom logic.

        Args:
            store: Metadata store
            dry_run: If True, only validate

        Returns:
            MigrationResult

        Raises:
            NotImplementedError: If not overridden by subclass
        """
        raise NotImplementedError(
            f"{self.__class__.__name__} must implement execute() method"
        )


class MigrationResult(pydantic.BaseModel):
    """Result of executing a migration."""

    migration_id: str
    status: str  # "completed", "failed", "skipped"
    features_completed: int
    features_failed: int
    affected_features: list[str]
    errors: dict[str, str]  # feature_key -> error message
    rows_affected: int
    duration_seconds: float
    timestamp: AwareDatetime

    def summary(self) -> str:
        """Human-readable summary of migration result.

        Returns:
            Multi-line summary string
        """
        lines = [
            f"Migration: {self.migration_id}",
            f"Status: {self.status.upper()}",
            f"Timestamp: {self.timestamp.isoformat()}",
            f"Duration: {self.duration_seconds:.2f}s",
            f"Features: {self.features_completed} completed, {self.features_failed} failed",
            f"Rows affected: {self.rows_affected}",
        ]

        if self.affected_features:
            lines.append("\nFeatures processed:")
            for feature in self.affected_features:
                lines.append(f"  ✓ {feature}")

        if self.errors:
            lines.append("\nErrors:")
            for feature, error in self.errors.items():
                lines.append(f"  ✗ {feature}: {error}")

        return "\n".join(lines)
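
# Smoke-test sketch (illustrative; the values are placeholders, and the block
# is safe to run because it touches no store):
if __name__ == "__main__":
    result = MigrationResult(
        migration_id="20250113_120000",
        status="completed",
        features_completed=2,
        features_failed=0,
        affected_features=["feature_a", "feature_b"],
        errors={},
        rows_affected=128,
        duration_seconds=1.25,
        timestamp=datetime.now(timezone.utc),
    )
    print(result.summary())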