metaxy-0.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of metaxy might be problematic.

Files changed (75)
  1. metaxy/__init__.py +61 -0
  2. metaxy/_testing.py +542 -0
  3. metaxy/_utils.py +16 -0
  4. metaxy/_version.py +1 -0
  5. metaxy/cli/app.py +76 -0
  6. metaxy/cli/context.py +71 -0
  7. metaxy/cli/graph.py +576 -0
  8. metaxy/cli/graph_diff.py +290 -0
  9. metaxy/cli/list.py +42 -0
  10. metaxy/cli/metadata.py +271 -0
  11. metaxy/cli/migrations.py +862 -0
  12. metaxy/cli/push.py +55 -0
  13. metaxy/config.py +450 -0
  14. metaxy/data_versioning/__init__.py +24 -0
  15. metaxy/data_versioning/calculators/__init__.py +13 -0
  16. metaxy/data_versioning/calculators/base.py +97 -0
  17. metaxy/data_versioning/calculators/duckdb.py +186 -0
  18. metaxy/data_versioning/calculators/ibis.py +225 -0
  19. metaxy/data_versioning/calculators/polars.py +135 -0
  20. metaxy/data_versioning/diff/__init__.py +15 -0
  21. metaxy/data_versioning/diff/base.py +150 -0
  22. metaxy/data_versioning/diff/narwhals.py +108 -0
  23. metaxy/data_versioning/hash_algorithms.py +19 -0
  24. metaxy/data_versioning/joiners/__init__.py +9 -0
  25. metaxy/data_versioning/joiners/base.py +70 -0
  26. metaxy/data_versioning/joiners/narwhals.py +235 -0
  27. metaxy/entrypoints.py +309 -0
  28. metaxy/ext/__init__.py +1 -0
  29. metaxy/ext/alembic.py +326 -0
  30. metaxy/ext/sqlmodel.py +172 -0
  31. metaxy/ext/sqlmodel_system_tables.py +139 -0
  32. metaxy/graph/__init__.py +21 -0
  33. metaxy/graph/diff/__init__.py +21 -0
  34. metaxy/graph/diff/diff_models.py +399 -0
  35. metaxy/graph/diff/differ.py +740 -0
  36. metaxy/graph/diff/models.py +418 -0
  37. metaxy/graph/diff/rendering/__init__.py +18 -0
  38. metaxy/graph/diff/rendering/base.py +274 -0
  39. metaxy/graph/diff/rendering/cards.py +188 -0
  40. metaxy/graph/diff/rendering/formatter.py +805 -0
  41. metaxy/graph/diff/rendering/graphviz.py +246 -0
  42. metaxy/graph/diff/rendering/mermaid.py +320 -0
  43. metaxy/graph/diff/rendering/rich.py +165 -0
  44. metaxy/graph/diff/rendering/theme.py +48 -0
  45. metaxy/graph/diff/traversal.py +247 -0
  46. metaxy/graph/utils.py +58 -0
  47. metaxy/metadata_store/__init__.py +31 -0
  48. metaxy/metadata_store/_protocols.py +38 -0
  49. metaxy/metadata_store/base.py +1676 -0
  50. metaxy/metadata_store/clickhouse.py +161 -0
  51. metaxy/metadata_store/duckdb.py +167 -0
  52. metaxy/metadata_store/exceptions.py +43 -0
  53. metaxy/metadata_store/ibis.py +451 -0
  54. metaxy/metadata_store/memory.py +228 -0
  55. metaxy/metadata_store/sqlite.py +187 -0
  56. metaxy/metadata_store/system_tables.py +257 -0
  57. metaxy/migrations/__init__.py +34 -0
  58. metaxy/migrations/detector.py +153 -0
  59. metaxy/migrations/executor.py +208 -0
  60. metaxy/migrations/loader.py +260 -0
  61. metaxy/migrations/models.py +718 -0
  62. metaxy/migrations/ops.py +390 -0
  63. metaxy/models/__init__.py +0 -0
  64. metaxy/models/bases.py +6 -0
  65. metaxy/models/constants.py +24 -0
  66. metaxy/models/feature.py +665 -0
  67. metaxy/models/feature_spec.py +105 -0
  68. metaxy/models/field.py +25 -0
  69. metaxy/models/plan.py +155 -0
  70. metaxy/models/types.py +157 -0
  71. metaxy/py.typed +0 -0
  72. metaxy-0.0.0.dist-info/METADATA +247 -0
  73. metaxy-0.0.0.dist-info/RECORD +75 -0
  74. metaxy-0.0.0.dist-info/WHEEL +4 -0
  75. metaxy-0.0.0.dist-info/entry_points.txt +3 -0
metaxy/migrations/models.py
@@ -0,0 +1,718 @@
+"""Type-safe migration models with Python class paths.
+
+Refactored migration system using:
+- Python class paths for polymorphic deserialization
+- Struct-based storage for graph data
+- Event-based status tracking
+"""
+
+from abc import ABC, abstractmethod
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING, Any
+
+import pydantic
+from pydantic.types import AwareDatetime
+
+if TYPE_CHECKING:
+    from metaxy.graph.diff.diff_models import GraphDiff
+    from metaxy.metadata_store.base import MetadataStore
+
+
+class Migration(pydantic.BaseModel, ABC):  # pyright: ignore[reportUnsafeMultipleInheritance]
+    """Abstract base class for all migrations.
+
+    Subclasses must define:
+    - migration_type: Class path as Literal for polymorphic deserialization
+    - execute(): Migration logic
+
+    The migration_type field is used for storage and deserialization.
+    """
+
+    migration_id: str
+    created_at: AwareDatetime
+
+    @property
+    @abstractmethod
+    def migration_type(self) -> str:
+        """Get migration type (Python class path).
+
+        Returns:
+            Full Python class path (e.g., "metaxy.migrations.models.DiffMigration")
+        """
+        pass
+
+    @abstractmethod
+    def execute(
+        self,
+        store: "MetadataStore",
+        *,
+        dry_run: bool = False,
+    ) -> "MigrationResult":
+        """Execute the migration.
+
+        Args:
+            store: Metadata store to operate on
+            dry_run: If True, only validate without executing
+
+        Returns:
+            MigrationResult with execution details
+
+        Raises:
+            Exception: If migration fails
+        """
+        pass
+
+    @abstractmethod
+    def get_affected_features(self, store: "MetadataStore") -> list[str]:
+        """Get list of affected feature keys in topological order.
+
+        Args:
+            store: Metadata store for computing affected features
+
+        Returns:
+            List of feature key strings
+        """
+        pass
+
+    def to_storage_dict(self) -> dict[str, Any]:
+        """Convert to dict for storage.
+
+        Returns:
+            Dict with all fields including migration_type
+        """
+        data = self.model_dump(mode="python")
+        data["migration_type"] = self.migration_type
+        return data
+
+    @staticmethod
+    def from_storage_dict(data: dict[str, Any]) -> "Migration":
+        """Deserialize migration from storage dict.
+
+        Args:
+            data: Dict with migration_type and other fields
+
+        Returns:
+            Migration instance of appropriate subclass
+
+        Raises:
+            ValueError: If migration_type is invalid or class not found
+        """
+        migration_type = data.get("migration_type")
+        if not migration_type:
+            raise ValueError("Missing migration_type field")
+
+        # Dynamically import the class
+        try:
+            module_path, class_name = migration_type.rsplit(".", 1)
+            module = __import__(module_path, fromlist=[class_name])
+            cls = getattr(module, class_name)
+
+            if not issubclass(cls, Migration):
+                raise TypeError(
+                    f"{migration_type} must be a subclass of Migration, got {cls}"
+                )
+
+            return cls.model_validate(data)
+        except Exception as e:
+            raise ValueError(
+                f"Failed to load migration class {migration_type}: {e}"
+            ) from e
+
+
+class DiffMigration(Migration):
+    """Migration based on graph diff between two snapshots.
+
+    Migrations form a chain via parent IDs (like git commits):
+    - migration_id: Unique identifier for this migration
+    - parent: ID of parent migration ("initial" for first migration)
+    - from_snapshot_version: Source snapshot version
+    - to_snapshot_version: Target snapshot version
+    - ops: List of operation dicts with "type" field
+
+    The parent chain ensures migrations are applied in the correct order.
+    Multiple heads (two or more migrations with no children) are an error.
+
+    All other information is computed on demand:
+    - affected_features: Computed from GraphDiff when accessed
+    - operations: Instantiated from ops
+    - description: Auto-generated from the affected features count
+
+    The graph diff itself is computed on demand using GraphDiffer.
+
+    Examples:
+        First migration:
+            DiffMigration(
+                migration_id="20250113_120000",
+                parent="initial",
+                from_snapshot_version="abc123...",
+                to_snapshot_version="def456...",
+                ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}],
+                created_at=datetime.now(timezone.utc),
+            )
+
+        Subsequent migration:
+            DiffMigration(
+                migration_id="20250113_130000",
+                parent="20250113_120000",
+                from_snapshot_version="def456...",
+                to_snapshot_version="ghi789...",
+                ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}],
+                created_at=datetime.now(timezone.utc),
+            )
+    """
+
+    # Stored fields - persisted to YAML in git
+    parent: str  # Parent migration ID or "initial"
+    from_snapshot_version: str
+    to_snapshot_version: str
+    ops: list[dict[str, Any]]  # Required - must explicitly specify operations
+
+    # Private attribute for caching computed graph diff
+    _graph_diff_cache: "GraphDiff | None" = pydantic.PrivateAttr(default=None)
+
+    @pydantic.model_validator(mode="before")
+    @classmethod
+    def deserialize_json_fields(cls, data: dict[str, Any]) -> dict[str, Any]:
+        """Deserialize JSON strings for ops (from storage).
+
+        Args:
+            data: Raw migration data
+
+        Returns:
+            Data with deserialized JSON fields
+        """
+        import json
+
+        data = dict(data)
+
+        # Deserialize ops from JSON string (from storage)
+        if isinstance(data.get("ops"), str):
+            data["ops"] = json.loads(data["ops"])
+
+        return data
+
+    @property
+    def migration_type(self) -> str:
+        """Get migration type."""
+        return "metaxy.migrations.models.DiffMigration"
+
+    def _get_graph_diff(self, store: "MetadataStore") -> "GraphDiff":
+        """Get or compute graph diff (cached).
+
+        Args:
+            store: Metadata store containing snapshots
+
+        Returns:
+            GraphDiff between snapshots
+        """
+        if self._graph_diff_cache is None:
+            self._graph_diff_cache = self.compute_graph_diff(store)
+        return self._graph_diff_cache
+
+    @property
+    def operations(self) -> list[Any]:
+        """Get operations for this migration.
+
+        Instantiates operations from stored ops (list of dicts with "type" field).
+
+        Returns:
+            List of operation instances
+        """
+        operations = []
+        for op_dict in self.ops:
+            op_type = op_dict.get("type")
+            if not op_type:
+                raise ValueError(f"Operation dict missing 'type' field: {op_dict}")
+            try:
+                # Dynamically import and instantiate the operation class
+                module_path, class_name = op_type.rsplit(".", 1)
+                module = __import__(module_path, fromlist=[class_name])
+                op_cls = getattr(module, class_name)
+                operations.append(op_cls())
+            except Exception as e:
+                raise ValueError(
+                    f"Failed to instantiate operation {op_type}: {e}"
+                ) from e
+
+        return operations
+
+    @property
+    def description(self) -> str:
+        """Get a description for this migration.
+
+        Returns:
+            Generic auto-generated description; use get_description(store)
+            for one based on the actual affected features
+        """
+        return self.auto_description
+
+    @property
+    def auto_description(self) -> str:
+        """Generate an automatic description (no store context available).
+
+        Returns:
+            Generic human-readable description; callers with a store should
+            use get_description(store) instead
+        """
+        return "Migration: snapshot reconciliation"
+
+    def get_description(self, store: "MetadataStore") -> str:
+        """Get description for migration.
+
+        Args:
+            store: Metadata store for computing affected features
+
+        Returns:
+            Description string
+        """
+        affected = self.get_affected_features(store)
+        num_features = len(affected)
+        if num_features == 0:
+            return "No features affected"
+        elif num_features == 1:
+            return f"Migration: {affected[0]}"
+        else:
+            return f"Migration: {num_features} features affected"
+
+    def get_affected_features(self, store: "MetadataStore") -> list[str]:
+        """Get affected features in topological order (computed on-demand).
+
+        Args:
+            store: Metadata store containing snapshots (required for computation)
+
+        Returns:
+            List of feature key strings in topological order
+        """
+        graph_diff = self._get_graph_diff(store)
+
+        # Get changed feature keys (root changes)
+        changed_keys = {
+            node.feature_key.to_string() for node in graph_diff.changed_nodes
+        }
+
+        # Also include added nodes (though they typically don't have existing data to migrate)
+        for node in graph_diff.added_nodes:
+            changed_keys.add(node.feature_key.to_string())
+
+        # Build dependency map from the GraphDiff added/changed nodes.
+        # We need to compute downstream dependencies to find all affected features.
+        from metaxy.graph.diff.models import GraphData, GraphNode
+        from metaxy.graph.diff.traversal import GraphWalker
+        from metaxy.models.feature import FeatureGraph
+
+        # Get the active graph to extract dependencies
+        active_graph = FeatureGraph.get_active()
+
+        # Build GraphData from active graph for dependency analysis
+        nodes_dict = {}
+        for feature_key, feature_cls in active_graph.features_by_key.items():
+            plan = active_graph.get_feature_plan(feature_key)
+
+            # Extract dependencies from plan
+            dependencies = []
+            if plan.deps:
+                for dep in plan.deps:
+                    dependencies.append(dep.key)
+
+            nodes_dict[feature_key.to_string()] = GraphNode(
+                key=feature_key,
+                version=feature_cls.feature_version(),
+                dependencies=dependencies,
+            )
+
+        to_graph_data = GraphData(
+            nodes=nodes_dict, snapshot_version=self.to_snapshot_version
+        )
+
+        # Build reverse dependency map (feature -> dependents)
+        dependents_map: dict[str, set[str]] = {}
+        for node in to_graph_data.nodes.values():
+            for dep_key in node.dependencies:
+                dep_key_str = dep_key.to_string()
+                if dep_key_str not in dependents_map:
+                    dependents_map[dep_key_str] = set()
+                dependents_map[dep_key_str].add(node.key.to_string())
+
+        # Find all affected features (changed + their downstream) via BFS
+        affected = set(changed_keys)
+        queue = list(changed_keys)
+        while queue:
+            key_str = queue.pop(0)
+            if key_str in dependents_map:
+                for dependent in dependents_map[key_str]:
+                    if dependent not in affected:
+                        affected.add(dependent)
+                        queue.append(dependent)
+
+        # Get topological order for affected features
+        walker = GraphWalker(to_graph_data)
+        sorted_nodes = walker.topological_sort(nodes_to_include=affected)
+
+        return [node.key.to_string() for node in sorted_nodes]
+
+    def compute_graph_diff(self, store: "MetadataStore") -> "GraphDiff":
+        """Compute GraphDiff on-demand from snapshot versions.
+
+        Args:
+            store: Metadata store containing snapshots
+
+        Returns:
+            GraphDiff between from_snapshot_version and to_snapshot_version
+
+        Raises:
+            ValueError: If snapshots cannot be loaded
+        """
+        from metaxy.graph.diff.differ import GraphDiffer
+        from metaxy.models.feature import FeatureGraph
+
+        differ = GraphDiffer()
+
+        # Load from_snapshot data from store
+        from_snapshot_data = differ.load_snapshot_data(
+            store, self.from_snapshot_version
+        )
+
+        # Try to load to_snapshot from store; if it doesn't exist, use the active graph
+        try:
+            to_snapshot_data = differ.load_snapshot_data(
+                store, self.to_snapshot_version
+            )
+        except ValueError:
+            # Snapshot not recorded yet, use active graph
+            active_graph = FeatureGraph.get_active()
+            if active_graph.snapshot_version != self.to_snapshot_version:
+                raise ValueError(
+                    f"to_snapshot {self.to_snapshot_version} not found in store "
+                    f"and doesn't match active graph ({active_graph.snapshot_version})"
+                )
+            to_snapshot_data = active_graph.to_snapshot()
+
+        # Compute diff
+        return differ.diff(
+            from_snapshot_data,
+            to_snapshot_data,
+            self.from_snapshot_version,
+            self.to_snapshot_version,
+        )
+
+    def execute(
+        self,
+        store: "MetadataStore",
+        *,
+        dry_run: bool = False,
+    ) -> "MigrationResult":
+        """Execute diff-based migration.
+
+        Process:
+        1. Resolve the affected features (computed on-demand)
+        2. For each affected feature:
+           - Skip it if already completed (resume support)
+           - Execute the operation
+           - Record an event
+        3. Return result
+
+        Args:
+            store: Metadata store
+            dry_run: If True, only validate
+
+        Returns:
+            MigrationResult
+        """
+        from metaxy.metadata_store.system_tables import SystemTableStorage
+
+        storage = SystemTableStorage(store)
+        start_time = datetime.now(timezone.utc)
+
+        if not dry_run:
+            # Write started event
+            storage.write_event(self.migration_id, "started")
+
+        affected_features_list = []
+        errors = {}
+        rows_affected_total = 0
+
+        # Execute operations (currently only DataVersionReconciliation is supported)
+        from metaxy.migrations.ops import DataVersionReconciliation
+
+        # Get affected features (computed on-demand)
+        affected_features_to_process = self.get_affected_features(store)
+
+        if len(self.operations) == 1 and isinstance(
+            self.operations[0], DataVersionReconciliation
+        ):
+            # DataVersionReconciliation applies to all affected features
+            op = self.operations[0]
+
+            for feature_key_str in affected_features_to_process:
+                # Skip if already completed (resume support)
+                if not dry_run and storage.is_feature_completed(
+                    self.migration_id, feature_key_str
+                ):
+                    affected_features_list.append(feature_key_str)
+                    continue
+
+                # Log feature started
+                if not dry_run:
+                    storage.write_event(
+                        self.migration_id,
+                        "feature_started",
+                        feature_key=feature_key_str,
+                    )
+
+                try:
+                    # Execute operation for this feature
+                    rows_affected = op.execute_for_feature(
+                        store,
+                        feature_key_str,
+                        from_snapshot_version=self.from_snapshot_version,
+                        to_snapshot_version=self.to_snapshot_version,
+                        dry_run=dry_run,
+                    )
+
+                    # Log feature completed
+                    if not dry_run:
+                        storage.write_event(
+                            self.migration_id,
+                            "feature_completed",
+                            feature_key=feature_key_str,
+                            rows_affected=rows_affected,
+                        )
+
+                    affected_features_list.append(feature_key_str)
+                    rows_affected_total += rows_affected
+
+                except Exception as e:
+                    error_msg = str(e)
+                    errors[feature_key_str] = error_msg
+
+                    # Log feature failure (written as a feature_completed
+                    # event carrying error_message)
+                    if not dry_run:
+                        storage.write_event(
+                            self.migration_id,
+                            "feature_completed",
+                            feature_key=feature_key_str,
+                            error_message=error_msg,
+                        )
+
+                    continue
+        else:
+            # Future: Support other operation types here
+            raise NotImplementedError(
+                "Only DataVersionReconciliation is currently supported"
+            )
+
+        # Determine status
+        if dry_run:
+            status = "skipped"
+        elif len(errors) == 0:
+            status = "completed"
+            storage.write_event(self.migration_id, "completed")
+        else:
+            status = "failed"
+            storage.write_event(self.migration_id, "failed")
+
+        duration = (datetime.now(timezone.utc) - start_time).total_seconds()
+
+        return MigrationResult(
+            migration_id=self.migration_id,
+            status=status,
+            features_completed=len(affected_features_list),
+            features_failed=len(errors),
+            affected_features=affected_features_list,
+            errors=errors,
+            rows_affected=rows_affected_total,
+            duration_seconds=duration,
+            timestamp=start_time,
+        )
+
+
+class FullGraphMigration(Migration):
+    """Migration that operates within a single snapshot.
+
+    Used for operations that don't involve graph structure changes,
+    such as backfills or custom transformations on existing features.
+    """
+
+    snapshot_version: str
+    affected_features: list[str] = pydantic.Field(
+        default_factory=list
+    )  # Features to process
+    operations: list[Any] = pydantic.Field(default_factory=list)  # Custom operations
+    description: str | None = None
+    metadata: dict[str, Any] = pydantic.Field(default_factory=dict)
+
+    @pydantic.model_validator(mode="before")
+    @classmethod
+    def deserialize_json_fields(cls, data: dict[str, Any]) -> dict[str, Any]:
+        """Deserialize JSON strings for operations and metadata (from storage).
+
+        Args:
+            data: Raw migration data
+
+        Returns:
+            Data with deserialized JSON fields
+        """
+        import json
+
+        data = dict(data)
+
+        # Deserialize JSON strings (from storage)
+        if isinstance(data.get("operations"), str):
+            data["operations"] = json.loads(data["operations"])
+
+        if isinstance(data.get("metadata"), str):
+            data["metadata"] = json.loads(data["metadata"])
+
+        return data
+
+    @property
+    def migration_type(self) -> str:
+        """Get migration type."""
+        return "metaxy.migrations.models.FullGraphMigration"
+
+    def get_affected_features(self, store: "MetadataStore") -> list[str]:
+        """Get affected features.
+
+        Args:
+            store: Metadata store (not used for FullGraphMigration)
+
+        Returns:
+            List of feature key strings
+        """
+        return self.affected_features
+
+    def execute(
+        self,
+        store: "MetadataStore",
+        *,
+        dry_run: bool = False,
+    ) -> "MigrationResult":
+        """Execute full graph migration.
+
+        Subclasses should implement custom logic here.
+
+        Args:
+            store: Metadata store
+            dry_run: If True, only validate
+
+        Returns:
+            MigrationResult
+        """
+        # Base implementation: no-op
+        return MigrationResult(
+            migration_id=self.migration_id,
+            status="completed",
+            features_completed=0,
+            features_failed=0,
+            affected_features=[],
+            errors={},
+            rows_affected=0,
+            duration_seconds=0.0,
+            timestamp=datetime.now(timezone.utc),
+        )
+
+
+class CustomMigration(Migration):
+    """Base class for user-defined custom migrations.
+
+    Users can subclass this to implement completely custom migration logic.
+
+    Example:
+        class S3BackfillMigration(CustomMigration):
+            s3_bucket: str
+            s3_prefix: str
+
+            @property
+            def migration_type(self) -> str:
+                return "myproject.migrations.S3BackfillMigration"
+
+            def execute(self, store, *, dry_run=False):
+                # Custom logic here
+                ...
+    """
+
+    @property
+    def migration_type(self) -> str:
+        """Get migration type.
+
+        Defaults to the subclass's full class path; override only if the
+        importable path differs from the runtime class path.
+        """
+        return f"{self.__class__.__module__}.{self.__class__.__name__}"
+
+    def get_affected_features(self, store: "MetadataStore") -> list[str]:
+        """Get affected features.
+
+        Args:
+            store: Metadata store (not used for CustomMigration base class)
+
+        Returns:
+            Empty list (subclasses should override)
+        """
+        return []
+
+    def execute(
+        self,
+        store: "MetadataStore",
+        *,
+        dry_run: bool = False,
+    ) -> "MigrationResult":
+        """Execute custom migration.
+
+        Subclasses must override this to implement custom logic.
+
+        Args:
+            store: Metadata store
+            dry_run: If True, only validate
+
+        Returns:
+            MigrationResult
+
+        Raises:
+            NotImplementedError: If not overridden by subclass
+        """
+        raise NotImplementedError(
+            f"{self.__class__.__name__} must implement execute() method"
+        )
+
+
+class MigrationResult(pydantic.BaseModel):
+    """Result of executing a migration."""
+
+    migration_id: str
+    status: str  # "completed", "failed", "skipped"
+    features_completed: int
+    features_failed: int
+    affected_features: list[str]
+    errors: dict[str, str]  # feature_key -> error message
+    rows_affected: int
+    duration_seconds: float
+    timestamp: AwareDatetime
+
+    def summary(self) -> str:
+        """Human-readable summary of migration result.
+
+        Returns:
+            Multi-line summary string
+        """
+        lines = [
+            f"Migration: {self.migration_id}",
+            f"Status: {self.status.upper()}",
+            f"Timestamp: {self.timestamp.isoformat()}",
+            f"Duration: {self.duration_seconds:.2f}s",
+            f"Features: {self.features_completed} completed, {self.features_failed} failed",
+            f"Rows affected: {self.rows_affected}",
+        ]
+
+        if self.affected_features:
+            lines.append("\nFeatures processed:")
+            for feature in self.affected_features:
+                lines.append(f"  ✓ {feature}")
+
+        if self.errors:
+            lines.append("\nErrors:")
+            for feature, error in self.errors.items():
+                lines.append(f"  ✗ {feature}: {error}")
+
+        return "\n".join(lines)
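
Usage note: the class-path round-trip implemented by to_storage_dict() and from_storage_dict() above can be exercised without any store, since serialization alone needs no MetadataStore. Below is a minimal sketch assuming only the API shown in this diff; the DataVersionReconciliation class path mirrors the one operation type execute() currently supports, and the snapshot versions are placeholder strings.

    from datetime import datetime, timezone

    from metaxy.migrations.models import DiffMigration, Migration

    # Build the first migration in a chain (parent="initial").
    migration = DiffMigration(
        migration_id="20250113_120000",
        created_at=datetime.now(timezone.utc),
        parent="initial",
        from_snapshot_version="abc123",
        to_snapshot_version="def456",
        ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}],
    )

    # to_storage_dict() stamps the full class path into "migration_type"...
    data = migration.to_storage_dict()
    assert data["migration_type"] == "metaxy.migrations.models.DiffMigration"

    # ...and from_storage_dict() dynamically imports that class and
    # re-validates the payload, so any Migration subclass (including
    # user-defined CustomMigration subclasses) round-trips without a
    # central registry.
    restored = Migration.from_storage_dict(data)
    assert isinstance(restored, DiffMigration)
    assert restored.parent == "initial"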