metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
"""Type-safe migration models with Python class paths.
|
|
2
|
+
|
|
3
|
+
Refactored migration system using:
|
|
4
|
+
- Python class paths for polymorphic deserialization via discriminated unions
|
|
5
|
+
- Struct-based storage for graph data
|
|
6
|
+
- Event-based status tracking
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from typing import TYPE_CHECKING, Annotated, Any, Literal
|
|
11
|
+
|
|
12
|
+
import pydantic
|
|
13
|
+
from pydantic import AliasChoices, TypeAdapter
|
|
14
|
+
from pydantic import Field as PydanticField
|
|
15
|
+
from pydantic.types import AwareDatetime
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from metaxy.graph.diff.diff_models import GraphDiff
|
|
19
|
+
from metaxy.metadata_store.base import MetadataStore
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class OperationConfig(pydantic.BaseModel):
    """Schema for a single migration operation entry.

    Mirrors the YAML mapping directly -- there is no nested ``config``
    key; every operation-specific setting lives at the top level of the
    mapping, and ``extra="allow"`` keeps unknown keys around so the
    concrete operation class can consume them later.

    Required fields:
        type: Dotted Python path to the operation class, e.g.
            ``"metaxy.migrations.ops.DataVersionReconciliation"``. Stored
            as a plain string and only imported when the operation is
            instantiated via ``Migration.operations``, so configurations
            stay readable even if the class was renamed or does not
            exist yet.

    Optional fields:
        features: Feature keys the operation targets. Required for
            ``FullGraphMigration``; for ``DiffMigration`` the affected
            features are derived from the graph diff instead.

    Example (FullGraphMigration):
        {
            "type": "anam_data_utils.migrations.PostgreSQLBackfill",
            "features": ["raw_video", "scene"],
            "postgresql_url": "postgresql://...",  # direct field, no nesting
            "batch_size": 1000
        }

    Example (DiffMigration):
        {
            "type": "metaxy.migrations.ops.DataVersionReconciliation",
        }
    """

    # Preserve unrecognized keys -- they are operation-specific settings
    # that the concrete operation class validates on instantiation.
    model_config = pydantic.ConfigDict(extra="allow")

    # Dotted class path; resolved lazily at instantiation time.
    type: str
    # Feature keys this operation applies to (may be empty).
    features: list[str] = pydantic.Field(default_factory=list)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Migration(pydantic.BaseModel, ABC):  # pyright: ignore[reportUnsafeMultipleInheritance]
    """Common base class for all migration kinds.

    Concrete subclasses must provide:
    - ``migration_type``: a ``Literal`` field carrying the class path,
      used as the discriminator for polymorphic deserialization.
    - ``execute()``: the actual migration logic.
    - ``get_affected_features()``: the feature keys the migration touches.

    Migrations are chained through parent IDs, git-commit style: ``parent``
    holds the ID of the preceding migration, or the sentinel ``"initial"``
    for the first migration in the chain.
    """

    # Accept both "id" (generated YAML) and "migration_id" (tests/manual
    # YAML) on input; always serialize back out as "id".
    migration_id: str = PydanticField(
        validation_alias=AliasChoices("id", "migration_id"), serialization_alias="id"
    )
    # ID of the parent migration, or "initial" for the first one.
    parent: str
    created_at: AwareDatetime

    @abstractmethod
    def execute(
        self,
        store: "MetadataStore",
        project: str,
        *,
        dry_run: bool = False,
    ) -> "MigrationResult":
        """Run the migration against *store*.

        Args:
            store: Metadata store to operate on.
            project: Project name used for event tracking.
            dry_run: When True, validate only; perform no writes.

        Returns:
            MigrationResult describing what was executed.

        Raises:
            Exception: Propagated when the migration fails.
        """
        ...

    @abstractmethod
    def get_affected_features(
        self, store: "MetadataStore", project: str | None
    ) -> list[str]:
        """Return the affected feature keys in topological order.

        Args:
            store: Metadata store used to compute the affected set.
            project: Project name used to filter snapshots.

        Returns:
            Feature key strings, topologically sorted.
        """
        ...

    def get_status_info(
        self, store: "MetadataStore", project: str | None
    ) -> "MigrationStatusInfo":
        """Build a consolidated status view for this migration.

        Combines the expected feature set from the YAML definition with
        the completed/failed feature events recorded in the store.

        Args:
            store: Metadata store used to query migration events.
            project: Project name used to filter events.

        Returns:
            MigrationStatusInfo with expected, completed, failed, and
            pending feature breakdowns.
        """
        from metaxy.metadata_store.system import SystemTableStorage

        # Expected features come from the migration definition itself.
        expected = self.get_affected_features(store, project)

        # Actual progress comes from recorded events in the store.
        summary = SystemTableStorage(store).get_migration_summary(
            self.migration_id, project, expected
        )

        # Anything neither completed nor failed is still pending; keep the
        # original expected-feature ordering.
        settled = set(summary["completed_features"]) | set(summary["failed_features"])
        pending = [key for key in expected if key not in settled]

        return MigrationStatusInfo(
            migration_id=self.migration_id,
            status=summary["status"],
            expected_features=expected,
            completed_features=summary["completed_features"],
            failed_features=summary["failed_features"],
            pending_features=pending,
        )

    @property
    def operations(self) -> list[Any]:
        """Instantiate this migration's operations from its ``ops`` field.

        Each entry in ``ops`` is a dict with a ``"type"`` key naming the
        operation class by its dotted path. Subclasses without an ``ops``
        field yield an empty list.

        Returns:
            List of instantiated operation objects.

        Raises:
            ValueError: If an entry lacks a valid "type" or the named
                class cannot be imported.
        """
        import importlib

        # Not every subclass defines ops; probe dynamically to avoid
        # referencing a field the base class does not declare.
        raw_ops = getattr(self, "ops", None)
        if raw_ops is None:
            return []

        instances: list[Any] = []
        for entry in raw_ops:
            # Validate the minimal required structure first.
            config = OperationConfig.model_validate(entry)

            # Resolve the dotted path (module part + class name) lazily.
            module_path, class_name = config.type.rsplit(".", 1)
            try:
                op_cls = getattr(importlib.import_module(module_path), class_name)
            except (ImportError, AttributeError) as exc:
                raise ValueError(
                    f"Failed to import operation class '{config.type}': {exc}"
                ) from exc

            # Hand the full dict to the operation class (a BaseSettings
            # subclass), which picks the fields it needs and may also read
            # values from environment variables.
            instances.append(op_cls.model_validate(entry))

        return instances
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class DiffMigration(Migration):
    """Migration derived from the graph diff between two snapshots.

    Migrations form a chain via parent IDs (like git commits):
    - migration_id: Unique identifier for this migration
    - parent: ID of the parent migration ("initial" for the first one)
    - from_snapshot_version: Source snapshot version
    - to_snapshot_version: Target snapshot version
    - ops: List of operation dicts, each carrying a "type" field

    The parent chain fixes the application order; two migrations without
    children (multiple heads) is an error.

    Everything else is derived on demand:
    - affected_features: computed from the GraphDiff when requested
    - operations: instantiated from ``ops``
    - description: generated from the affected-feature count

    The graph diff itself is computed lazily via GraphDiffer and cached.

    Examples:
        First migration:
            DiffMigration(
                migration_id="20250113_120000",
                parent="initial",
                from_snapshot_version="abc123...",
                to_snapshot_version="def456...",
                created_at=datetime.now(timezone.utc),
            )

        Subsequent migration:
            DiffMigration(
                migration_id="20250113_130000",
                parent="20250113_120000",
                from_snapshot_version="def456...",
                to_snapshot_version="ghi789...",
                created_at=datetime.now(timezone.utc),
            )
    """

    # Discriminator for polymorphic deserialization via MigrationAdapter.
    migration_type: Literal["metaxy.migrations.models.DiffMigration"] = (
        "metaxy.migrations.models.DiffMigration"
    )

    # Persisted fields -- written to YAML and tracked in git.
    from_snapshot_version: str
    to_snapshot_version: str
    # Required: operations must be listed explicitly.
    ops: list[dict[str, Any]]

    # Lazily computed GraphDiff, memoized per instance.
    _graph_diff_cache: "GraphDiff | None" = pydantic.PrivateAttr(default=None)

    def _get_graph_diff(
        self, store: "MetadataStore", project: str | None
    ) -> "GraphDiff":
        """Return the graph diff, computing and caching it on first use.

        Args:
            store: Metadata store containing snapshots.
            project: Project name used to filter snapshots.

        Returns:
            GraphDiff between the two snapshot versions.
        """
        if self._graph_diff_cache is None:
            self._graph_diff_cache = self.compute_graph_diff(store, project)
        return self._graph_diff_cache

    def get_affected_features(
        self, store: "MetadataStore", project: str | None
    ) -> list[str]:
        """Compute affected features in topological order (on demand).

        Args:
            store: Metadata store containing snapshots (needed to diff).
            project: Project name used to filter snapshots.

        Returns:
            Feature key strings, topologically sorted.
        """
        from metaxy.models.feature import FeatureGraph

        diff = self._get_graph_diff(store, project)

        # Seeds are the directly changed nodes plus any newly added ones.
        seeds = [node.feature_key for node in diff.changed_nodes] + [
            node.feature_key for node in diff.added_nodes
        ]

        graph = FeatureGraph.get_active()

        # Everything downstream of a seed is affected too.
        affected = seeds + graph.get_downstream_features(seeds)

        ordered = graph.topological_sort_features(affected)
        return [key.to_string() for key in ordered]

    def compute_graph_diff(
        self, store: "MetadataStore", project: str | None
    ) -> "GraphDiff":
        """Compute the GraphDiff between the two snapshot versions.

        Args:
            store: Metadata store containing snapshots.
            project: Project name used to filter snapshots.

        Returns:
            GraphDiff from ``from_snapshot_version`` to
            ``to_snapshot_version``.

        Raises:
            ValueError: If the snapshots cannot be loaded.
        """
        from metaxy.graph.diff.differ import GraphDiffer
        from metaxy.models.feature import FeatureGraph

        differ = GraphDiffer()

        # Source snapshot must already be recorded in the store.
        older = differ.load_snapshot_data(store, self.from_snapshot_version)

        # Target snapshot may not be recorded yet; fall back to the active
        # in-memory graph when its version matches.
        try:
            newer = differ.load_snapshot_data(store, self.to_snapshot_version)
        except ValueError:
            active = FeatureGraph.get_active()
            if active.snapshot_version != self.to_snapshot_version:
                raise ValueError(
                    f"to_snapshot {self.to_snapshot_version} not found in store "
                    f"and doesn't match active graph ({active.snapshot_version})"
                )
            newer = active.to_snapshot()

        return differ.diff(
            older,
            newer,
            self.from_snapshot_version,
            self.to_snapshot_version,
        )

    def execute(
        self,
        store: "MetadataStore",
        project: str,
        *,
        dry_run: bool = False,
    ) -> "MigrationResult":
        """Execute the diff-based migration.

        Delegates the actual execution logic to MigrationExecutor.

        Args:
            store: Metadata store to operate on.
            project: Project name used for event tracking.
            dry_run: When True, validate only.

        Returns:
            MigrationResult describing the run.
        """
        from metaxy.metadata_store.system import SystemTableStorage
        from metaxy.migrations.executor import MigrationExecutor

        executor = MigrationExecutor(SystemTableStorage(store))
        return executor._execute_diff_migration(self, store, project, dry_run)
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
class FullGraphMigration(Migration):
    """Migration operating within one snapshot (or across snapshots).

    Intended for work that does not change graph structure -- backfills or
    custom transformations over existing features. Each operation declares
    the features it targets; Metaxy handles topological ordering and
    per-feature execution.
    """

    # Discriminator for polymorphic deserialization via MigrationAdapter.
    migration_type: Literal["metaxy.migrations.models.FullGraphMigration"] = (
        "metaxy.migrations.models.FullGraphMigration"
    )

    snapshot_version: str
    # Only set for cross-snapshot operations.
    from_snapshot_version: str | None = None
    # List of OperationConfig-shaped dicts.
    ops: list[dict[str, Any]]

    def get_affected_features(
        self, store: "MetadataStore", project: str | None
    ) -> list[str]:
        """Collect the union of features targeted by all operations.

        Args:
            store: Metadata store (unused for FullGraphMigration).
            project: Project name (unused for FullGraphMigration).

        Returns:
            Sorted, de-duplicated feature key strings.
        """
        keys: set[str] = set()
        for entry in self.ops:
            keys.update(OperationConfig.model_validate(entry).features)
        # Sorted for deterministic output across runs.
        return sorted(keys)

    def execute(
        self,
        store: "MetadataStore",
        project: str,
        *,
        dry_run: bool = False,
    ) -> "MigrationResult":
        """Execute the full-graph migration across all its operations.

        Delegates the actual execution logic to MigrationExecutor.

        Args:
            store: Metadata store to operate on.
            project: Project name used for event tracking.
            dry_run: When True, validate only.

        Returns:
            MigrationResult describing the run.
        """
        from metaxy.metadata_store.system import SystemTableStorage
        from metaxy.migrations.executor import MigrationExecutor

        executor = MigrationExecutor(SystemTableStorage(store))
        return executor._execute_full_graph_migration(self, store, project, dry_run)
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
class MigrationStatusInfo(pydantic.BaseModel):
    """Migration status assembled from recorded events plus the YAML definition."""

    migration_id: str
    # MigrationStatus enum value (typed loosely to avoid an import cycle).
    status: Any
    # Every feature the YAML definition expects to be processed.
    expected_features: list[str]
    # Features that finished successfully.
    completed_features: list[str]
    # Mapping of feature_key -> error message for failures.
    failed_features: dict[str, str]
    # Features that have not been started yet.
    pending_features: list[str]

    @property
    def features_remaining(self) -> int:
        """Count of features still needing work (failed + pending)."""
        return len(self.failed_features) + len(self.pending_features)

    @property
    def features_total(self) -> int:
        """Total feature count declared by the migration."""
        return len(self.expected_features)
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
class MigrationResult(pydantic.BaseModel):
    """Outcome of a single migration execution."""

    migration_id: str
    # One of "completed", "failed", or "skipped".
    status: str
    features_completed: int
    features_failed: int
    # Features skipped because a dependency failed.
    features_skipped: int
    affected_features: list[str]
    # Mapping of feature_key -> error message.
    errors: dict[str, str]
    rows_affected: int
    duration_seconds: float
    timestamp: AwareDatetime

    def summary(self) -> str:
        """Render a human-readable, multi-line summary of this result.

        Returns:
            Summary string joined with newlines.
        """
        parts = [
            f"Migration: {self.migration_id}",
            f"Status: {self.status.upper()}",
            f"Timestamp: {self.timestamp.isoformat()}",
            f"Duration: {self.duration_seconds:.2f}s",
            f"Features: {self.features_completed} completed, {self.features_failed} failed",
            f"Rows affected: {self.rows_affected}",
        ]

        if self.affected_features:
            parts.append("\nFeatures processed:")
            parts.extend(f"  ✓ {feature}" for feature in self.affected_features)

        if self.errors:
            parts.append("\nErrors:")
            parts.extend(f"  ✗ {key}: {msg}" for key, msg in self.errors.items())

        return "\n".join(parts)
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
# Discriminated union over every concrete Migration subclass.  Each class's
# ``migration_type`` Literal acts as the discriminator, so serialized
# migrations deserialize to the correct type automatically.
_MigrationUnion = Annotated[
    DiffMigration | FullGraphMigration,
    PydanticField(discriminator="migration_type"),
]
MigrationAdapter = TypeAdapter(_MigrationUnion)
|