metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/metadata_store/types.py
@@ -0,0 +1,10 @@
+"""Types for metadata stores."""
+
+from typing import Literal
+
+# Access mode for metadata store connections.
+#
+# Controls whether the store is opened in read-only or read-write mode.
+# This is particularly important for stores like DuckDB that lock the database in write mode by default.
+# Specific store implementations should handle this parameter accordingly.
+AccessMode = Literal["read", "write"]
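For orientation, a minimal sketch of how a `Literal` access mode is typically threaded through to a backend. The `open_duckdb_connection` helper and its `access_mode` parameter are illustrative only, not part of the published metaxy API; only `duckdb.connect(..., read_only=...)` is a real call.

```py
from typing import Literal

import duckdb

AccessMode = Literal["read", "write"]


def open_duckdb_connection(path: str, access_mode: AccessMode = "write") -> duckdb.DuckDBPyConnection:
    # "read" maps onto DuckDB's read_only flag, so the connection does not take the write lock.
    return duckdb.connect(path, read_only=(access_mode == "read"))


# Create the database file first (write mode), then reopen it read-only.
duckdb.connect("metadata.duckdb").close()
conn = open_duckdb_connection("metadata.duckdb", access_mode="read")
conn.close()
```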
metaxy/metadata_store/utils.py
@@ -0,0 +1,104 @@
+from collections.abc import Iterator
+from contextlib import contextmanager
+from contextvars import ContextVar
+from urllib.parse import urlparse, urlunparse
+
+from narwhals.typing import FrameT
+
+# Context variable for suppressing feature_version warning in migrations
+_suppress_feature_version_warning: ContextVar[bool] = ContextVar(
+    "_suppress_feature_version_warning", default=False
+)
+
+
+def is_local_path(path: str) -> bool:
+    """Return True when the path points to the local filesystem."""
+    if path.startswith(("file://", "local://")):
+        return True
+    return "://" not in path
+
+
+@contextmanager
+def allow_feature_version_override() -> Iterator[None]:
+    """Context manager to suppress warnings when writing metadata with pre-existing metaxy_feature_version.
+
+    This should only be used in migration code where writing historical feature versions
+    is intentional and necessary.
+
+    Example:
+        ```py
+        with allow_feature_version_override():
+            # DataFrame already has metaxy_feature_version column from migration
+            store.write_metadata(MyFeature, df_with_feature_version)
+        ```
+    """
+    token = _suppress_feature_version_warning.set(True)
+    try:
+        yield
+    finally:
+        _suppress_feature_version_warning.reset(token)
+
+
+# Helper to create empty DataFrame with correct schema and backend
+#
+def empty_frame_like(ref_frame: FrameT) -> FrameT:
+    """Create an empty LazyFrame with the same schema as ref_frame."""
+    return ref_frame.head(0)
+
+
+def sanitize_uri(uri: str) -> str:
+    """Sanitize URI to mask credentials.
+
+    Replaces username and password in URIs with `***` to prevent credential exposure
+    in logs, display strings, and error messages.
+
+    Examples:
+        >>> sanitize_uri("s3://bucket/path")
+        's3://bucket/path'
+        >>> sanitize_uri("db://user:pass@host/db")
+        'db://***:***@host/db'
+        >>> sanitize_uri("postgresql://admin:secret@host:5432/db")
+        'postgresql://***:***@host:5432/db'
+        >>> sanitize_uri("./local/path")
+        './local/path'
+
+    Args:
+        uri: URI or path string that may contain credentials
+
+    Returns:
+        Sanitized URI with credentials masked as ***
+    """
+    # Try to parse as URI
+    try:
+        parsed = urlparse(uri)
+
+        # If no scheme, it's likely a local path - return as-is
+        if not parsed.scheme or parsed.scheme in ("file", "local"):
+            return uri
+
+        # Check if URI contains credentials (username or password)
+        if parsed.username or parsed.password:
+            # Replace credentials with ***
+            username = "***" if parsed.username else ""
+            password = "***" if parsed.password else ""
+            credentials = f"{username}:{password}@" if username or password else ""
+            # Reconstruct netloc without credentials
+            host_port = parsed.netloc.split("@")[-1]
+            masked_netloc = f"{credentials}{host_port}"
+
+            # Reconstruct URI with masked credentials
+            return urlunparse(
+                (
+                    parsed.scheme,
+                    masked_netloc,
+                    parsed.path,
+                    parsed.params,
+                    parsed.query,
+                    parsed.fragment,
+                )
+            )
+    except Exception:
+        # If parsing fails, return as-is (likely a local path)
+        pass
+
+    return uri
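As a quick, hedged illustration of `empty_frame_like` and `sanitize_uri` (assuming polars is installed; the import path is inferred from the file listing above and the column names are invented):

```py
import narwhals as nw
import polars as pl

from metaxy.metadata_store.utils import empty_frame_like, sanitize_uri

# The empty frame keeps the schema and backend of the reference frame, with zero rows.
ref = nw.from_native(pl.DataFrame({"id": [1, 2], "value": [0.5, 0.7]}))
empty = empty_frame_like(ref)
assert empty.columns == ref.columns
assert len(empty.to_native()) == 0

# Credentials are masked before the URI reaches logs or error messages.
assert sanitize_uri("postgresql://admin:secret@host:5432/db") == "postgresql://***:***@host:5432/db"
```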
metaxy/metadata_store/warnings.py
@@ -0,0 +1,36 @@
+import warnings
+
+import narwhals as nw
+from narwhals.typing import Frame
+
+
+class PolarsMaterializationWarning(Warning):
+    pass
+
+    @classmethod
+    def warn_on_implementation_mismatch(
+        cls, expected: nw.Implementation, actual: nw.Implementation, message: str = ""
+    ):
+        if expected != actual:
+            warning = f"Narwhals implementation mismatch: native is {expected}, got {actual}. This will lead to materialization into an eager Polars frame."
+
+            if message:
+                warning += f" {message}"
+
+            warnings.warn(warning, cls, stacklevel=3)
+
+
+class MetaxyColumnMissingWarning(Warning):
+    pass
+
+    @classmethod
+    def warn_on_missing_column(cls, expected: str, df: Frame, message: str = ""):
+        if expected in df.columns:
+            return
+        else:
+            warning = f"Metaxy column missing: expected {expected}, got {df.columns}."
+
+            if message:
+                warning += f" {message}"
+
+            warnings.warn(warning, cls, stacklevel=3)
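For context, a sketch of how these warning helpers might be invoked and silenced; the import path is inferred from the file listing and the call site is hypothetical:

```py
import warnings

import narwhals as nw

from metaxy.metadata_store.warnings import PolarsMaterializationWarning

# Emits a PolarsMaterializationWarning because the two implementations differ.
PolarsMaterializationWarning.warn_on_implementation_mismatch(
    nw.Implementation.POLARS,
    nw.Implementation.PANDAS,
    "The frame will be collected eagerly before writing.",
)

# Callers that accept the materialization cost can filter the category.
warnings.filterwarnings("ignore", category=PolarsMaterializationWarning)
```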
metaxy/migrations/__init__.py
@@ -0,0 +1,32 @@
+"""Migration system for metadata version updates."""
+
+from metaxy.metadata_store.system import SystemTableStorage
+from metaxy.migrations.detector import detect_diff_migration
+from metaxy.migrations.executor import MigrationExecutor
+from metaxy.migrations.models import (
+    DiffMigration,
+    FullGraphMigration,
+    Migration,
+    MigrationResult,
+)
+from metaxy.migrations.ops import (
+    BaseOperation,
+    DataVersionReconciliation,
+    MetadataBackfill,
+)
+
+__all__ = [
+    # Core migration types
+    "Migration",
+    "DiffMigration",
+    "FullGraphMigration",
+    "MigrationResult",
+    # Operations (for custom migrations)
+    "BaseOperation",
+    "DataVersionReconciliation",
+    "MetadataBackfill",
+    # Migration workflow
+    "detect_diff_migration",
+    "MigrationExecutor",
+    "SystemTableStorage",
+]
metaxy/migrations/detector.py
@@ -0,0 +1,291 @@
+"""Feature change detection for automatic migration generation."""
+
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from metaxy.graph.diff.differ import GraphDiffer
+from metaxy.migrations.models import DiffMigration, FullGraphMigration
+from metaxy.models.feature import FeatureGraph
+from metaxy.utils.hashing import ensure_hash_compatibility, get_hash_truncation_length
+
+if TYPE_CHECKING:
+    from metaxy.metadata_store.base import MetadataStore
+
+
+def detect_diff_migration(
+    store: "MetadataStore",
+    project: str | None = None,
+    from_snapshot_version: str | None = None,
+    ops: list[dict[str, Any]] | None = None,
+    migrations_dir: Path | None = None,
+    name: str | None = None,
+    command: str | None = None,
+) -> "DiffMigration | None":
+    """Detect the migration needed between snapshots and write a YAML file.
+
+    Compares the latest snapshot in the store (or the specified from_snapshot_version)
+    with the current active graph to detect changes and generate a migration YAML file.
+
+    Args:
+        store: Metadata store containing snapshot metadata
+        project: Project name for filtering snapshots
+        from_snapshot_version: Source snapshot version (defaults to the latest in the store for the project)
+        ops: List of operation dicts with a "type" field (required; e.g. [{"type": "metaxy.migrations.ops.DataVersionReconciliation"}])
+        migrations_dir: Directory to write the migration YAML (defaults to .metaxy/migrations/)
+        name: Migration name (creates a {timestamp}_{name} ID and filename)
+        command: CLI command that generated this migration (written as a YAML comment)
+
+    Returns:
+        DiffMigration if changes were detected and written, None otherwise
+
+    Example:
+        ```py
+        # Compare latest snapshot in store vs current graph
+        with store:
+            migration = detect_diff_migration(store, project="my_project")
+            if migration:
+                print(f"Migration written to {migration.yaml_path}")
+        ```
+        ```py
+        # Use a custom operation
+        migration = detect_diff_migration(store, project="my_project", ops=[{"type": "myproject.ops.CustomOp"}])
+        ```
+
+        ```py
+        # Use a custom name
+        migration = detect_diff_migration(store, project="my_project", name="example_migration")
+        ```
+    """
+    differ = GraphDiffer()
+
+    # Get from_snapshot_version (use latest if not specified)
+    if from_snapshot_version is None:
+        from metaxy.metadata_store.system.storage import SystemTableStorage
+
+        with store:
+            storage = SystemTableStorage(store)
+            snapshots = storage.read_graph_snapshots(project=project)
+            if snapshots.height == 0:
+                # No snapshots in store for this project - nothing to migrate from
+                return None
+            from_snapshot_version = snapshots["metaxy_snapshot_version"][0]
+
+    # At this point, from_snapshot_version is guaranteed to be a str
+    assert from_snapshot_version is not None  # Type narrowing for the type checker
+
+    # Get to_snapshot_version from the current active graph
+    active_graph = FeatureGraph.get_active()
+    if len(active_graph.features_by_key) == 0:
+        # No features in active graph - nothing to migrate to
+        return None
+
+    to_snapshot_version = active_graph.snapshot_version
+
+    # Check hash truncation compatibility.
+    # If truncation is in use, the snapshot versions should be compatible
+    # (either exactly equal or one is a truncated version of the other).
+    truncation_length = get_hash_truncation_length()
+    if truncation_length is not None:
+        # When using truncation, check compatibility rather than exact equality
+        if ensure_hash_compatibility(from_snapshot_version, to_snapshot_version):
+            # Hashes are compatible (same or truncated versions) - no changes
+            return None
+    else:
+        # No truncation - use exact comparison
+        if from_snapshot_version == to_snapshot_version:
+            return None
+
+    # Load snapshot data using GraphDiffer
+    try:
+        from_snapshot_data = differ.load_snapshot_data(store, from_snapshot_version)
+    except ValueError:
+        # Snapshot not found - nothing to migrate from
+        return None
+
+    # Build snapshot data for to_snapshot (current graph)
+    to_snapshot_data = active_graph.to_snapshot()
+
+    # Compute GraphDiff using GraphDiffer
+    graph_diff = differ.diff(
+        from_snapshot_data,
+        to_snapshot_data,
+        from_snapshot_version,
+        to_snapshot_version,
+    )
+
+    # Check if there are any changes
+    if not graph_diff.has_changes:
+        return None
+
+    # Generate migration ID (timestamp first for sorting)
+    timestamp = datetime.now(timezone.utc)
+    timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S")
+    if name is not None:
+        migration_id = f"{timestamp_str}_{name}"
+    else:
+        migration_id = f"{timestamp_str}"
+
+    # ops is required - the caller must specify it
+    if ops is None:
+        raise ValueError(
+            "ops parameter is required - must explicitly specify migration operations. "
+            "Example: ops=[{'type': 'metaxy.migrations.ops.DataVersionReconciliation'}]"
+        )
+
+    # Default migrations directory
+    if migrations_dir is None:
+        migrations_dir = Path(".metaxy/migrations")
+
+    migrations_dir.mkdir(parents=True, exist_ok=True)
+
+    # Find parent migration (latest migration in the chain)
+    from metaxy.migrations.loader import find_latest_migration
+
+    parent = find_latest_migration(migrations_dir)
+    if parent is None:
+        parent = "initial"
+
+    # Create a minimal DiffMigration - affected_features and description are computed on demand
+    migration = DiffMigration(
+        migration_id=migration_id,
+        created_at=timestamp,
+        parent=parent,
+        from_snapshot_version=from_snapshot_version,
+        to_snapshot_version=to_snapshot_version,
+        ops=ops,
+    )
+
+    # Write the migration YAML file
+    import yaml
+
+    yaml_path = migrations_dir / f"{migration_id}.yaml"
+    migration_yaml = {
+        "migration_type": "metaxy.migrations.models.DiffMigration",
+        "id": migration.migration_id,
+        "created_at": migration.created_at.isoformat(),
+        "parent": migration.parent,
+        "from_snapshot_version": migration.from_snapshot_version,
+        "to_snapshot_version": migration.to_snapshot_version,
+        "ops": migration.ops,
+    }
+
+    with open(yaml_path, "w") as f:
+        # Write the command as a comment header if provided
+        if command:
+            f.write(f"# Generated by: {command}\n")
+        yaml.safe_dump(migration_yaml, f, sort_keys=False, default_flow_style=False)
+
+    return migration
+
+
+def generate_full_graph_migration(
+    store: "MetadataStore",
+    project: str | None = None,
+    ops: list[dict[str, Any]] | None = None,
+    migrations_dir: Path | None = None,
+    name: str | None = None,
+    command: str | None = None,
+) -> "FullGraphMigration":
+    """Generate a FullGraphMigration that includes all features in the current graph.
+
+    Creates a migration YAML file with all feature keys specified in each operation's
+    'features' list.
+
+    Args:
+        store: Metadata store (used to push the snapshot)
+        project: Project name
+        ops: List of operation dicts with a "type" field
+        migrations_dir: Directory to write the migration YAML (defaults to .metaxy/migrations/)
+        name: Migration name (creates a {timestamp}_{name} ID and filename)
+        command: CLI command that generated this migration (written as a YAML comment)
+
+    Returns:
+        FullGraphMigration with all features
+
+    Raises:
+        ValueError: If there are no features in the active graph or ops is not provided
+    """
+    from metaxy.metadata_store.system.storage import SystemTableStorage
+
+    # Get the active graph
+    active_graph = FeatureGraph.get_active()
+    if len(active_graph.features_by_key) == 0:
+        raise ValueError("No features in active graph")
+
+    # Get all feature keys in topological order
+    all_feature_keys = active_graph.topological_sort_features(
+        list(active_graph.features_by_key.keys())
+    )
+    feature_key_strings = [key.to_string() for key in all_feature_keys]
+
+    # ops is required
+    if ops is None or len(ops) == 0:
+        raise ValueError(
+            "ops parameter is required - must explicitly specify migration operations. "
+            "Example: ops=[{'type': 'myproject.ops.CustomBackfill'}]"
+        )
+
+    # Add the feature list to each operation
+    ops_with_features = []
+    for op in ops:
+        op_copy = dict(op)
+        op_copy["features"] = feature_key_strings
+        ops_with_features.append(op_copy)
+
+    # Push a snapshot to get the current snapshot version
+    with store:
+        storage = SystemTableStorage(store)
+        snapshot_result = storage.push_graph_snapshot()
+        snapshot_version = snapshot_result.snapshot_version
+
+    # Generate migration ID (timestamp first for sorting)
+    timestamp = datetime.now(timezone.utc)
+    timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S")
+    if name is not None:
+        migration_id = f"{timestamp_str}_{name}"
+    else:
+        migration_id = f"{timestamp_str}"
+
+    # Default migrations directory
+    if migrations_dir is None:
+        migrations_dir = Path(".metaxy/migrations")
+
+    migrations_dir.mkdir(parents=True, exist_ok=True)
+
+    # Find parent migration (latest migration in the chain)
+    from metaxy.migrations.loader import find_latest_migration
+
+    parent = find_latest_migration(migrations_dir)
+    if parent is None:
+        parent = "initial"
+
+    # Create the FullGraphMigration
+    migration = FullGraphMigration(
+        migration_id=migration_id,
+        created_at=timestamp,
+        parent=parent,
+        snapshot_version=snapshot_version,
+        ops=ops_with_features,
+    )
+
+    # Write the migration YAML file
+    import yaml
+
+    yaml_path = migrations_dir / f"{migration_id}.yaml"
+    migration_yaml = {
+        "migration_type": "metaxy.migrations.models.FullGraphMigration",
+        "id": migration.migration_id,
+        "created_at": migration.created_at.isoformat(),
+        "parent": migration.parent,
+        "snapshot_version": migration.snapshot_version,
+        "ops": migration.ops,
+    }
+
+    with open(yaml_path, "w") as f:
+        # Write the command as a comment header if provided
+        if command:
+            f.write(f"# Generated by: {command}\n")
+        yaml.safe_dump(migration_yaml, f, sort_keys=False, default_flow_style=False)
+
+    return migration
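Taken together, a hedged usage sketch based only on the two signatures above; the project name, migration names, and operation types are placeholders, and `store` is assumed to be an already configured MetadataStore instance:

```py
from pathlib import Path

from metaxy.migrations.detector import detect_diff_migration, generate_full_graph_migration

# Diff-based migration: only written when the latest stored snapshot differs
# from the currently active feature graph.
diff_migration = detect_diff_migration(
    store,
    project="my_project",
    ops=[{"type": "metaxy.migrations.ops.DataVersionReconciliation"}],
    name="reconcile_after_schema_change",
)

# Full-graph migration: targets every feature in the active graph and pushes
# a fresh snapshot before writing the YAML file.
full_migration = generate_full_graph_migration(
    store,
    project="my_project",
    ops=[{"type": "metaxy.migrations.ops.MetadataBackfill"}],
    migrations_dir=Path(".metaxy/migrations"),
    name="full_backfill",
)
```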