metaxy 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of metaxy might be problematic. Click here for more details.

Files changed (75) hide show
  1. metaxy/__init__.py +61 -0
  2. metaxy/_testing.py +542 -0
  3. metaxy/_utils.py +16 -0
  4. metaxy/_version.py +1 -0
  5. metaxy/cli/app.py +76 -0
  6. metaxy/cli/context.py +71 -0
  7. metaxy/cli/graph.py +576 -0
  8. metaxy/cli/graph_diff.py +290 -0
  9. metaxy/cli/list.py +42 -0
  10. metaxy/cli/metadata.py +271 -0
  11. metaxy/cli/migrations.py +862 -0
  12. metaxy/cli/push.py +55 -0
  13. metaxy/config.py +450 -0
  14. metaxy/data_versioning/__init__.py +24 -0
  15. metaxy/data_versioning/calculators/__init__.py +13 -0
  16. metaxy/data_versioning/calculators/base.py +97 -0
  17. metaxy/data_versioning/calculators/duckdb.py +186 -0
  18. metaxy/data_versioning/calculators/ibis.py +225 -0
  19. metaxy/data_versioning/calculators/polars.py +135 -0
  20. metaxy/data_versioning/diff/__init__.py +15 -0
  21. metaxy/data_versioning/diff/base.py +150 -0
  22. metaxy/data_versioning/diff/narwhals.py +108 -0
  23. metaxy/data_versioning/hash_algorithms.py +19 -0
  24. metaxy/data_versioning/joiners/__init__.py +9 -0
  25. metaxy/data_versioning/joiners/base.py +70 -0
  26. metaxy/data_versioning/joiners/narwhals.py +235 -0
  27. metaxy/entrypoints.py +309 -0
  28. metaxy/ext/__init__.py +1 -0
  29. metaxy/ext/alembic.py +326 -0
  30. metaxy/ext/sqlmodel.py +172 -0
  31. metaxy/ext/sqlmodel_system_tables.py +139 -0
  32. metaxy/graph/__init__.py +21 -0
  33. metaxy/graph/diff/__init__.py +21 -0
  34. metaxy/graph/diff/diff_models.py +399 -0
  35. metaxy/graph/diff/differ.py +740 -0
  36. metaxy/graph/diff/models.py +418 -0
  37. metaxy/graph/diff/rendering/__init__.py +18 -0
  38. metaxy/graph/diff/rendering/base.py +274 -0
  39. metaxy/graph/diff/rendering/cards.py +188 -0
  40. metaxy/graph/diff/rendering/formatter.py +805 -0
  41. metaxy/graph/diff/rendering/graphviz.py +246 -0
  42. metaxy/graph/diff/rendering/mermaid.py +320 -0
  43. metaxy/graph/diff/rendering/rich.py +165 -0
  44. metaxy/graph/diff/rendering/theme.py +48 -0
  45. metaxy/graph/diff/traversal.py +247 -0
  46. metaxy/graph/utils.py +58 -0
  47. metaxy/metadata_store/__init__.py +31 -0
  48. metaxy/metadata_store/_protocols.py +38 -0
  49. metaxy/metadata_store/base.py +1676 -0
  50. metaxy/metadata_store/clickhouse.py +161 -0
  51. metaxy/metadata_store/duckdb.py +167 -0
  52. metaxy/metadata_store/exceptions.py +43 -0
  53. metaxy/metadata_store/ibis.py +451 -0
  54. metaxy/metadata_store/memory.py +228 -0
  55. metaxy/metadata_store/sqlite.py +187 -0
  56. metaxy/metadata_store/system_tables.py +257 -0
  57. metaxy/migrations/__init__.py +34 -0
  58. metaxy/migrations/detector.py +153 -0
  59. metaxy/migrations/executor.py +208 -0
  60. metaxy/migrations/loader.py +260 -0
  61. metaxy/migrations/models.py +718 -0
  62. metaxy/migrations/ops.py +390 -0
  63. metaxy/models/__init__.py +0 -0
  64. metaxy/models/bases.py +6 -0
  65. metaxy/models/constants.py +24 -0
  66. metaxy/models/feature.py +665 -0
  67. metaxy/models/feature_spec.py +105 -0
  68. metaxy/models/field.py +25 -0
  69. metaxy/models/plan.py +155 -0
  70. metaxy/models/types.py +157 -0
  71. metaxy/py.typed +0 -0
  72. metaxy-0.0.0.dist-info/METADATA +247 -0
  73. metaxy-0.0.0.dist-info/RECORD +75 -0
  74. metaxy-0.0.0.dist-info/WHEEL +4 -0
  75. metaxy-0.0.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,208 @@
1
+ """Migration executor using event-based tracking and GraphWalker.
2
+
3
+ This is the new executor that replaces the old 3-table system with a single
4
+ event-based system stored in system tables via SystemTableStorage.
5
+ """
6
+
7
+ from datetime import datetime, timezone
8
+ from typing import TYPE_CHECKING
9
+
10
+ from metaxy.migrations.ops import DataVersionReconciliation
11
+
12
+ if TYPE_CHECKING:
13
+ from metaxy.metadata_store.base import MetadataStore
14
+ from metaxy.metadata_store.system_tables import SystemTableStorage
15
+ from metaxy.migrations.models import (
16
+ DiffMigration,
17
+ FullGraphMigration,
18
+ Migration,
19
+ MigrationResult,
20
+ )
21
+
22
+
23
class MigrationExecutor:
    """Executes migrations with event-based progress tracking.

    Progress events are written through the injected storage object. For a
    ``DiffMigration`` this enables resuming after a failure: features that the
    storage already reports as completed are skipped on the next run.
    """

    def __init__(self, storage: "SystemTableStorage"):
        """Initialize executor.

        Args:
            storage: System table storage for event logging
        """
        self.storage = storage

    def execute(
        self, migration: "Migration", store: "MetadataStore", *, dry_run: bool = False
    ) -> "MigrationResult":
        """Execute a migration, dispatching on its concrete type.

        ``DiffMigration`` instances are executed feature by feature with event
        logging (see ``_execute_diff_migration``). ``FullGraphMigration``
        instances and every other migration type run their own ``execute``
        method.

        Args:
            migration: Migration to execute
            store: Metadata store to operate on
            dry_run: If True, no events are written and the flag is forwarded
                to the underlying operation

        Returns:
            MigrationResult with execution details
        """
        # Import here to avoid circular dependency
        from metaxy.migrations.models import DiffMigration, FullGraphMigration

        if isinstance(migration, DiffMigration):
            return self._execute_diff_migration(migration, store, dry_run=dry_run)
        if isinstance(migration, FullGraphMigration):
            return self._execute_full_graph_migration(migration, store, dry_run=dry_run)
        # Any other migration type (e.g. a custom one) runs its own logic.
        return migration.execute(store, dry_run=dry_run)

    def _execute_diff_migration(
        self, migration: "DiffMigration", store: "MetadataStore", dry_run: bool
    ) -> "MigrationResult":
        """Execute a DiffMigration feature by feature.

        Process:
            1. Write the "started" event.
            2. Ask the migration for its affected features and process them in
               the order returned (presumably topological; not verified here).
            3. Per feature: skip it if storage reports it completed, otherwise
               write "feature_started", run ``DataVersionReconciliation`` and
               write "feature_completed". A failure is recorded in ``errors``
               and processing continues with the next feature.
            4. Write the final "completed" or "failed" event.

        No events are written when ``dry_run`` is True.

        Args:
            migration: DiffMigration to execute
            store: Metadata store
            dry_run: If True, only validate

        Returns:
            MigrationResult whose status is "skipped" for a dry run,
            "completed" when no feature failed, and "failed" otherwise
        """
        from metaxy.migrations.models import MigrationResult

        start_time = datetime.now(timezone.utc)

        # Note: GraphDiff is not needed for execution
        # It can be computed on-demand via migration.compute_graph_diff(store) if needed

        if not dry_run:
            self.storage.write_event(migration.migration_id, "started")

        affected_features = []
        errors = {}
        rows_affected_total = 0

        # Get affected features (computed on-demand for DiffMigration)
        affected_features_to_process = migration.get_affected_features(store)

        for feature_key_str in affected_features_to_process:
            # Resume support: a feature finished by an earlier run still counts
            # as completed in the result but is not executed again.
            if not dry_run and self.storage.is_feature_completed(
                migration.migration_id, feature_key_str
            ):
                affected_features.append(feature_key_str)
                continue

            if not dry_run:
                self.storage.write_event(
                    migration.migration_id,
                    "feature_started",
                    feature_key=feature_key_str,
                )

            try:
                # Execute data version reconciliation for this feature
                op = DataVersionReconciliation()

                rows_affected = op.execute_for_feature(
                    store,
                    feature_key_str,
                    from_snapshot_version=migration.from_snapshot_version,
                    to_snapshot_version=migration.to_snapshot_version,
                    dry_run=dry_run,
                )

                if not dry_run:
                    self.storage.write_event(
                        migration.migration_id,
                        "feature_completed",
                        feature_key=feature_key_str,
                        rows_affected=rows_affected,
                    )

                affected_features.append(feature_key_str)
                rows_affected_total += rows_affected

            except Exception as e:
                # One failing feature must not abort the rest of the migration;
                # the error is reported through the result instead.
                error_msg = str(e)
                errors[feature_key_str] = error_msg

                if not dry_run:
                    # NOTE(review): a failure is logged under the
                    # "feature_completed" event type and distinguished only by
                    # error_message. Confirm that is_feature_completed() in the
                    # storage layer treats such events as not completed.
                    self.storage.write_event(
                        migration.migration_id,
                        "feature_completed",
                        feature_key=feature_key_str,
                        error_message=error_msg,
                    )

        # Determine status; past the first branch dry_run is known to be False,
        # so the final event can be written unconditionally.
        if dry_run:
            status = "skipped"
        elif not errors:
            status = "completed"
            self.storage.write_event(migration.migration_id, "completed")
        else:
            status = "failed"
            self.storage.write_event(migration.migration_id, "failed")

        duration = (datetime.now(timezone.utc) - start_time).total_seconds()

        return MigrationResult(
            migration_id=migration.migration_id,
            status=status,
            features_completed=len(affected_features),
            features_failed=len(errors),
            affected_features=affected_features,
            errors=errors,
            rows_affected=rows_affected_total,
            duration_seconds=duration,
            timestamp=start_time,
        )

    def _execute_full_graph_migration(
        self,
        migration: "FullGraphMigration",
        store: "MetadataStore",
        dry_run: bool,
    ) -> "MigrationResult":
        """Execute FullGraphMigration.

        The executor adds no event logging here; it forwards to the
        migration's own ``execute`` method.

        Args:
            migration: FullGraphMigration to execute
            store: Metadata store
            dry_run: If True, only validate

        Returns:
            MigrationResult produced by ``migration.execute``
        """
        return migration.execute(store, dry_run=dry_run)
@@ -0,0 +1,260 @@
1
+ """Load migrations from YAML files."""
2
+
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from metaxy.migrations.models import DiffMigration
9
+
10
+
11
def load_migration_from_yaml(yaml_path: Path) -> "DiffMigration":
    """Load migration from YAML file.

    The creation timestamp is taken from the ``created_at`` field when
    present. Otherwise it is parsed from the migration ID (with any
    ``migration_`` text removed, format ``%Y%m%d_%H%M%S``), and as a last
    resort the file's modification time is used.

    Args:
        yaml_path: Path to migration YAML file

    Returns:
        DiffMigration instance

    Raises:
        FileNotFoundError: If YAML file doesn't exist
        ValueError: If the document is not a mapping or lacks one of the
            required fields (``id``, ``ops``, ``from_snapshot_version``,
            ``to_snapshot_version``)
    """
    # Fail fast, before paying for the lazy imports below.
    if not yaml_path.exists():
        raise FileNotFoundError(f"Migration YAML not found: {yaml_path}")

    import yaml

    from metaxy.migrations.models import DiffMigration

    # Read as UTF-8 rather than the platform's locale-dependent default.
    with open(yaml_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # An empty file yields None and a list/scalar document is equally unusable;
    # report both as invalid YAML instead of failing later with a TypeError.
    if not isinstance(data, dict):
        raise ValueError(
            f"Migration YAML must contain a mapping at the top level: {yaml_path}"
        )

    if "id" not in data:
        raise ValueError(f"Migration YAML missing required 'id' field: {yaml_path}")
    migration_id = data["id"]

    # Parse timestamp from YAML, migration ID, or file modification time
    if "created_at" in data:
        # Read from YAML (preferred); non-string values are passed through
        # unchanged because the YAML parser may already have converted them.
        raw_created_at = data["created_at"]
        if isinstance(raw_created_at, str):
            created_at = datetime.fromisoformat(raw_created_at)
        else:
            created_at = raw_created_at
    else:
        try:
            # str() keeps the fallback working when YAML parsed the ID as a
            # non-string value (e.g. an unquoted all-digit ID).
            timestamp_str = str(migration_id).replace("migration_", "")
            created_at = datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
        except ValueError:
            # Fallback to file modification time
            created_at = datetime.fromtimestamp(yaml_path.stat().st_mtime)

    # Get parent (default to "initial" if not specified for backwards compatibility)
    parent = data.get("parent", "initial")

    # ops is required
    if "ops" not in data:
        raise ValueError(
            f"Migration YAML missing required 'ops' field: {yaml_path}. "
            "Migrations must explicitly specify operations. "
            "Example: ops: [{type: metaxy.migrations.ops.DataVersionReconciliation}]"
        )

    # Report missing snapshot versions the same way as a missing 'ops'.
    for required_field in ("from_snapshot_version", "to_snapshot_version"):
        if required_field not in data:
            raise ValueError(
                f"Migration YAML missing required '{required_field}' field: {yaml_path}"
            )

    return DiffMigration(
        migration_id=migration_id,
        created_at=created_at,
        parent=parent,
        from_snapshot_version=data["from_snapshot_version"],
        to_snapshot_version=data["to_snapshot_version"],
        ops=data["ops"],
    )
76
+
77
+
78
+ def find_migration_yaml(migration_id: str, migrations_dir: Path | None = None) -> Path:
79
+ """Find YAML file for a migration ID by searching all YAML files.
80
+
81
+ Args:
82
+ migration_id: Migration ID (e.g., "20250127_120000" or "20250127_120000_feature_update")
83
+ migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)
84
+
85
+ Returns:
86
+ Path to migration YAML file
87
+
88
+ Raises:
89
+ FileNotFoundError: If migration YAML not found
90
+ """
91
+ if migrations_dir is None:
92
+ migrations_dir = Path(".metaxy/migrations")
93
+
94
+ if not migrations_dir.exists():
95
+ raise FileNotFoundError(
96
+ f"Migration '{migration_id}' not found. "
97
+ f"Migrations directory does not exist: {migrations_dir}"
98
+ )
99
+
100
+ # Search through all YAML files to find the one with matching ID
101
+ for yaml_file in migrations_dir.glob("*.yaml"):
102
+ try:
103
+ migration = load_migration_from_yaml(yaml_file)
104
+ if migration.migration_id == migration_id:
105
+ return yaml_file
106
+ except Exception:
107
+ # Skip files that can't be loaded
108
+ continue
109
+
110
+ # Not found - list available migrations
111
+ available = []
112
+ for yaml_file in migrations_dir.glob("*.yaml"):
113
+ try:
114
+ migration = load_migration_from_yaml(yaml_file)
115
+ available.append(migration.migration_id)
116
+ except Exception:
117
+ continue
118
+
119
+ raise FileNotFoundError(
120
+ f"Migration '{migration_id}' not found in {migrations_dir}.\n"
121
+ f"Available migrations: {available}"
122
+ )
123
+
124
+
125
+ def list_migrations(migrations_dir: Path | None = None) -> list[str]:
126
+ """List all available migration IDs.
127
+
128
+ Args:
129
+ migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)
130
+
131
+ Returns:
132
+ List of migration IDs sorted by creation time
133
+ """
134
+ if migrations_dir is None:
135
+ migrations_dir = Path(".metaxy/migrations")
136
+
137
+ if not migrations_dir.exists():
138
+ return []
139
+
140
+ yaml_files = sorted(migrations_dir.glob("*.yaml"))
141
+ return [f.stem for f in yaml_files]
142
+
143
+
144
+ def find_latest_migration(migrations_dir: Path | None = None) -> str | None:
145
+ """Find the latest migration ID (head of the chain).
146
+
147
+ Args:
148
+ migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)
149
+
150
+ Returns:
151
+ Migration ID of the head, or None if no migrations exist
152
+
153
+ Raises:
154
+ ValueError: If multiple heads detected (conflict)
155
+ """
156
+ if migrations_dir is None:
157
+ migrations_dir = Path(".metaxy/migrations")
158
+
159
+ if not migrations_dir.exists():
160
+ return None
161
+
162
+ # Load all migrations
163
+ migrations: dict[str, DiffMigration] = {}
164
+ for yaml_file in migrations_dir.glob("*.yaml"):
165
+ migration = load_migration_from_yaml(yaml_file)
166
+ migrations[migration.migration_id] = migration
167
+
168
+ if not migrations:
169
+ return None
170
+
171
+ # Find migrations that are parents of others
172
+ all_parents = {m.parent for m in migrations.values() if m.parent != "initial"}
173
+
174
+ # Find heads (migrations that are not parents of any other migration)
175
+ heads = [mid for mid in migrations.keys() if mid not in all_parents]
176
+
177
+ if len(heads) == 0:
178
+ # This means there's a cycle or orphaned migrations
179
+ raise ValueError(
180
+ "No head migration found - possible cycle in migration chain. "
181
+ f"All migrations: {list(migrations.keys())}"
182
+ )
183
+
184
+ if len(heads) > 1:
185
+ raise ValueError(
186
+ f"Multiple migration heads detected: {heads}. "
187
+ "This usually means two migrations were created in parallel. "
188
+ "Please merge them by creating a new migration that depends on one head, "
189
+ "or delete one of the conflicting migrations."
190
+ )
191
+
192
+ return heads[0]
193
+
194
+
195
+ def build_migration_chain(
196
+ migrations_dir: Path | None = None,
197
+ ) -> list["DiffMigration"]:
198
+ """Build ordered migration chain from parent IDs.
199
+
200
+ Args:
201
+ migrations_dir: Directory containing migrations (defaults to .metaxy/migrations/)
202
+
203
+ Returns:
204
+ List of migrations in order from oldest to newest
205
+
206
+ Raises:
207
+ ValueError: If chain is invalid (cycles, orphans, multiple heads)
208
+ """
209
+ if migrations_dir is None:
210
+ migrations_dir = Path(".metaxy/migrations")
211
+
212
+ if not migrations_dir.exists():
213
+ return []
214
+
215
+ # Load all migrations
216
+ migrations: dict[str, DiffMigration] = {}
217
+ for yaml_file in sorted(migrations_dir.glob("*.yaml")):
218
+ migration = load_migration_from_yaml(yaml_file)
219
+ migrations[migration.migration_id] = migration
220
+
221
+ if not migrations:
222
+ return []
223
+
224
+ # Validate single head
225
+ head_id = find_latest_migration(migrations_dir)
226
+ if head_id is None:
227
+ return []
228
+
229
+ # Build chain by following parent links backwards
230
+ chain = []
231
+ current_id: str | None = head_id
232
+
233
+ visited = set()
234
+ while current_id is not None and current_id != "initial":
235
+ if current_id in visited:
236
+ raise ValueError(f"Cycle detected in migration chain at: {current_id}")
237
+
238
+ if current_id not in migrations:
239
+ raise ValueError(
240
+ f"Migration '{current_id}' referenced as parent but YAML not found. "
241
+ f"Available migrations: {list(migrations.keys())}"
242
+ )
243
+
244
+ visited.add(current_id)
245
+ migration = migrations[current_id]
246
+ chain.append(migration)
247
+ current_id = migration.parent
248
+
249
+ # Reverse to get oldest-first order
250
+ chain.reverse()
251
+
252
+ # Validate all migrations are in the chain (no orphans)
253
+ if len(chain) != len(migrations):
254
+ orphans = set(migrations.keys()) - set(m.migration_id for m in chain)
255
+ raise ValueError(
256
+ f"Orphaned migrations detected (not in main chain): {orphans}. "
257
+ "Each migration must have parent pointing to previous migration or 'initial'."
258
+ )
259
+
260
+ return chain