metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,780 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import inspect
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
import textwrap
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from functools import cached_property
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from metaxy.config import MetaxyConfig
|
|
14
|
+
from metaxy.metadata_store.base import MetadataStore
|
|
15
|
+
from metaxy.models.feature import FeatureGraph
|
|
16
|
+
from metaxy.models.feature_spec import (
|
|
17
|
+
FeatureSpecWithIDColumns,
|
|
18
|
+
)
|
|
19
|
+
from metaxy.versioning.types import HashAlgorithm
|
|
20
|
+
|
|
21
|
+
DEFAULT_ID_COLUMNS = ["sample_uid"]
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"TempFeatureModule",
|
|
25
|
+
"assert_all_results_equal",
|
|
26
|
+
"HashAlgorithmCases",
|
|
27
|
+
"MetaxyProject",
|
|
28
|
+
"ExternalMetaxyProject",
|
|
29
|
+
"TempMetaxyProject", # Backward compatibility alias
|
|
30
|
+
"DEFAULT_ID_COLUMNS",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TempFeatureModule:
    """Helper to create temporary Python modules with feature definitions.

    This allows features to be importable by historical graph reconstruction.
    The same import path (e.g., 'temp_features.Upstream') can be used across
    different feature versions by overwriting the module file.
    """

    def __init__(self, module_name: str = "temp_test_features"):
        """Create a fresh temp directory and put it at the front of ``sys.path``.

        Args:
            module_name: Name of the module (file stem) that
                ``write_features`` will generate inside the temp directory.
        """
        self.temp_dir = tempfile.mkdtemp(prefix="metaxy_test_")
        self.module_name = module_name
        self.module_path = Path(self.temp_dir) / f"{module_name}.py"

        # Add to sys.path so module can be imported
        sys.path.insert(0, self.temp_dir)

    def write_features(self, feature_specs: dict[str, FeatureSpecWithIDColumns]):
        """Write feature classes to the module file.

        The file is overwritten on every call. If the module has already been
        imported it is reloaded so the new definitions take effect.

        Args:
            feature_specs: Dict mapping class names to FeatureSpec objects
        """
        code_lines = [
            "# Auto-generated test feature module",
            "from metaxy import BaseFeature as Feature, FeatureSpec, FieldSpec, FieldKey, FeatureDep, FeatureKey, FieldDep, SpecialFieldDep",
            "from metaxy._testing.models import SampleFeatureSpec",
            "from metaxy.models.feature import FeatureGraph",
            "",
            "# Use a dedicated graph for this temp module",
            "_graph = FeatureGraph()",
            "",
        ]

        for class_name, spec in feature_specs.items():
            # Generate the spec definition
            spec_dict = spec.model_dump(mode="python")
            spec_class_name = spec.__class__.__name__
            spec_repr = self._generate_spec_repr(
                spec_dict, spec_class_name=spec_class_name
            )

            code_lines.extend(
                [
                    f"# Define {class_name} in the temp graph context",
                    "with _graph.use():",
                    f"    class {class_name}(",
                    "        Feature,",
                    f"        spec={spec_repr}",
                    "    ):",
                    "        pass",
                    "",
                ]
            )

        # Python source files are decoded as UTF-8 by default, so write the
        # generated code as UTF-8 instead of relying on the locale encoding.
        self.module_path.write_text("\n".join(code_lines), encoding="utf-8")

        # The file may have been created after the import system cached the
        # directory listing; drop those caches so the module can be found.
        importlib.invalidate_caches()

        # Reload module if it was already imported
        if self.module_name in sys.modules:
            importlib.reload(sys.modules[self.module_name])

    def _generate_spec_repr(
        self, spec_dict: dict[str, Any], spec_class_name: str = "FeatureSpec"
    ) -> str:
        """Generate FeatureSpec constructor call from dict.

        Only ``key``, ``deps`` (feature keys only) and ``fields`` (key,
        code_version, deps) are rendered; any other entries of ``spec_dict``
        are ignored.

        Args:
            spec_dict: Dictionary representation of the spec
            spec_class_name: Name of the spec class to use (e.g., "SampleFeatureSpec", "FeatureSpec")

        Returns:
            Python source text of the constructor call.
        """
        # This is a simple representation - could be made more robust
        parts = []

        # key
        key = spec_dict["key"]
        parts.append(f"key=FeatureKey({key!r})")

        # deps
        deps = spec_dict.get("deps") or []
        deps_repr = [f"FeatureDep(feature=FeatureKey({d['feature']!r}))" for d in deps]
        parts.append(f"deps=[{', '.join(deps_repr)}]")

        # fields
        fields = spec_dict.get("fields", [])
        if fields:
            field_reprs = []
            for c in fields:
                c_parts = [
                    f"key=FieldKey({c['key']!r})",
                    f"code_version={c['code_version']!r}",
                ]

                # Handle deps
                deps_val = c.get("deps")
                if deps_val == "__METAXY_ALL_DEP__":
                    c_parts.append("deps=SpecialFieldDep.ALL")
                elif isinstance(deps_val, list) and deps_val:
                    # Field deps (list of FieldDep)
                    cdeps: list[str] = []  # type: ignore[misc]
                    for cd in deps_val:
                        fields_val = cd.get("fields")
                        if fields_val == "__METAXY_ALL_DEP__":
                            cdeps.append(  # type: ignore[arg-type]
                                f"FieldDep(feature=FeatureKey({cd['feature']!r}), fields=SpecialFieldDep.ALL)"
                            )
                        else:
                            # Build list of FieldKey objects
                            field_keys = [f"FieldKey({k!r})" for k in fields_val]
                            cdeps.append(
                                f"FieldDep(feature=FeatureKey({cd['feature']!r}), fields=[{', '.join(field_keys)}])"
                            )
                    c_parts.append(f"deps=[{', '.join(cdeps)}]")

                field_reprs.append(f"FieldSpec({', '.join(c_parts)})")  # type: ignore[arg-type]

            parts.append(f"fields=[{', '.join(field_reprs)}]")

        # Note: id_columns is handled by the concrete spec class (SampleFeatureSpec has default)
        # so we don't need to include it here explicitly

        return f"{spec_class_name}({', '.join(parts)})"

    @property
    def graph(self) -> FeatureGraph:
        """Get the FeatureGraph from the temp module.

        Returns:
            The _graph instance from the imported module
        """
        # Make sure a module file written after interpreter start is visible
        # to the path finders before importing it.
        importlib.invalidate_caches()

        # Import the module to get its _graph
        module = importlib.import_module(self.module_name)
        return module._graph

    def cleanup(self):
        """Remove temp directory and module from sys.path.

        NOTE: Don't call this until the test session is completely done,
        as historical graph loading may need to import from these modules.
        """
        if self.temp_dir in sys.path:
            sys.path.remove(self.temp_dir)

        # Remove from sys.modules
        if self.module_name in sys.modules:
            del sys.modules[self.module_name]

        # Delete temp directory
        import shutil

        shutil.rmtree(self.temp_dir, ignore_errors=True)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def assert_all_results_equal(results: dict[str, Any], snapshot=None) -> None:
    """Check that every store-type variant produced the same result.

    The first entry of ``results`` serves as the reference; every other entry
    must compare equal to it. When a snapshot is supplied, the complete
    mapping (re-ordered by key) is additionally compared against it.

    Args:
        results: Dict mapping store_type to result data
        snapshot: Optional syrupy snapshot fixture to record all results

    Raises:
        AssertionError: If any variants produce different results
    """
    if results:
        pairs = iter(results.items())
        reference_key, reference_result = next(pairs)

        # Every remaining variant is checked against the first one.
        for key, result in pairs:
            assert result == reference_result, (
                f"{key} produced different results than {reference_key}:\n"
                f"Expected: {reference_result}\n"
                f"Got: {result}"
            )

        # Key-sorted mapping keeps the snapshot stable across test runs.
        if snapshot is not None:
            ordered = {name: results[name] for name in sorted(results)}
            assert ordered == snapshot
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class HashAlgorithmCases:
    """Case providers, one per hash algorithm, for parametrized tests."""

    def case_xxhash64(self) -> HashAlgorithm:
        """Provide the xxHash64 member."""
        return HashAlgorithm.XXHASH64

    def case_xxhash32(self) -> HashAlgorithm:
        """Provide the xxHash32 member."""
        return HashAlgorithm.XXHASH32

    def case_wyhash(self) -> HashAlgorithm:
        """Provide the WyHash member."""
        return HashAlgorithm.WYHASH

    def case_sha256(self) -> HashAlgorithm:
        """Provide the SHA256 member."""
        return HashAlgorithm.SHA256

    def case_md5(self) -> HashAlgorithm:
        """Provide the MD5 member."""
        return HashAlgorithm.MD5
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class MetaxyProject:
    """Base class for Metaxy projects.

    Provides common functionality for running CLI commands with proper
    environment setup and accessing project configuration.
    """

    def __init__(self, project_dir: Path):
        """Initialize a Metaxy project.

        Args:
            project_dir: Path to project directory containing metaxy.toml
        """
        self.project_dir = Path(project_dir)

    def run_cli(
        self, *args, check: bool = True, env: dict[str, str] | None = None, **kwargs
    ):
        """Run CLI command with proper environment setup.

        The command is executed as ``python -m metaxy.cli.app <args>`` with the
        project directory as working directory and prepended to PYTHONPATH.
        Output is always captured as text.

        Args:
            *args: CLI command arguments (e.g., "graph", "push")
            check: If True (default), a non-zero exit raises RuntimeError
            env: Optional dict of additional environment variables
            **kwargs: Additional arguments to pass to subprocess.run()

        Returns:
            subprocess.CompletedProcess: Result of the CLI command

        Raises:
            RuntimeError: If check=True and the command fails. The message
                contains the exit code and captured stdout/stderr; the
                original subprocess.CalledProcessError is attached as
                ``__cause__``.

        Example:
            ```py
            result = project.run_cli("graph", "history", "--limit", "5")
            print(result.stdout)
            ```
        """
        # Start with current environment
        cmd_env = os.environ.copy()

        # Add project directory to PYTHONPATH so modules can be imported.
        # An unset or empty PYTHONPATH is not appended, which avoids a
        # dangling path separator.
        pythonpath = str(self.project_dir)
        existing_pythonpath = cmd_env.get("PYTHONPATH")
        if existing_pythonpath:
            pythonpath = f"{pythonpath}{os.pathsep}{existing_pythonpath}"
        cmd_env["PYTHONPATH"] = pythonpath

        # Apply additional env overrides
        if env:
            cmd_env.update(env)

        # Run CLI command
        try:
            result = subprocess.run(
                [sys.executable, "-m", "metaxy.cli.app", *args],
                cwd=str(self.project_dir),
                capture_output=True,
                text=True,
                env=cmd_env,
                check=check,
                **kwargs,
            )
        except subprocess.CalledProcessError as e:
            # Re-raise with stderr output for better debugging.
            # Arguments may be path-like objects, so stringify before joining;
            # otherwise a TypeError here would hide the real failure.
            command = " ".join(str(arg) for arg in args)
            error_msg = f"CLI command failed: {command}\n"
            error_msg += f"Exit code: {e.returncode}\n"
            if e.stdout:
                error_msg += f"STDOUT:\n{e.stdout}\n"
            if e.stderr:
                error_msg += f"STDERR:\n{e.stderr}\n"
            raise RuntimeError(error_msg) from e

        return result

    @cached_property
    def config(self) -> MetaxyConfig:
        """Load configuration from project's metaxy.toml (cached after first access)."""
        return MetaxyConfig.load(self.project_dir / "metaxy.toml")

    @cached_property
    def stores(self) -> dict[str, MetadataStore]:
        """Get all configured stores from project config (cached after first access)."""
        return {k: self.config.get_store(k) for k in self.config.stores}
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
class ExternalMetaxyProject(MetaxyProject):
    """Helper for working with existing Metaxy projects.

    Use this class to interact with pre-existing projects like examples,
    running CLI commands and accessing their configuration.

    Example:
        ```py
        project = ExternalMetaxyProject(Path("examples/example-migration"))
        result = project.run_cli("graph", "push", env={"STAGE": "1"})
        assert result.returncode == 0
        print(project.package_name)  # "example_migration"
        ```
    """

    def __init__(self, project_dir: Path, require_config: bool = True):
        """Initialize an external Metaxy project.

        Args:
            project_dir: Path to existing project directory (may contain metaxy.toml)
            require_config: If True, requires metaxy.toml to exist (default: True)

        Raises:
            ValueError: If require_config is True and metaxy.toml is missing
        """
        super().__init__(project_dir)
        if require_config and not (self.project_dir / "metaxy.toml").exists():
            raise ValueError(
                f"No metaxy.toml found in {self.project_dir}. "
                "ExternalMetaxyProject requires an existing project configuration."
            )
        # Both are populated by setup_venv().
        self._venv_path: Path | None = None
        self._venv_python: Path | None = None

    @staticmethod
    def _uv_pip_install_editable(source: Path, what: str, env: dict[str, str]) -> None:
        """Run ``uv pip install -e <source>`` and raise RuntimeError on failure.

        Args:
            source: Directory to install in editable mode
            what: Short label used in the error message (e.g. "metaxy")
            env: Environment for the subprocess (selects the target venv)
        """
        result = subprocess.run(
            ["uv", "pip", "install", "-e", str(source)],
            env=env,
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"Failed to install {what} from {source}\n"
                f"STDOUT: {result.stdout}\n"
                f"STDERR: {result.stderr}"
            )

    def setup_venv(self, venv_path: Path, install_metaxy_from: Path | None = None):
        """Create a virtual environment and install the project.

        Requires the ``uv`` executable to be available on PATH.

        Args:
            venv_path: Path where the venv should be created
            install_metaxy_from: Optional path to metaxy source to install (defaults to current)

        Returns:
            Path to the Python interpreter in the venv

        Raises:
            subprocess.CalledProcessError: If creating the venv fails
            RuntimeError: If installing metaxy or the project fails

        Example:
            ```py
            project = ExternalMetaxyProject(Path("tests/fixtures/test-project"))
            with tempfile.TemporaryDirectory() as tmpdir:
                project.setup_venv(Path(tmpdir) / "venv")
                result = project.run_in_venv("python", "-c", "import test_metaxy_project")
            ```
        """
        # Create venv using uv
        subprocess.run(
            ["uv", "venv", str(venv_path), "--python", str(sys.executable)], check=True
        )

        if install_metaxy_from is None:
            # Default to metaxy package location (get the repo root)
            # metaxy.__file__ -> .../src/metaxy/__init__.py
            # .parent -> .../src/metaxy
            # .parent -> .../src
            # .parent -> repo root
            import metaxy

            install_metaxy_from = Path(metaxy.__file__).parent.parent.parent

        # Set VIRTUAL_ENV to activate the venv
        venv_env = os.environ.copy()
        venv_env["VIRTUAL_ENV"] = str(venv_path)
        # Remove PYTHONHOME if set (can interfere with venv)
        venv_env.pop("PYTHONHOME", None)

        # Use uv pip to install metaxy first, then the project itself
        self._uv_pip_install_editable(install_metaxy_from, "metaxy", venv_env)
        self._uv_pip_install_editable(self.project_dir, "project", venv_env)

        self._venv_path = venv_path

        # Standard venv layout: Scripts\python.exe on Windows, bin/python elsewhere.
        if os.name == "nt":
            self._venv_python = Path(venv_path) / "Scripts" / "python.exe"
        else:
            self._venv_python = Path(venv_path) / "bin" / "python"
        return self._venv_python

    def run_in_venv(
        self, *args, check: bool = True, env: dict[str, str] | None = None, **kwargs
    ):
        """Run a command in the configured venv.

        The command is launched through ``uv run --active`` with VIRTUAL_ENV
        pointing at the venv created by setup_venv(), using the project
        directory as working directory. Output is always captured as text.

        Args:
            *args: Command and arguments (e.g., "python", "-c", "print('hello')")
            check: If True (default), raises CalledProcessError on non-zero exit
            env: Optional dict of additional environment variables
            **kwargs: Additional arguments to pass to subprocess.run()

        Returns:
            subprocess.CompletedProcess: Result of the command

        Raises:
            RuntimeError: If setup_venv() hasn't been called yet
            subprocess.CalledProcessError: If check=True and command fails

        Example:
            ```py
            project.setup_venv(Path("/tmp/venv"))
            result = project.run_in_venv("python", "-m", "my_module")
            ```
        """
        if self._venv_path is None:
            raise RuntimeError("No venv configured. Call setup_venv() first.")

        # Start with current environment
        cmd_env = os.environ.copy()

        # Set VIRTUAL_ENV to activate the venv
        cmd_env["VIRTUAL_ENV"] = str(self._venv_path)
        # Remove PYTHONHOME if set (can interfere with venv)
        cmd_env.pop("PYTHONHOME", None)

        # Apply additional env overrides
        if env:
            cmd_env.update(env)

        # Run command with venv python
        return subprocess.run(
            ["uv", "run", "--active", *args],
            cwd=str(self.project_dir),
            capture_output=True,
            text=True,
            env=cmd_env,
            check=check,
            **kwargs,
        )

    @cached_property
    def package_name(self) -> str:
        """Get the Python package name from pyproject.toml.

        Converts the project name (e.g., "example-migration") to a valid
        Python module name (e.g., "example_migration") by replacing hyphens
        with underscores.

        Returns:
            The Python package/module name

        Raises:
            FileNotFoundError: If pyproject.toml doesn't exist
            ValueError: If pyproject.toml doesn't contain project.name
        """
        pyproject_path = self.project_dir / "pyproject.toml"
        if not pyproject_path.exists():
            raise FileNotFoundError(
                f"No pyproject.toml found in {self.project_dir}. "
                "Cannot determine package name."
            )

        # Parse TOML to get project name. Prefer the standard-library parser
        # (Python 3.11+) and fall back to the tomli backport on older versions.
        try:
            import tomllib
        except ModuleNotFoundError:
            import tomli as tomllib

        with open(pyproject_path, "rb") as f:
            pyproject = tomllib.load(f)

        project_name = pyproject.get("project", {}).get("name")
        if not project_name:
            raise ValueError(
                f"No project.name found in {pyproject_path}. "
                "Cannot determine package name."
            )

        # Convert project name to valid Python package name (replace hyphens with underscores)
        return project_name.replace("-", "_")
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
class TempMetaxyProject(MetaxyProject):
    """Helper for creating temporary Metaxy projects.

    Provides a context manager API for dynamically creating feature modules
    and running CLI commands with proper entrypoint configuration.

    Example:
        ```py
        project = TempMetaxyProject(tmp_path)

        def features():
            from metaxy import BaseFeature as Feature, FeatureSpec, FeatureKey, FieldSpec, FieldKey

            class MyFeature(Feature, spec=FeatureSpec(
                key=FeatureKey(["my_feature"]),
                fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")]
            )):
                pass

        with project.with_features(features):
            result = project.run_cli("graph", "push")
            assert result.returncode == 0
        ```
    """

    def __init__(self, tmp_path: Path, config_content: str | None = None):
        """Initialize a temporary Metaxy project.

        Creates the project directory if needed and writes metaxy.toml.

        Args:
            tmp_path: Temporary directory path (usually from pytest tmp_path fixture)
            config_content: Optional custom configuration content for metaxy.toml.
                If not provided, uses default DuckDB configuration.
        """
        super().__init__(tmp_path)
        # parents=True so a nested, not-yet-existing directory also works.
        self.project_dir.mkdir(parents=True, exist_ok=True)
        self._feature_modules: list[str] = []
        self._module_counter = 0
        self._custom_config = config_content
        self._write_config()

    def _write_config(self):
        """Write metaxy.toml configuration file."""
        if self._custom_config is not None:
            # Use custom config content
            config_content = self._custom_config
        else:
            # Default DuckDB store configuration.
            # Paths are embedded in TOML basic strings, where a backslash
            # starts an escape sequence; forward slashes keep Windows paths valid.
            dev_db_path = (self.project_dir / "metadata.duckdb").as_posix()
            staging_db_path = (self.project_dir / "metadata_staging.duckdb").as_posix()
            config_content = f'''project = "test"
store = "dev"

[stores.dev]
type = "metaxy.metadata_store.duckdb.DuckDBMetadataStore"

[stores.dev.config]
database = "{dev_db_path}"

[stores.staging]
type = "metaxy.metadata_store.duckdb.DuckDBMetadataStore"

[stores.staging.config]
database = "{staging_db_path}"
'''
        # TOML documents are UTF-8; do not depend on the locale encoding.
        (self.project_dir / "metaxy.toml").write_text(config_content, encoding="utf-8")

    def with_features(self, features_func, module_name: str | None = None):
        """Context manager that sets up features for the duration of the block.

        Extracts source code from features_func (skipping the function definition line),
        writes it to a Python module file, and tracks it so that run_cli() exposes it
        through a METAXY_ENTRYPOINT_N environment variable.

        Args:
            features_func: Function containing feature class definitions.
                All imports must be inside the function body.
            module_name: Optional module name. If not provided, generates
                "features_N" based on number of existing modules.

        Yields:
            str: The module name that was created

        Example:
            ```py
            def my_features():
                from metaxy import BaseFeature as Feature, FeatureSpec, FeatureKey

                class MyFeature(Feature, spec=...):
                    pass

            with project.with_features(my_features) as module:
                print(module)  # "features_0"
                result = project.run_cli("graph", "push")
            ```
        """

        @contextmanager
        def _context():
            # Generate module name if not provided
            nonlocal module_name
            if module_name is None:
                module_name = f"features_{self._module_counter}"
                self._module_counter += 1

            # Extract source code from function
            source = inspect.getsource(features_func)

            # Remove function definition line and dedent
            lines = source.split("\n")
            # Everything after the first line that starts with "def " and
            # contains ":" is treated as the body.
            # NOTE(review): this presumably expects the signature to fit on one
            # line; a multi-line signature would not be detected here - confirm.
            body_start = 0
            for i, line in enumerate(lines):
                if line.strip().startswith("def ") and ":" in line:
                    body_start = i + 1
                    break

            body_lines = lines[body_start:]
            dedented = textwrap.dedent("\n".join(body_lines))

            # Write to file in project directory. Python reads source files as
            # UTF-8 by default, so write with that encoding explicitly.
            feature_file = self.project_dir / f"{module_name}.py"
            feature_file.write_text(dedented, encoding="utf-8")

            # Track this module
            self._feature_modules.append(module_name)

            try:
                yield module_name
            finally:
                # Cleanup: remove from tracking (file stays for debugging)
                if module_name in self._feature_modules:
                    self._feature_modules.remove(module_name)

        return _context()

    def run_cli(
        self, *args, check: bool = True, env: dict[str, str] | None = None, **kwargs
    ):
        """Run CLI command with current feature modules loaded.

        Automatically sets METAXY_ENTRYPOINT_0, METAXY_ENTRYPOINT_1, etc.
        based on active with_features() context managers, then delegates to
        MetaxyProject.run_cli() for environment setup and execution.

        Args:
            *args: CLI command arguments (e.g., "graph", "push")
            check: If True (default), a non-zero exit raises RuntimeError
            env: Optional dict of additional environment variables; these
                take precedence over the generated entrypoint variables
            **kwargs: Additional arguments to pass to subprocess.run()

        Returns:
            subprocess.CompletedProcess: Result of the CLI command

        Raises:
            RuntimeError: If check=True and the command fails (chained from
                subprocess.CalledProcessError)

        Example:
            ```py
            result = project.run_cli("graph", "history", "--limit", "5")
            print(result.stdout)
            ```
        """
        # Set entrypoints for all tracked modules
        # Use METAXY_ENTRYPOINT_0, METAXY_ENTRYPOINT_1, etc. (single underscore for list indexing)
        cli_env = {
            f"METAXY_ENTRYPOINT_{idx}": module_name
            for idx, module_name in enumerate(self._feature_modules)
        }

        # Caller-supplied variables override the generated ones
        if env:
            cli_env.update(env)

        return super().run_cli(*args, check=check, env=cli_env, **kwargs)

    @property
    def entrypoints(self) -> list[str]:
        """Names of the entrypoint environment variables for the tracked modules."""
        return [f"METAXY_ENTRYPOINT_{idx}" for idx in range(len(self._feature_modules))]

    @property
    def graph(self) -> FeatureGraph:
        """Load features from the project's feature modules into a graph.

        A new FeatureGraph is built on every access; tracked modules are
        imported (or reloaded if already imported) while it is active.

        Returns:
            FeatureGraph with all features from tracked modules loaded
        """
        graph = FeatureGraph()

        # Ensure project dir is in sys.path
        project_dir_str = str(self.project_dir)
        was_in_path = project_dir_str in sys.path
        if not was_in_path:
            sys.path.insert(0, project_dir_str)

        try:
            # Feature files are created at runtime; drop finder caches so
            # freshly written modules are noticed by the import system.
            importlib.invalidate_caches()

            with graph.use():
                # Import feature modules directly
                for module_name in self._feature_modules:
                    # Import or reload the module
                    if module_name in sys.modules:
                        importlib.reload(sys.modules[module_name])
                    else:
                        importlib.import_module(module_name)
        finally:
            # Clean up sys.path if we added it
            if not was_in_path and project_dir_str in sys.path:
                sys.path.remove(project_dir_str)

        return graph
|