metaxy 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of metaxy might be problematic. Click here for more details.
- metaxy/__init__.py +61 -0
- metaxy/_testing.py +542 -0
- metaxy/_utils.py +16 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +76 -0
- metaxy/cli/context.py +71 -0
- metaxy/cli/graph.py +576 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +42 -0
- metaxy/cli/metadata.py +271 -0
- metaxy/cli/migrations.py +862 -0
- metaxy/cli/push.py +55 -0
- metaxy/config.py +450 -0
- metaxy/data_versioning/__init__.py +24 -0
- metaxy/data_versioning/calculators/__init__.py +13 -0
- metaxy/data_versioning/calculators/base.py +97 -0
- metaxy/data_versioning/calculators/duckdb.py +186 -0
- metaxy/data_versioning/calculators/ibis.py +225 -0
- metaxy/data_versioning/calculators/polars.py +135 -0
- metaxy/data_versioning/diff/__init__.py +15 -0
- metaxy/data_versioning/diff/base.py +150 -0
- metaxy/data_versioning/diff/narwhals.py +108 -0
- metaxy/data_versioning/hash_algorithms.py +19 -0
- metaxy/data_versioning/joiners/__init__.py +9 -0
- metaxy/data_versioning/joiners/base.py +70 -0
- metaxy/data_versioning/joiners/narwhals.py +235 -0
- metaxy/entrypoints.py +309 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/alembic.py +326 -0
- metaxy/ext/sqlmodel.py +172 -0
- metaxy/ext/sqlmodel_system_tables.py +139 -0
- metaxy/graph/__init__.py +21 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +399 -0
- metaxy/graph/diff/differ.py +740 -0
- metaxy/graph/diff/models.py +418 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +274 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +320 -0
- metaxy/graph/diff/rendering/rich.py +165 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +31 -0
- metaxy/metadata_store/_protocols.py +38 -0
- metaxy/metadata_store/base.py +1676 -0
- metaxy/metadata_store/clickhouse.py +161 -0
- metaxy/metadata_store/duckdb.py +167 -0
- metaxy/metadata_store/exceptions.py +43 -0
- metaxy/metadata_store/ibis.py +451 -0
- metaxy/metadata_store/memory.py +228 -0
- metaxy/metadata_store/sqlite.py +187 -0
- metaxy/metadata_store/system_tables.py +257 -0
- metaxy/migrations/__init__.py +34 -0
- metaxy/migrations/detector.py +153 -0
- metaxy/migrations/executor.py +208 -0
- metaxy/migrations/loader.py +260 -0
- metaxy/migrations/models.py +718 -0
- metaxy/migrations/ops.py +390 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +6 -0
- metaxy/models/constants.py +24 -0
- metaxy/models/feature.py +665 -0
- metaxy/models/feature_spec.py +105 -0
- metaxy/models/field.py +25 -0
- metaxy/models/plan.py +155 -0
- metaxy/models/types.py +157 -0
- metaxy/py.typed +0 -0
- metaxy-0.0.0.dist-info/METADATA +247 -0
- metaxy-0.0.0.dist-info/RECORD +75 -0
- metaxy-0.0.0.dist-info/WHEEL +4 -0
- metaxy-0.0.0.dist-info/entry_points.txt +3 -0
metaxy/__init__.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from metaxy.config import MetaxyConfig, StoreConfig
|
|
2
|
+
from metaxy.entrypoints import (
|
|
3
|
+
load_features,
|
|
4
|
+
load_module_entrypoint,
|
|
5
|
+
load_package_entrypoints,
|
|
6
|
+
)
|
|
7
|
+
from metaxy.metadata_store import (
|
|
8
|
+
InMemoryMetadataStore,
|
|
9
|
+
MetadataStore,
|
|
10
|
+
)
|
|
11
|
+
from metaxy.migrations import (
|
|
12
|
+
BaseOperation,
|
|
13
|
+
CustomMigration,
|
|
14
|
+
DataVersionReconciliation,
|
|
15
|
+
DiffMigration,
|
|
16
|
+
FullGraphMigration,
|
|
17
|
+
MetadataBackfill,
|
|
18
|
+
Migration,
|
|
19
|
+
MigrationExecutor,
|
|
20
|
+
MigrationResult,
|
|
21
|
+
SystemTableStorage,
|
|
22
|
+
detect_migration,
|
|
23
|
+
)
|
|
24
|
+
from metaxy.models.feature import Feature, FeatureGraph, get_feature_by_key, graph
|
|
25
|
+
from metaxy.models.feature_spec import FeatureDep, FeatureSpec
|
|
26
|
+
from metaxy.models.field import FieldDep, FieldSpec, SpecialFieldDep
|
|
27
|
+
from metaxy.models.types import FeatureDepMetadata, FeatureKey, FieldKey
|
|
28
|
+
|
|
29
|
+
# Public API of the ``metaxy`` package.
#
# Every name listed here must be bound by one of the imports above; a name in
# ``__all__`` that is not a module attribute makes ``from metaxy import *``
# raise AttributeError.
__all__ = [
    # Feature definitions and graph
    "Feature",
    "FeatureGraph",
    "graph",
    "get_feature_by_key",
    # Specs, dependencies and keys
    "FeatureDep",
    "FeatureDepMetadata",
    "FeatureSpec",
    "FieldDep",
    "FieldSpec",
    "SpecialFieldDep",
    "FeatureKey",
    "FieldKey",
    # Metadata stores
    "MetadataStore",
    "InMemoryMetadataStore",
    # Entrypoint loading
    # NOTE(review): "load_config_entrypoints" used to be listed here, but it is
    # not imported from metaxy.entrypoints above, which broke star-imports.
    # If that function exists, import it above and re-add it here.
    "load_features",
    "load_module_entrypoint",
    "load_package_entrypoints",
    # Migrations
    "Migration",
    "DiffMigration",
    "FullGraphMigration",
    "CustomMigration",
    "MigrationResult",
    "MigrationExecutor",
    "SystemTableStorage",
    "BaseOperation",
    "DataVersionReconciliation",
    "MetadataBackfill",
    "detect_migration",
    # Configuration
    "MetaxyConfig",
    "StoreConfig",
]
|
metaxy/_testing.py
ADDED
|
@@ -0,0 +1,542 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import sys
|
|
3
|
+
import tempfile
|
|
4
|
+
from functools import cached_property
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from metaxy import (
|
|
9
|
+
FeatureSpec,
|
|
10
|
+
)
|
|
11
|
+
from metaxy.config import MetaxyConfig
|
|
12
|
+
from metaxy.data_versioning.hash_algorithms import HashAlgorithm
|
|
13
|
+
from metaxy.metadata_store.base import MetadataStore
|
|
14
|
+
from metaxy.models.feature import FeatureGraph
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TempFeatureModule:
    """Helper to create temporary Python modules with feature definitions.

    A temporary directory is created and inserted at the front of ``sys.path``;
    :meth:`write_features` then (re)generates ``<module_name>.py`` inside it.

    This allows features to be importable by historical graph reconstruction.
    The same import path (e.g., 'temp_features.Upstream') can be used across
    different feature versions by overwriting the module file.
    """

    def __init__(self, module_name: str = "temp_test_features"):
        """Create the temp directory and make it importable.

        Args:
            module_name: Name of the module file (without ``.py``) that
                :meth:`write_features` will generate.
        """
        self.temp_dir = tempfile.mkdtemp(prefix="metaxy_test_")
        self.module_name = module_name
        self.module_path = Path(self.temp_dir) / f"{module_name}.py"

        # Add to sys.path so module can be imported
        sys.path.insert(0, self.temp_dir)

    def write_features(self, feature_specs: dict[str, FeatureSpec]):
        """Write feature classes to the module file.

        The generated module creates its own ``FeatureGraph`` (``_graph``) and
        defines one ``Feature`` subclass per entry inside ``_graph.use()``.
        If the module was already imported it is reloaded afterwards.

        Args:
            feature_specs: Dict mapping class names to FeatureSpec objects
        """
        code_lines = [
            "# Auto-generated test feature module",
            "from metaxy import Feature, FeatureSpec, FieldSpec, FieldKey, FeatureDep, FeatureKey, FieldDep, SpecialFieldDep",
            "from metaxy.models.feature import FeatureGraph",
            "",
            "# Use a dedicated graph for this temp module",
            "_graph = FeatureGraph()",
            "",
        ]

        for class_name, spec in feature_specs.items():
            # Turn the spec back into a constructor expression
            spec_repr = self._generate_spec_repr(spec.model_dump(mode="python"))

            code_lines.extend(
                [
                    f"# Define {class_name} in the temp graph context",
                    "with _graph.use():",
                    f"    class {class_name}(",
                    "        Feature,",
                    f"        spec={spec_repr}",
                    "    ):",
                    "        pass",
                    "",
                ]
            )

        # Write the file
        self.module_path.write_text("\n".join(code_lines))

        # The file was created/changed after interpreter start-up; drop the
        # import system's cached directory listings so it is noticed.
        importlib.invalidate_caches()

        # Reload module if it was already imported
        if self.module_name in sys.modules:
            importlib.reload(sys.modules[self.module_name])

    @staticmethod
    def _field_dep_repr(field_dep: dict[str, Any]) -> str:
        """Render one dumped field dependency as a ``FieldDep(...)`` expression."""
        feature_key = f"FeatureKey({field_dep['feature_key']!r})"
        fields_val = field_dep.get("fields")
        if fields_val == "__METAXY_ALL_DEP__":
            return f"FieldDep(feature_key={feature_key}, fields=SpecialFieldDep.ALL)"
        # Otherwise build an explicit list of FieldKey objects
        field_keys = ", ".join(f"FieldKey({k!r})" for k in fields_val)
        return f"FieldDep(feature_key={feature_key}, fields=[{field_keys}])"

    @classmethod
    def _field_spec_repr(cls, field: dict[str, Any]) -> str:
        """Render one dumped field as a ``FieldSpec(...)`` expression."""
        c_parts = [
            f"key=FieldKey({field['key']!r})",
            # repr() keeps ints unchanged and quotes strings, so the generated
            # source stays valid whichever type code_version has.
            f"code_version={field['code_version']!r}",
        ]

        deps_val = field.get("deps")
        if deps_val == "__METAXY_ALL_DEP__":
            c_parts.append("deps=SpecialFieldDep.ALL")
        elif isinstance(deps_val, list) and deps_val:
            # Non-empty list of field deps; any other value emits no deps=
            cdeps = ", ".join(cls._field_dep_repr(cd) for cd in deps_val)
            c_parts.append(f"deps=[{cdeps}]")

        return f"FieldSpec({', '.join(c_parts)})"

    def _generate_spec_repr(self, spec_dict: dict[str, Any]) -> str:
        """Generate FeatureSpec constructor call from dict.

        Only ``key``, ``deps`` (their ``key`` only) and ``fields`` are
        rendered; any other entries of ``spec_dict`` are ignored.

        Args:
            spec_dict: Dict produced by dumping a FeatureSpec.

        Returns:
            Source text of a ``FeatureSpec(...)`` expression.
        """
        # This is a simple representation - could be made more robust
        parts = [f"key=FeatureKey({spec_dict['key']!r})"]

        # deps
        deps = spec_dict.get("deps")
        if deps is None:
            parts.append("deps=None")
        else:
            deps_repr = ", ".join(
                f"FeatureDep(key=FeatureKey({d['key']!r}))" for d in deps
            )
            parts.append(f"deps=[{deps_repr}]")

        # fields (omitted entirely when empty)
        fields = spec_dict.get("fields", [])
        if fields:
            field_reprs = ", ".join(self._field_spec_repr(c) for c in fields)
            parts.append(f"fields=[{field_reprs}]")

        return f"FeatureSpec({', '.join(parts)})"

    @property
    def graph(self) -> FeatureGraph:
        """Get the FeatureGraph from the temp module.

        Returns:
            The _graph instance from the imported module
        """
        # Import the module to get its _graph
        module = importlib.import_module(self.module_name)
        return module._graph

    def cleanup(self):
        """Remove temp directory and module from sys.path.

        NOTE: Don't call this until the test session is completely done,
        as historical graph loading may need to import from these modules.
        """
        if self.temp_dir in sys.path:
            sys.path.remove(self.temp_dir)

        # Remove from sys.modules
        if self.module_name in sys.modules:
            del sys.modules[self.module_name]

        # Delete temp directory (best effort)
        import shutil

        shutil.rmtree(self.temp_dir, ignore_errors=True)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def assert_all_results_equal(results: dict[str, Any], snapshot=None) -> None:
    """Check that every store-type variant produced the same result.

    The first entry of ``results`` is taken as the reference and each later
    entry is compared against it. When a snapshot is supplied, the complete
    ``results`` mapping is then compared with it as well.

    Args:
        results: Dict mapping store_type to result data
        snapshot: Optional syrupy snapshot fixture to record all results

    Raises:
        AssertionError: If any variants produce different results
    """
    # Nothing to compare
    if not results:
        return

    variants = iter(results.items())
    reference_key, reference_result = next(variants)

    # Every remaining variant must match the reference
    for key, result in variants:
        assert result == reference_result, (
            f"{key} produced different results than {reference_key}:\n"
            f"Expected: {reference_result}\n"
            f"Got: {result}"
        )

    if snapshot is None:
        return

    # Snapshot ALL results, not only the reference
    assert results == snapshot
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class HashAlgorithmCases:
    """Case collection with one ``case_*`` method per HashAlgorithm member."""

    def case_xxhash64(self) -> HashAlgorithm:
        """Case: xxHash64."""
        return HashAlgorithm.XXHASH64

    def case_xxhash32(self) -> HashAlgorithm:
        """Case: xxHash32."""
        return HashAlgorithm.XXHASH32

    def case_wyhash(self) -> HashAlgorithm:
        """Case: WyHash."""
        return HashAlgorithm.WYHASH

    def case_sha256(self) -> HashAlgorithm:
        """Case: SHA256."""
        return HashAlgorithm.SHA256

    def case_md5(self) -> HashAlgorithm:
        """Case: MD5."""
        return HashAlgorithm.MD5
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class MetaxyProject:
    """Base class for Metaxy projects.

    Wraps a project directory and offers two things: running the Metaxy CLI
    in a subprocess with a suitable environment, and lazy access to the
    project's configuration and configured stores.
    """

    def __init__(self, project_dir: Path):
        """Initialize a Metaxy project.

        Args:
            project_dir: Path to project directory containing metaxy.toml
        """
        self.project_dir = Path(project_dir)

    def run_cli(
        self, *args, check: bool = True, env: dict[str, str] | None = None, **kwargs
    ):
        """Run a CLI command in a subprocess rooted at the project directory.

        The child inherits the current environment, with the project directory
        prepended to PYTHONPATH; ``env`` entries are applied last. Output is
        captured as text.

        Args:
            *args: CLI command arguments (e.g., "graph", "push")
            check: If True (default), raises CalledProcessError on non-zero exit
            env: Optional dict of additional environment variables
            **kwargs: Additional arguments to pass to subprocess.run()

        Returns:
            subprocess.CompletedProcess: Result of the CLI command

        Raises:
            subprocess.CalledProcessError: If check=True and command fails

        Example:
            >>> result = project.run_cli("graph", "history", "--limit", "5")
            >>> print(result.stdout)
        """
        import os
        import subprocess

        # Child environment starts as a copy of ours
        child_env = dict(os.environ)

        # Project directory goes first on PYTHONPATH so its modules import
        search_path = [str(self.project_dir)]
        if "PYTHONPATH" in child_env:
            search_path.append(child_env["PYTHONPATH"])
        child_env["PYTHONPATH"] = os.pathsep.join(search_path)

        # Caller-supplied overrides win over everything above
        if env:
            child_env.update(env)

        command = [sys.executable, "-m", "metaxy.cli.app", *args]
        return subprocess.run(
            command,
            cwd=str(self.project_dir),
            capture_output=True,
            text=True,
            env=child_env,
            check=check,
            **kwargs,
        )

    @cached_property
    def config(self) -> MetaxyConfig:
        """Configuration loaded from the project's metaxy.toml (cached)."""
        config_file = self.project_dir / "metaxy.toml"
        return MetaxyConfig.load(config_file)

    @cached_property
    def stores(self) -> dict[str, MetadataStore]:
        """All stores named in the project config, keyed by store name (cached)."""
        cfg = self.config
        return {name: cfg.get_store(name) for name in cfg.stores}
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
class ExternalMetaxyProject(MetaxyProject):
    """Helper for working with existing Metaxy projects.

    Points at a project that already exists on disk (for instance an example
    project) so its CLI can be run and its configuration read.

    Example:
        >>> project = ExternalMetaxyProject(Path("examples/src/examples/migration"))
        >>> result = project.run_cli("graph", "push", env={"STAGE": "1"})
        >>> assert result.returncode == 0
    """

    def __init__(self, project_dir: Path):
        """Initialize an external Metaxy project.

        Args:
            project_dir: Path to existing project directory containing metaxy.toml

        Raises:
            ValueError: If the directory has no metaxy.toml
        """
        super().__init__(project_dir)

        config_file = self.project_dir / "metaxy.toml"
        if config_file.exists():
            return

        raise ValueError(
            f"No metaxy.toml found in {self.project_dir}. "
            "ExternalMetaxyProject requires an existing project configuration."
        )
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
class TempMetaxyProject(MetaxyProject):
    """Helper for creating temporary Metaxy projects.

    Provides a context manager API for dynamically creating feature modules
    and running CLI commands with proper entrypoint configuration.

    Example:
        >>> project = TempMetaxyProject(tmp_path)
        >>>
        >>> def features():
        ...     from metaxy import Feature, FeatureSpec, FeatureKey, FieldSpec, FieldKey
        ...
        ...     class MyFeature(Feature, spec=FeatureSpec(
        ...         key=FeatureKey(["my_feature"]),
        ...         deps=None,
        ...         fields=[FieldSpec(key=FieldKey(["default"]), code_version=1)]
        ...     )):
        ...         pass
        >>>
        >>> with project.with_features(features):
        ...     result = project.run_cli("graph", "push")
        ...     assert result.returncode == 0
    """

    def __init__(self, tmp_path: Path, config_content: str | None = None):
        """Initialize a temporary Metaxy project.

        Creates the project directory if needed and writes metaxy.toml.

        Args:
            tmp_path: Temporary directory path (usually from pytest tmp_path fixture)
            config_content: Optional custom configuration content for metaxy.toml.
                If not provided, uses default DuckDB configuration.
        """
        super().__init__(tmp_path)
        self.project_dir.mkdir(exist_ok=True)
        # Names of feature modules currently active via with_features()
        self._feature_modules: list[str] = []
        self._custom_config = config_content
        self._write_config()

    def _write_config(self):
        """Write metaxy.toml configuration file."""
        if self._custom_config is not None:
            # Use custom config content
            config_content = self._custom_config
        else:
            # Default DuckDB store configuration.
            # Paths are rendered with forward slashes: in a TOML basic string a
            # backslash starts an escape sequence, so a native Windows path
            # would make the file unparsable. On POSIX the text is unchanged.
            dev_db_path = (self.project_dir / "metadata.duckdb").as_posix()
            staging_db_path = (self.project_dir / "metadata_staging.duckdb").as_posix()
            config_content = f'''store = "dev"

[stores.dev]
type = "metaxy.metadata_store.duckdb.DuckDBMetadataStore"

[stores.dev.config]
database = "{dev_db_path}"

[stores.staging]
type = "metaxy.metadata_store.duckdb.DuckDBMetadataStore"

[stores.staging.config]
database = "{staging_db_path}"
'''
        (self.project_dir / "metaxy.toml").write_text(config_content)

    def with_features(self, features_func, module_name: str | None = None):
        """Context manager that sets up features for the duration of the block.

        Extracts source code from features_func (skipping the function definition line),
        writes it to a Python module file, and tracks it so that run_cli() exports it
        through a METAXY_ENTRYPOINT_N environment variable.

        Args:
            features_func: Function containing feature class definitions.
                All imports must be inside the function body.
            module_name: Optional module name. If not provided, generates
                "features_N" based on number of existing modules.

        Yields:
            str: The module name that was created

        Example:
            >>> def my_features():
            ...     from metaxy import Feature, FeatureSpec, FeatureKey
            ...
            ...     class MyFeature(Feature, spec=...):
            ...         pass
            >>>
            >>> with project.with_features(my_features) as module:
            ...     print(module)  # "features_0"
            ...     result = project.run_cli("graph", "push")
        """
        import inspect
        import textwrap
        from contextlib import contextmanager

        @contextmanager
        def _context():
            # Generate module name if not provided
            nonlocal module_name
            if module_name is None:
                module_name = f"features_{len(self._feature_modules)}"

            # Extract source code from function
            source = inspect.getsource(features_func)

            # Remove function definition line and dedent.
            # The body starts after the first line that begins with "def " and
            # contains ":"; if no such line exists the whole source is kept.
            # NOTE(review): a signature spanning several lines may not match
            # this heuristic - confirm callers only use one-line signatures.
            lines = source.split("\n")
            body_start = 0
            for i, line in enumerate(lines):
                if line.strip().startswith("def ") and ":" in line:
                    body_start = i + 1
                    break

            body_lines = lines[body_start:]
            dedented = textwrap.dedent("\n".join(body_lines))

            # Write to file in project directory
            feature_file = self.project_dir / f"{module_name}.py"
            feature_file.write_text(dedented)

            # Track this module
            self._feature_modules.append(module_name)

            try:
                yield module_name
            finally:
                # Cleanup: remove from tracking (file stays for debugging)
                if module_name in self._feature_modules:
                    self._feature_modules.remove(module_name)

        return _context()

    def run_cli(
        self, *args, check: bool = True, env: dict[str, str] | None = None, **kwargs
    ):
        """Run CLI command with current feature modules loaded.

        Automatically sets METAXY_ENTRYPOINT_0, METAXY_ENTRYPOINT_1, etc.
        based on active with_features() context managers, then delegates to
        MetaxyProject.run_cli for environment setup and process execution.

        Args:
            *args: CLI command arguments (e.g., "graph", "push")
            check: If True (default), raises CalledProcessError on non-zero exit
            env: Optional dict of additional environment variables
            **kwargs: Additional arguments to pass to subprocess.run()

        Returns:
            subprocess.CompletedProcess: Result of the CLI command

        Raises:
            subprocess.CalledProcessError: If check=True and command fails

        Example:
            >>> result = project.run_cli("graph", "history", "--limit", "5")
            >>> print(result.stdout)
        """
        # Set entrypoints for all tracked modules
        # Use METAXY_ENTRYPOINT_0, METAXY_ENTRYPOINT_1, etc. (single underscore for list indexing)
        merged_env = {
            f"METAXY_ENTRYPOINT_{idx}": module_name
            for idx, module_name in enumerate(self._feature_modules)
        }

        # Caller-supplied variables take precedence over the entrypoints
        if env:
            merged_env.update(env)

        return super().run_cli(*args, check=check, env=merged_env, **kwargs)

    @property
    def entrypoints(self):
        """Names of the METAXY_ENTRYPOINT_N variables for the tracked modules."""
        return [f"METAXY_ENTRYPOINT_{idx}" for idx in range(len(self._feature_modules))]

    @property
    def graph(self) -> FeatureGraph:
        """Load features from the project's feature modules into a graph.

        A new FeatureGraph is built on every access; tracked modules are
        imported (or reloaded if already imported) while that graph is active.

        Returns:
            FeatureGraph with all features from tracked modules loaded
        """
        graph = FeatureGraph()

        # Ensure project dir is in sys.path
        project_dir_str = str(self.project_dir)
        was_in_path = project_dir_str in sys.path
        if not was_in_path:
            sys.path.insert(0, project_dir_str)

        try:
            # Module files are written at runtime; make sure the import
            # system does not rely on a stale directory listing.
            importlib.invalidate_caches()

            with graph.use():
                # Import feature modules directly
                for module_name in self._feature_modules:
                    # Import or reload the module
                    if module_name in sys.modules:
                        importlib.reload(sys.modules[module_name])
                    else:
                        importlib.import_module(module_name)
        finally:
            # Clean up sys.path if we added it
            if not was_in_path and project_dir_str in sys.path:
                sys.path.remove(project_dir_str)

        return graph
|
metaxy/_utils.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import narwhals as nw
|
|
4
|
+
import polars as pl
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def collect_to_polars(lazy_frame: nw.LazyFrame[Any]) -> pl.DataFrame:
    """Materialize a Narwhals LazyFrame and return it as a Polars DataFrame.

    This handles all backend conversions (Polars, DuckDB/PyArrow, etc.) transparently.
    Used throughout tests for materializing query results.
    """
    collected = lazy_frame.collect()
    return collected.to_polars()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Only the helper defined above is exported from this module.
__all__ = ["collect_to_polars"]
|
metaxy/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version string.
__version__ = "0.0.0"
|
metaxy/cli/app.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Main Metaxy CLI application."""
|
|
2
|
+
|
|
3
|
+
from typing import Annotated
|
|
4
|
+
|
|
5
|
+
import cyclopts
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
from metaxy._version import __version__
|
|
9
|
+
from metaxy.config import MetaxyConfig
|
|
10
|
+
|
|
11
|
+
# Shared Rich console; handed to the cyclopts app below for formatted output
console = Console()

# Root CLI application
app = cyclopts.App(
    name="metaxy",  # pyrefly: ignore[unexpected-keyword]
    help="Metaxy - Feature Metadata Management",  # pyrefly: ignore[unexpected-keyword]
    version=__version__,  # pyrefly: ignore[unexpected-keyword]
    console=console,  # pyrefly: ignore[unexpected-keyword]
    config=cyclopts.config.Env(  # pyrefly: ignore[unexpected-keyword,implicit-import]
        "METAXY_",  # Every environment variable for setting the arguments will begin with this.  # pyrefly: ignore[bad-argument-count]
    ),
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@app.command
def shell():
    """Start interactive shell."""
    # Hand control to the app's own interactive shell
    app.interactive_shell()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Meta app: runs before any command and receives the raw command-line tokens
@app.meta.default
def launcher(
    *tokens: Annotated[str, cyclopts.Parameter(show=False, allow_leading_hyphen=True)],
):
    """Metaxy CLI.

    Auto-discovers config file (metaxy.toml or pyproject.toml) by searching
    current directory and parent directories.

    Environment variables can override config (METAXY_STORE, METAXY_MIGRATIONS_DIR, etc).
    """
    import logging

    # Raise the root logger to INFO for the duration of the CLI run
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    # Load Metaxy configuration, searching parent directories as well
    loaded_config = MetaxyConfig.load(search_parents=True)

    # Publish the config through the CLI context; commands read it from
    # there and instantiate/open stores themselves as needed
    from metaxy.cli.context import set_config

    set_config(loaded_config)

    # Dispatch the remaining tokens to the actual command
    app(tokens)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Subcommand groups, registered by "module:attribute" import string so the
# target modules are only loaded lazily
app.command("metaxy.cli.migrations:app", name="migrations")
app.command("metaxy.cli.graph:app", name="graph")
app.command("metaxy.cli.graph_diff:app", name="graph-diff")
app.command("metaxy.cli.list:app", name="list")
app.command("metaxy.cli.metadata:app", name="metadata")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def main():
    """Entry point for the CLI: invoke the meta app."""
    app.meta()


# Allow running the CLI with ``python -m metaxy.cli.app``
if __name__ == "__main__":
    main()
|