metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,780 @@
1
+ import importlib
2
+ import inspect
3
+ import os
4
+ import subprocess
5
+ import sys
6
+ import tempfile
7
+ import textwrap
8
+ from contextlib import contextmanager
9
+ from functools import cached_property
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from metaxy.config import MetaxyConfig
14
+ from metaxy.metadata_store.base import MetadataStore
15
+ from metaxy.models.feature import FeatureGraph
16
+ from metaxy.models.feature_spec import (
17
+ FeatureSpecWithIDColumns,
18
+ )
19
+ from metaxy.versioning.types import HashAlgorithm
20
+
21
# Identifier column names exported for use by test helpers.
DEFAULT_ID_COLUMNS = ["sample_uid"]

# Public names of this testing helper module.
__all__ = [
    "TempFeatureModule",
    "assert_all_results_equal",
    "HashAlgorithmCases",
    "MetaxyProject",
    "ExternalMetaxyProject",
    "TempMetaxyProject",  # Backward compatibility alias
    "DEFAULT_ID_COLUMNS",
]
32
+
33
+
34
class TempFeatureModule:
    """Helper to create temporary Python modules with feature definitions.

    This allows features to be importable by historical graph reconstruction.
    The same import path (e.g., 'temp_features.Upstream') can be used across
    different feature versions by overwriting the module file.
    """

    def __init__(self, module_name: str = "temp_test_features"):
        """Create a fresh temp directory and put it at the front of ``sys.path``.

        Args:
            module_name: Name under which the generated module is imported.
        """
        self.temp_dir = tempfile.mkdtemp(prefix="metaxy_test_")
        self.module_name = module_name
        self.module_path = Path(self.temp_dir) / f"{module_name}.py"

        # Add to sys.path so module can be imported
        sys.path.insert(0, self.temp_dir)

    def write_features(
        self, feature_specs: "dict[str, FeatureSpecWithIDColumns]"
    ) -> None:
        """Write feature classes to the module file.

        The file is (re)generated from scratch. If the module has already been
        imported it is reloaded so the new definitions take effect.

        Args:
            feature_specs: Dict mapping class names to FeatureSpec objects
        """
        code_lines = [
            "# Auto-generated test feature module",
            "from metaxy import BaseFeature as Feature, FeatureSpec, FieldSpec, FieldKey, FeatureDep, FeatureKey, FieldDep, SpecialFieldDep",
            "from metaxy._testing.models import SampleFeatureSpec",
            "from metaxy.models.feature import FeatureGraph",
            "",
            "# Use a dedicated graph for this temp module",
            "_graph = FeatureGraph()",
            "",
        ]

        for class_name, spec in feature_specs.items():
            # Render the spec as a constructor call of its own concrete class
            spec_repr = self._generate_spec_repr(
                spec.model_dump(mode="python"),
                spec_class_name=spec.__class__.__name__,
            )

            code_lines.extend(
                [
                    f"# Define {class_name} in the temp graph context",
                    "with _graph.use():",
                    f"    class {class_name}(",
                    "        Feature,",
                    f"        spec={spec_repr}",
                    "    ):",
                    "        pass",
                    "",
                ]
            )

        # Python source is decoded as UTF-8 by default, so write it that way
        # regardless of the platform's locale encoding.
        self.module_path.write_text("\n".join(code_lines), encoding="utf-8")
        self._drop_stale_import_state()

        # Reload module if it was already imported
        if self.module_name in sys.modules:
            importlib.reload(sys.modules[self.module_name])

    def _drop_stale_import_state(self) -> None:
        """Make the import system notice the freshly written module file.

        Cached bytecode is validated against the source's mtime (in whole
        seconds) and size, so a same-size rewrite within one second could
        otherwise be served from the stale ``.pyc``. The finder caches are
        invalidated as well, as recommended for modules created at runtime.
        """
        from importlib.util import cache_from_source

        try:
            cached = Path(cache_from_source(str(self.module_path)))
        except NotImplementedError:
            # This interpreter does not use bytecode cache files.
            cached = None

        if cached is not None:
            try:
                cached.unlink()
            except FileNotFoundError:
                pass

        importlib.invalidate_caches()

    def _generate_spec_repr(
        self, spec_dict: dict[str, Any], spec_class_name: str = "FeatureSpec"
    ) -> str:
        """Generate FeatureSpec constructor call from dict.

        Only the key, the dependency feature keys and the fields (key,
        code_version, deps) are rendered.

        Args:
            spec_dict: Dictionary representation of the spec
            spec_class_name: Name of the spec class to use (e.g., "SampleFeatureSpec", "FeatureSpec")

        Returns:
            Python source text of the constructor call.
        """
        # This is a simple representation - could be made more robust
        parts = [f"key=FeatureKey({spec_dict['key']!r})"]

        # deps (a missing or None value renders as an empty list)
        deps_repr = [
            f"FeatureDep(feature=FeatureKey({d['feature']!r}))"
            for d in (spec_dict.get("deps") or [])
        ]
        parts.append(f"deps=[{', '.join(deps_repr)}]")

        # fields (omitted entirely when empty)
        fields = spec_dict.get("fields", [])
        if fields:
            field_reprs = []
            for c in fields:
                c_parts = [
                    f"key=FieldKey({c['key']!r})",
                    f"code_version={c['code_version']!r}",
                ]

                # Handle deps: either the "all" sentinel or a list of field deps
                deps_val = c.get("deps")
                if deps_val == "__METAXY_ALL_DEP__":
                    c_parts.append("deps=SpecialFieldDep.ALL")
                elif isinstance(deps_val, list) and deps_val:
                    cdeps: list[str] = []  # type: ignore[misc]
                    for cd in deps_val:
                        fields_val = cd.get("fields")
                        if fields_val == "__METAXY_ALL_DEP__":
                            cdeps.append(  # type: ignore[arg-type]
                                f"FieldDep(feature=FeatureKey({cd['feature']!r}), fields=SpecialFieldDep.ALL)"
                            )
                        else:
                            # Build list of FieldKey objects
                            field_keys = [f"FieldKey({k!r})" for k in fields_val]
                            cdeps.append(
                                f"FieldDep(feature=FeatureKey({cd['feature']!r}), fields=[{', '.join(field_keys)}])"
                            )
                    c_parts.append(f"deps=[{', '.join(cdeps)}]")

                field_reprs.append(f"FieldSpec({', '.join(c_parts)})")  # type: ignore[arg-type]

            parts.append(f"fields=[{', '.join(field_reprs)}]")

        # Note: id_columns is handled by the concrete spec class (SampleFeatureSpec has default)
        # so we don't need to include it here explicitly

        return f"{spec_class_name}({', '.join(parts)})"

    @property
    def graph(self) -> "FeatureGraph":
        """Get the FeatureGraph from the temp module.

        Returns:
            The _graph instance from the imported module
        """
        # The module file may have been created after the import system last
        # scanned the temp directory.
        importlib.invalidate_caches()
        module = importlib.import_module(self.module_name)
        return module._graph

    def cleanup(self):
        """Remove temp directory and module from sys.path.

        NOTE: Don't call this until the test session is completely done,
        as historical graph loading may need to import from these modules.
        """
        if self.temp_dir in sys.path:
            sys.path.remove(self.temp_dir)

        # Remove from sys.modules
        if self.module_name in sys.modules:
            del sys.modules[self.module_name]

        # Delete temp directory
        import shutil

        shutil.rmtree(self.temp_dir, ignore_errors=True)
184
+
185
+
186
def assert_all_results_equal(results: dict[str, Any], snapshot=None) -> None:
    """Check that every store-type variant produced the same result.

    The first entry of ``results`` acts as the reference; every later entry
    must compare equal to it. When a snapshot is supplied, the complete
    mapping (ordered by key) is additionally compared against it.

    Args:
        results: Dict mapping store_type to result data
        snapshot: Optional syrupy snapshot fixture to record all results

    Raises:
        AssertionError: If any variants produce different results
    """
    # Nothing to compare (and nothing to snapshot) for an empty mapping.
    if not results:
        return

    remaining = iter(results.items())
    reference_key, reference_result = next(remaining)

    # Every other variant must match the reference variant.
    for key, result in remaining:
        assert result == reference_result, (
            f"{key} produced different results than {reference_key}:\n"
            f"Expected: {reference_result}\n"
            f"Got: {result}"
        )

    if snapshot is None:
        return

    # Key-sorted so the snapshot is deterministic across test runs.
    ordered = {name: results[name] for name in sorted(results)}
    assert ordered == snapshot
218
+
219
+
220
class HashAlgorithmCases:
    """Case provider with one ``case_*`` method per hash algorithm.

    Each method simply returns the corresponding ``HashAlgorithm`` member.
    """

    def case_xxhash64(self) -> HashAlgorithm:
        """Return the xxHash64 algorithm."""
        algorithm = HashAlgorithm.XXHASH64
        return algorithm

    def case_xxhash32(self) -> HashAlgorithm:
        """Return the xxHash32 algorithm."""
        algorithm = HashAlgorithm.XXHASH32
        return algorithm

    def case_wyhash(self) -> HashAlgorithm:
        """Return the WyHash algorithm."""
        algorithm = HashAlgorithm.WYHASH
        return algorithm

    def case_sha256(self) -> HashAlgorithm:
        """Return the SHA256 algorithm."""
        algorithm = HashAlgorithm.SHA256
        return algorithm

    def case_md5(self) -> HashAlgorithm:
        """Return the MD5 algorithm."""
        algorithm = HashAlgorithm.MD5
        return algorithm
242
+
243
+
244
+ class MetaxyProject:
245
+ """Base class for Metaxy projects.
246
+
247
+ Provides common functionality for running CLI commands with proper
248
+ environment setup and accessing project configuration.
249
+ """
250
+
251
+ def __init__(self, project_dir: Path):
252
+ """Initialize a Metaxy project.
253
+
254
+ Args:
255
+ project_dir: Path to project directory containing metaxy.toml
256
+ """
257
+ self.project_dir = Path(project_dir)
258
+
259
+ def run_cli(
260
+ self, *args, check: bool = True, env: dict[str, str] | None = None, **kwargs
261
+ ):
262
+ """Run CLI command with proper environment setup.
263
+
264
+ Args:
265
+ *args: CLI command arguments (e.g., "graph", "push")
266
+ check: If True (default), raises CalledProcessError on non-zero exit
267
+ env: Optional dict of additional environment variables
268
+ **kwargs: Additional arguments to pass to subprocess.run()
269
+
270
+ Returns:
271
+ subprocess.CompletedProcess: Result of the CLI command
272
+
273
+ Raises:
274
+ subprocess.CalledProcessError: If check=True and command fails
275
+
276
+ Example:
277
+ ```py
278
+ result = project.run_cli("graph", "history", "--limit", "5")
279
+ print(result.stdout)
280
+ ```
281
+ """
282
+ # Start with current environment
283
+ cmd_env = os.environ.copy()
284
+
285
+ # Add project directory to PYTHONPATH so modules can be imported
286
+ pythonpath = str(self.project_dir)
287
+ if "PYTHONPATH" in cmd_env:
288
+ pythonpath = f"{pythonpath}{os.pathsep}{cmd_env['PYTHONPATH']}"
289
+ cmd_env["PYTHONPATH"] = pythonpath
290
+
291
+ # Apply additional env overrides
292
+ if env:
293
+ cmd_env.update(env)
294
+
295
+ # Run CLI command
296
+ try:
297
+ result = subprocess.run(
298
+ [sys.executable, "-m", "metaxy.cli.app", *args],
299
+ cwd=str(self.project_dir),
300
+ capture_output=True,
301
+ text=True,
302
+ env=cmd_env,
303
+ check=check,
304
+ **kwargs,
305
+ )
306
+ except subprocess.CalledProcessError as e:
307
+ # Re-raise with stderr output for better debugging
308
+ error_msg = f"CLI command failed: {' '.join(args)}\n"
309
+ error_msg += f"Exit code: {e.returncode}\n"
310
+ if e.stdout:
311
+ error_msg += f"STDOUT:\n{e.stdout}\n"
312
+ if e.stderr:
313
+ error_msg += f"STDERR:\n{e.stderr}\n"
314
+ raise RuntimeError(error_msg) from e
315
+
316
+ return result
317
+
318
+ @cached_property
319
+ def config(self) -> MetaxyConfig:
320
+ """Load configuration from project's metaxy.toml."""
321
+ return MetaxyConfig.load(self.project_dir / "metaxy.toml")
322
+
323
+ @cached_property
324
+ def stores(self) -> dict[str, MetadataStore]:
325
+ """Get all configured stores from project config."""
326
+ return {k: self.config.get_store(k) for k in self.config.stores}
327
+
328
+
329
class ExternalMetaxyProject(MetaxyProject):
    """Helper for working with existing Metaxy projects.

    Use this class to interact with pre-existing projects like examples,
    running CLI commands and accessing their configuration.

    Example:
        ```py
        project = ExternalMetaxyProject(Path("examples/example-migration"))
        result = project.run_cli("graph", "push", env={"STAGE": "1"})
        assert result.returncode == 0
        print(project.package_name)  # "example_migration"
        ```
    """

    def __init__(self, project_dir: Path, require_config: bool = True):
        """Initialize an external Metaxy project.

        Args:
            project_dir: Path to existing project directory (may contain metaxy.toml)
            require_config: If True, requires metaxy.toml to exist (default: True)

        Raises:
            ValueError: If require_config is True and metaxy.toml is missing
        """
        super().__init__(project_dir)
        if require_config and not (self.project_dir / "metaxy.toml").exists():
            raise ValueError(
                f"No metaxy.toml found in {self.project_dir}. "
                "ExternalMetaxyProject requires an existing project configuration."
            )
        # Both are populated by setup_venv()
        self._venv_path: Path | None = None
        self._venv_python: Path | None = None

    @staticmethod
    def _venv_environ(venv_path: Path) -> dict[str, str]:
        """Return a copy of the current environment that targets ``venv_path``."""
        venv_env = os.environ.copy()
        # Set VIRTUAL_ENV to activate the venv
        venv_env["VIRTUAL_ENV"] = str(venv_path)
        # Remove PYTHONHOME if set (can interfere with venv)
        venv_env.pop("PYTHONHOME", None)
        return venv_env

    @staticmethod
    def _uv_pip_install_editable(
        source: Path, venv_env: dict[str, str], failure_summary: str
    ) -> None:
        """Run ``uv pip install -e <source>`` and raise RuntimeError on failure."""
        result = subprocess.run(
            ["uv", "pip", "install", "-e", str(source)],
            env=venv_env,
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"{failure_summary}\n"
                f"STDOUT: {result.stdout}\n"
                f"STDERR: {result.stderr}"
            )

    def setup_venv(
        self, venv_path: Path, install_metaxy_from: Path | None = None
    ) -> Path:
        """Create a virtual environment and install the project.

        Uses ``uv`` to create the venv (with the current interpreter) and to
        install both metaxy and this project in editable mode.

        Args:
            venv_path: Path where the venv should be created
            install_metaxy_from: Optional path to metaxy source to install (defaults to current)

        Returns:
            Path to the Python interpreter in the venv

        Raises:
            subprocess.CalledProcessError: If creating the venv fails
            RuntimeError: If installing metaxy or the project fails

        Example:
            ```py
            project = ExternalMetaxyProject(Path("tests/fixtures/test-project"))
            with tempfile.TemporaryDirectory() as tmpdir:
                project.setup_venv(Path(tmpdir) / "venv")
                result = project.run_in_venv("python", "-c", "import test_metaxy_project")
            ```
        """
        # Create venv using uv
        subprocess.run(
            ["uv", "venv", str(venv_path), "--python", str(sys.executable)], check=True
        )

        if install_metaxy_from is None:
            # Default to metaxy package location (get the repo root)
            # metaxy.__file__ -> .../src/metaxy/__init__.py
            # .parent -> .../src/metaxy
            # .parent -> .../src
            # .parent -> repo root
            import metaxy

            install_metaxy_from = Path(metaxy.__file__).parent.parent.parent

        venv_env = self._venv_environ(venv_path)

        # Install metaxy first, then the project itself
        self._uv_pip_install_editable(
            install_metaxy_from,
            venv_env,
            f"Failed to install metaxy from {install_metaxy_from}",
        )
        self._uv_pip_install_editable(
            self.project_dir,
            venv_env,
            f"Failed to install project from {self.project_dir}",
        )

        self._venv_path = venv_path

        # Standard venv layout: Scripts\python.exe on Windows, bin/python elsewhere
        if os.name == "nt":
            self._venv_python = Path(venv_path) / "Scripts" / "python.exe"
        else:
            self._venv_python = Path(venv_path) / "bin" / "python"
        return self._venv_python

    def run_in_venv(
        self, *args, check: bool = True, env: dict[str, str] | None = None, **kwargs
    ):
        """Run a command in the configured venv.

        The command is launched through ``uv run --active`` with the project
        directory as working directory; output is captured as text.

        Args:
            *args: Command and arguments (e.g., "python", "-c", "print('hello')")
            check: If True (default), raises CalledProcessError on non-zero exit
            env: Optional dict of additional environment variables
            **kwargs: Additional arguments to pass to subprocess.run()

        Returns:
            subprocess.CompletedProcess: Result of the command

        Raises:
            RuntimeError: If setup_venv() hasn't been called yet
            subprocess.CalledProcessError: If check=True and command fails

        Example:
            ```py
            project.setup_venv(Path("/tmp/venv"))
            result = project.run_in_venv("python", "-m", "my_module")
            ```
        """
        if self._venv_path is None:
            raise RuntimeError("No venv configured. Call setup_venv() first.")

        cmd_env = self._venv_environ(self._venv_path)

        # Apply additional env overrides
        if env:
            cmd_env.update(env)

        return subprocess.run(
            ["uv", "run", "--active", *args],
            cwd=str(self.project_dir),
            capture_output=True,
            text=True,
            env=cmd_env,
            check=check,
            **kwargs,
        )

    @cached_property
    def package_name(self) -> str:
        """Get the Python package name from pyproject.toml.

        Converts the project name (e.g., "example-migration") to a valid
        Python module name (e.g., "example_migration") by replacing hyphens
        with underscores.

        Returns:
            The Python package/module name

        Raises:
            FileNotFoundError: If pyproject.toml doesn't exist
            ValueError: If pyproject.toml doesn't contain project.name
        """
        pyproject_path = self.project_dir / "pyproject.toml"
        if not pyproject_path.exists():
            raise FileNotFoundError(
                f"No pyproject.toml found in {self.project_dir}. "
                "Cannot determine package name."
            )

        # Prefer the standard-library parser (Python 3.11+); fall back to the
        # tomli backport on older interpreters.
        try:
            import tomllib as toml_parser
        except ModuleNotFoundError:
            import tomli as toml_parser

        with open(pyproject_path, "rb") as f:
            pyproject = toml_parser.load(f)

        project_name = pyproject.get("project", {}).get("name")
        if not project_name:
            raise ValueError(
                f"No project.name found in {pyproject_path}. "
                "Cannot determine package name."
            )

        # Convert project name to valid Python package name (replace hyphens with underscores)
        return project_name.replace("-", "_")
540
+
541
+
542
class TempMetaxyProject(MetaxyProject):
    """Helper for creating temporary Metaxy projects.

    Provides a context manager API for dynamically creating feature modules
    and running CLI commands with proper entrypoint configuration.

    Example:
        ```py
        project = TempMetaxyProject(tmp_path)

        def features():
            from metaxy import BaseFeature as Feature, FeatureSpec, FeatureKey, FieldSpec, FieldKey

            class MyFeature(Feature, spec=FeatureSpec(
                key=FeatureKey(["my_feature"]),
                fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")]
            )):
                pass

        with project.with_features(features):
            result = project.run_cli("graph", "push")
            assert result.returncode == 0
        ```
    """

    def __init__(self, tmp_path: Path, config_content: str | None = None):
        """Initialize a temporary Metaxy project.

        Creates the project directory if needed and writes metaxy.toml.

        Args:
            tmp_path: Temporary directory path (usually from pytest tmp_path fixture)
            config_content: Optional custom configuration content for metaxy.toml.
                If not provided, uses default DuckDB configuration.
        """
        super().__init__(tmp_path)
        self.project_dir.mkdir(exist_ok=True)
        self._feature_modules: list[str] = []
        self._module_counter = 0
        self._custom_config = config_content
        self._write_config()

    def _write_config(self):
        """Write metaxy.toml configuration file."""
        if self._custom_config is not None:
            # Use custom config content
            config_content = self._custom_config
        else:
            # Default DuckDB store configuration. Paths are rendered with
            # forward slashes: inside a TOML basic string a backslash starts
            # an escape sequence, so native Windows paths would not parse.
            dev_db_path = (self.project_dir / "metadata.duckdb").as_posix()
            staging_db_path = (self.project_dir / "metadata_staging.duckdb").as_posix()
            config_lines = [
                'project = "test"',
                'store = "dev"',
                "",
                "[stores.dev]",
                'type = "metaxy.metadata_store.duckdb.DuckDBMetadataStore"',
                "",
                "[stores.dev.config]",
                f'database = "{dev_db_path}"',
                "",
                "[stores.staging]",
                'type = "metaxy.metadata_store.duckdb.DuckDBMetadataStore"',
                "",
                "[stores.staging.config]",
                f'database = "{staging_db_path}"',
                "",
            ]
            config_content = "\n".join(config_lines)
        (self.project_dir / "metaxy.toml").write_text(config_content, encoding="utf-8")

    @staticmethod
    def _extract_body_source(features_func) -> str:
        """Return the dedented body of ``features_func`` as module source text.

        The function header is located with ``ast`` so multi-line or annotated
        signatures are handled. If the source cannot be parsed that way, the
        first line that starts with ``def`` and contains a colon is treated as
        the header.
        """
        import ast

        lines = inspect.getsource(features_func).split("\n")
        body_start = None

        try:
            func_node = ast.parse(textwrap.dedent("\n".join(lines))).body[0]
            first_stmt = func_node.body[0]
            # A decorated class/function starts at its first decorator
            first_line = min(
                [first_stmt.lineno]
                + [d.lineno for d in getattr(first_stmt, "decorator_list", [])]
            )
        except (SyntaxError, IndexError, AttributeError, TypeError):
            func_node = None
        else:
            is_function = isinstance(
                func_node, (ast.FunctionDef, ast.AsyncFunctionDef)
            )
            if is_function and first_line > func_node.lineno:
                body_start = first_line - 1
                # Keep blank lines and comments sitting directly above the
                # first statement, without walking back into the header.
                while (
                    body_start - 1 >= func_node.lineno
                    and lines[body_start - 1].strip()[:1] in ("", "#")
                ):
                    body_start -= 1

        if body_start is None:
            # Fallback heuristic: skip everything up to the first `def` line
            body_start = 0
            for i, line in enumerate(lines):
                if line.strip().startswith("def ") and ":" in line:
                    body_start = i + 1
                    break

        return textwrap.dedent("\n".join(lines[body_start:]))

    @contextmanager
    def with_features(self, features_func, module_name: str | None = None):
        """Context manager that sets up features for the duration of the block.

        Extracts source code from features_func (skipping the function header),
        writes it to a Python module file, and tracks it so run_cli() exposes
        it through a METAXY_ENTRYPOINT_N environment variable.

        Args:
            features_func: Function containing feature class definitions.
                All imports must be inside the function body.
            module_name: Optional module name. If not provided, generates
                "features_N" from an internal counter.

        Yields:
            str: The module name that was created

        Example:
            ```py
            def my_features():
                from metaxy import BaseFeature as Feature, FeatureSpec, FeatureKey

                class MyFeature(Feature, spec=...):
                    pass

            with project.with_features(my_features) as module:
                print(module)  # "features_0"
                result = project.run_cli("graph", "push")
            ```
        """
        # Generate module name if not provided
        if module_name is None:
            module_name = f"features_{self._module_counter}"
            self._module_counter += 1

        # Write to file in project directory (Python source defaults to UTF-8)
        feature_file = self.project_dir / f"{module_name}.py"
        feature_file.write_text(
            self._extract_body_source(features_func), encoding="utf-8"
        )

        # A module of the same name may have been compiled before; cached
        # bytecode is validated by whole-second mtime and size only, so drop it.
        from importlib.util import cache_from_source

        try:
            Path(cache_from_source(str(feature_file))).unlink()
        except (NotImplementedError, FileNotFoundError):
            pass

        # Track this module
        self._feature_modules.append(module_name)

        try:
            yield module_name
        finally:
            # Cleanup: remove from tracking (file stays for debugging)
            if module_name in self._feature_modules:
                self._feature_modules.remove(module_name)

    def run_cli(
        self, *args, check: bool = True, env: dict[str, str] | None = None, **kwargs
    ):
        """Run CLI command with current feature modules loaded.

        Automatically sets METAXY_ENTRYPOINT_0, METAXY_ENTRYPOINT_1, etc.
        based on active with_features() context managers, then delegates to
        MetaxyProject.run_cli().

        Args:
            *args: CLI command arguments (e.g., "graph", "push")
            check: If True (default), raises RuntimeError on non-zero exit
            env: Optional dict of additional environment variables; these take
                precedence over the generated entrypoint variables
            **kwargs: Additional arguments to pass to subprocess.run()

        Returns:
            subprocess.CompletedProcess: Result of the CLI command

        Raises:
            RuntimeError: If check=True and command fails (chained from
                subprocess.CalledProcessError)

        Example:
            ```py
            result = project.run_cli("graph", "history", "--limit", "5")
            print(result.stdout)
            ```
        """
        # Set entrypoints for all tracked modules
        # Use METAXY_ENTRYPOINT_0, METAXY_ENTRYPOINT_1, etc. (single underscore for list indexing)
        merged_env = {
            f"METAXY_ENTRYPOINT_{idx}": module_name
            for idx, module_name in enumerate(self._feature_modules)
        }

        # Caller-supplied overrides win over the generated entrypoints
        if env:
            merged_env.update(env)

        return super().run_cli(*args, check=check, env=merged_env, **kwargs)

    @property
    def entrypoints(self):
        """Names of the entrypoint environment variables for the tracked modules."""
        return [f"METAXY_ENTRYPOINT_{idx}" for idx in range(len(self._feature_modules))]

    @property
    def graph(self) -> FeatureGraph:
        """Load features from the project's feature modules into a graph.

        Returns:
            FeatureGraph with all features from tracked modules loaded
        """
        graph = FeatureGraph()

        # Ensure project dir is in sys.path
        project_dir_str = str(self.project_dir)
        was_in_path = project_dir_str in sys.path
        if not was_in_path:
            sys.path.insert(0, project_dir_str)

        # Feature files are created at runtime; make sure the path finders
        # do not answer from a stale directory listing.
        importlib.invalidate_caches()

        try:
            with graph.use():
                # Import feature modules directly
                for module_name in self._feature_modules:
                    # Import or reload the module
                    if module_name in sys.modules:
                        importlib.reload(sys.modules[module_name])
                    else:
                        importlib.import_module(module_name)
        finally:
            # Clean up sys.path if we added it
            if not was_in_path and project_dir_str in sys.path:
                sys.path.remove(project_dir_str)

        return graph