metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/__init__.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from metaxy._version import __version__
|
|
4
|
+
from metaxy.config import MetaxyConfig, StoreConfig
|
|
5
|
+
from metaxy.entrypoints import (
|
|
6
|
+
load_features,
|
|
7
|
+
load_module_entrypoint,
|
|
8
|
+
load_package_entrypoints,
|
|
9
|
+
)
|
|
10
|
+
from metaxy.metadata_store import AccessMode, InMemoryMetadataStore, MetadataStore
|
|
11
|
+
from metaxy.migrations import (
|
|
12
|
+
BaseOperation,
|
|
13
|
+
DataVersionReconciliation,
|
|
14
|
+
DiffMigration,
|
|
15
|
+
FullGraphMigration,
|
|
16
|
+
MetadataBackfill,
|
|
17
|
+
Migration,
|
|
18
|
+
MigrationExecutor,
|
|
19
|
+
MigrationResult,
|
|
20
|
+
SystemTableStorage,
|
|
21
|
+
detect_diff_migration,
|
|
22
|
+
)
|
|
23
|
+
from metaxy.models.feature import (
|
|
24
|
+
BaseFeature,
|
|
25
|
+
FeatureGraph,
|
|
26
|
+
current_graph,
|
|
27
|
+
get_feature_by_key,
|
|
28
|
+
graph,
|
|
29
|
+
)
|
|
30
|
+
from metaxy.models.feature_spec import (
|
|
31
|
+
FeatureDep,
|
|
32
|
+
FeatureSpec,
|
|
33
|
+
FeatureSpecWithIDColumns,
|
|
34
|
+
IDColumns,
|
|
35
|
+
)
|
|
36
|
+
from metaxy.models.field import (
|
|
37
|
+
FieldDep,
|
|
38
|
+
FieldSpec,
|
|
39
|
+
SpecialFieldDep,
|
|
40
|
+
)
|
|
41
|
+
from metaxy.models.fields_mapping import (
|
|
42
|
+
AllFieldsMapping,
|
|
43
|
+
DefaultFieldsMapping,
|
|
44
|
+
FieldsMapping,
|
|
45
|
+
FieldsMappingType,
|
|
46
|
+
)
|
|
47
|
+
from metaxy.models.lineage import LineageRelationship
|
|
48
|
+
from metaxy.models.types import (
|
|
49
|
+
CoercibleToFeatureKey,
|
|
50
|
+
CoercibleToFieldKey,
|
|
51
|
+
FeatureDepMetadata,
|
|
52
|
+
FeatureKey,
|
|
53
|
+
FieldKey,
|
|
54
|
+
ValidatedFeatureKey,
|
|
55
|
+
ValidatedFeatureKeyAdapter,
|
|
56
|
+
ValidatedFeatureKeySequence,
|
|
57
|
+
ValidatedFeatureKeySequenceAdapter,
|
|
58
|
+
ValidatedFieldKey,
|
|
59
|
+
ValidatedFieldKeyAdapter,
|
|
60
|
+
ValidatedFieldKeySequence,
|
|
61
|
+
ValidatedFieldKeySequenceAdapter,
|
|
62
|
+
)
|
|
63
|
+
from metaxy.versioning.types import HashAlgorithm
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def coerce_to_feature_key(value: CoercibleToFeatureKey) -> FeatureKey:
    """Convert any supported input into a [`FeatureKey`][metaxy.FeatureKey].

    Validation is delegated to `ValidatedFeatureKeyAdapter.validate_python`.

    Supported inputs:

    - slashed `str`: `"a/b/c"`
    - `Sequence[str]`: `["a", "b", "c"]`
    - `FeatureKey`: pass through
    - `type[BaseFeature]`: the key is extracted from `.spec().key`

    Args:
        value: Object to convert into a `FeatureKey`.

    Returns:
        The resulting `FeatureKey`.

    Raises:
        ValidationError: If `value` cannot be converted into a `FeatureKey`.
    """
    feature_key = ValidatedFeatureKeyAdapter.validate_python(value)
    return feature_key
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def init_metaxy(
    config_file: Path | None = None, search_parents: bool = True
) -> MetaxyConfig:
    """Initialize Metaxy: load the configuration, then load the features.

    This is the main user-facing initialization function. Features are
    [discovered](../../guide/learn/feature-discovery.md) from installed Python packages metadata.

    Args:
        config_file (Path | None, optional): Path to the configuration file.

            Will be auto-discovered in current or parent directories if not provided.

            !!! tip
                `METAXY_CONFIG` environment variable can be used to set this parameter

        search_parents (bool, optional): Whether to search parent directories for configuration files. Defaults to True.

    Returns:
        MetaxyConfig: The initialized Metaxy configuration.
    """
    config = MetaxyConfig.load(config_file=config_file, search_parents=search_parents)
    # Features are loaded from the entrypoints declared in the loaded config.
    load_features(config.entrypoints)
    return config
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Public API of the `metaxy` package. Every name appears exactly once
# (a duplicated "FeatureSpec" entry was removed); order is otherwise unchanged.
__all__ = [
    "BaseFeature",
    "FeatureGraph",
    "graph",
    "FeatureSpec",
    "get_feature_by_key",
    "FeatureDep",
    "FeatureDepMetadata",
    "FeatureSpecWithIDColumns",
    "AllFieldsMapping",
    "DefaultFieldsMapping",
    "FieldsMapping",
    "FieldsMappingType",
    "FieldDep",
    "FieldSpec",
    "SpecialFieldDep",
    "FeatureKey",
    "FieldKey",
    "CoercibleToFeatureKey",
    "CoercibleToFieldKey",
    "coerce_to_feature_key",
    "ValidatedFeatureKey",
    "ValidatedFieldKey",
    "ValidatedFeatureKeySequence",
    "ValidatedFieldKeySequence",
    "MetadataStore",
    "InMemoryMetadataStore",
    "load_features",
    "load_module_entrypoint",
    "load_package_entrypoints",
    "Migration",
    "DiffMigration",
    "FullGraphMigration",
    "MigrationResult",
    "MigrationExecutor",
    "SystemTableStorage",
    "BaseOperation",
    "DataVersionReconciliation",
    "MetadataBackfill",
    "detect_diff_migration",
    "MetaxyConfig",
    "StoreConfig",
    "init_metaxy",
    "IDColumns",
    "HashAlgorithm",
    "LineageRelationship",
    "AccessMode",
    "current_graph",
    "ValidatedFeatureKeyAdapter",
    "ValidatedFieldKeyAdapter",
    "ValidatedFeatureKeySequenceAdapter",
    "ValidatedFieldKeySequenceAdapter",
    "__version__",
]
|
metaxy/_packaging.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Packaging utilities for Metaxy project detection.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to detect which Metaxy project a Feature belongs to
|
|
4
|
+
based on package metadata entry points.
|
|
5
|
+
|
|
6
|
+
Entry point format in pyproject.toml:
|
|
7
|
+
[project.entry-points."metaxy.project"]
|
|
8
|
+
my-project = "my_package.entrypoint"
|
|
9
|
+
|
|
10
|
+
The entry point name is the project name, and the value is the entrypoint module.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from functools import lru_cache
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from importlib.metadata import entry_points
|
|
17
|
+
except ImportError:
|
|
18
|
+
from importlib_metadata import entry_points # type: ignore
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def detect_project_from_entrypoints(module_name: str) -> str | None:
    """Resolve the Metaxy project that owns ``module_name``.

    The module is matched against the modules registered in the
    ``metaxy.project`` entry point group, e.g.:

        [project.entry-points."metaxy.project"]
        my-project = "my_package:init"

    Only the module part of the entry point value is compared; whatever
    follows ``:`` is ignored and never called. A module matches when it is
    the registered module itself or lives underneath it
    (``my_package.features`` matches ``my_package``).

    Args:
        module_name: Fully qualified module name (e.g., "my_package.features")

    Returns:
        The entry point name (used as the project name) of the first match,
        or None when no registered module matches.
    """
    # The lookup table comes from a cached helper, so repeated calls are cheap.
    registered = get_all_project_entrypoints()

    return next(
        (
            project
            for project, root_module in registered.items()
            if module_name == root_module
            or module_name.startswith(f"{root_module}.")
        ),
        None,
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@lru_cache(maxsize=1)
|
|
52
|
+
def get_all_project_entrypoints() -> dict[str, str]:
|
|
53
|
+
"""Get all declared metaxy.project entry points.
|
|
54
|
+
|
|
55
|
+
This function is cached since entry points don't change during program execution.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
Dictionary mapping project names to entrypoint module paths (without :function)
|
|
59
|
+
|
|
60
|
+
Raises:
|
|
61
|
+
ValueError: If a package declares multiple entry points (only one per package is allowed)
|
|
62
|
+
"""
|
|
63
|
+
result: dict[str, str] = {}
|
|
64
|
+
|
|
65
|
+
# Track which top-level package each entry point belongs to
|
|
66
|
+
package_to_projects: dict[str, list[str]] = {}
|
|
67
|
+
|
|
68
|
+
# Query the metaxy.project group directly - no deprecation warning
|
|
69
|
+
group_eps = entry_points(group="metaxy.project")
|
|
70
|
+
|
|
71
|
+
# Map project name (entry point name) to entrypoint module (entry point value)
|
|
72
|
+
for ep in group_eps:
|
|
73
|
+
# Extract module path from entry point value (before ':' if present)
|
|
74
|
+
ep_module = ep.value.split(":")[0] if ":" in ep.value else ep.value
|
|
75
|
+
|
|
76
|
+
# Get the top-level package name (first component of module path)
|
|
77
|
+
top_level_package = ep_module.split(".")[0]
|
|
78
|
+
|
|
79
|
+
# Track this project for this package
|
|
80
|
+
if top_level_package not in package_to_projects:
|
|
81
|
+
package_to_projects[top_level_package] = []
|
|
82
|
+
package_to_projects[top_level_package].append(ep.name)
|
|
83
|
+
|
|
84
|
+
result[ep.name] = ep_module
|
|
85
|
+
|
|
86
|
+
# Validate that each package only declares one entry point
|
|
87
|
+
for package, projects in package_to_projects.items():
|
|
88
|
+
if len(projects) > 1:
|
|
89
|
+
# Format the entry points list
|
|
90
|
+
entries = ", ".join(f"'{p}'" for p in projects)
|
|
91
|
+
raise ValueError(
|
|
92
|
+
f"Found multiple entries in `metaxy.project` entrypoints group: {entries}. "
|
|
93
|
+
f"The key should be the Metaxy project name, thus only one entry is allowed."
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
return result
|
metaxy/_testing/__init__.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Testing infrastructure for Metaxy examples and runbooks.
|
|
2
|
+
|
|
3
|
+
This is a private module (_testing) containing testing utilities organized into:
|
|
4
|
+
- runbook: Runbook system for testing and documenting examples
|
|
5
|
+
- metaxy_project: Project helpers for creating and managing temporary Metaxy projects
|
|
6
|
+
- pytest_helpers: Testing helpers for pytest tests
|
|
7
|
+
- models: Testing-specific model implementations
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
# Runbook system
|
|
11
|
+
# Metaxy project helpers
|
|
12
|
+
from metaxy._testing.metaxy_project import (
|
|
13
|
+
ExternalMetaxyProject,
|
|
14
|
+
HashAlgorithmCases,
|
|
15
|
+
MetaxyProject,
|
|
16
|
+
TempFeatureModule,
|
|
17
|
+
TempMetaxyProject,
|
|
18
|
+
assert_all_results_equal,
|
|
19
|
+
)
|
|
20
|
+
from metaxy._testing.models import SampleFeature, SampleFeatureSpec
|
|
21
|
+
from metaxy._testing.pytest_helpers import add_metaxy_provenance_column
|
|
22
|
+
from metaxy._testing.runbook import (
|
|
23
|
+
ApplyPatchStep,
|
|
24
|
+
AssertOutputStep,
|
|
25
|
+
BaseStep,
|
|
26
|
+
Runbook,
|
|
27
|
+
RunbookRunner,
|
|
28
|
+
RunCommandStep,
|
|
29
|
+
Scenario,
|
|
30
|
+
StepType,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Names re-exported by the private `metaxy._testing` package, grouped by the
# submodule they are imported from.
__all__ = [
    # Runbook system (metaxy._testing.runbook)
    "Runbook",
    "Scenario",
    "BaseStep",
    "RunCommandStep",
    "ApplyPatchStep",
    "AssertOutputStep",
    "StepType",
    "RunbookRunner",
    # Metaxy project helpers (metaxy._testing.metaxy_project)
    "TempFeatureModule",
    "HashAlgorithmCases",
    "MetaxyProject",
    "ExternalMetaxyProject",
    "TempMetaxyProject",
    "assert_all_results_equal",
    # Pytest helpers (metaxy._testing.pytest_helpers)
    "add_metaxy_provenance_column",
    # Testing models (metaxy._testing.models)
    "SampleFeatureSpec",
    "SampleFeature",
]
|
metaxy/_testing/config.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Test configuration classes for documentation generation testing."""
|
|
2
|
+
|
|
3
|
+
from pydantic import Field as PydanticField
|
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SamplePluginConfig(BaseSettings):
    """Sample plugin configuration for doc generation testing.

    This is a minimal config class used to test documentation generation
    without depending on the full Metaxy config structure.
    """

    # Settings behaviour (pydantic-settings): environment variables use the
    # `SAMPLE_PLUGIN_` prefix, `__` is the nested-field delimiter, and
    # `frozen=True` makes instances immutable.
    model_config = SettingsConfigDict(
        env_prefix="SAMPLE_PLUGIN_",
        env_nested_delimiter="__",
        frozen=True,
    )

    # Master switch for the sample plugin; off by default.
    enable: bool = PydanticField(
        default=False,
        description="Whether to enable the test plugin",
    )

    # Plugin name; defaults to "test".
    name: str = PydanticField(
        default="test",
        description="Name of the test plugin",
    )

    # Port of the test service; defaults to 8080.
    port: int = PydanticField(
        default=8080,
        description="Port number for the test service",
    )

    # Debug-mode flag; off by default.
    debug: bool = PydanticField(
        default=False,
        description="Enable debug mode",
    )

    # Example of an optional field; None when not configured.
    optional_setting: str | None = PydanticField(
        default=None,
        description="Optional configuration setting",
    )
|