metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/entrypoints.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""Entrypoint discovery and loading for Metaxy features.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to automatically discover and load Feature
|
|
4
|
+
classes from modules, supporting both:
|
|
5
|
+
- Config-based entrypoints (list of module paths)
|
|
6
|
+
- Environment-based entrypoints (environment variables starting with METAXY_ENTRYPOINT)
|
|
7
|
+
- Package-based entrypoints (via importlib.metadata)
|
|
8
|
+
|
|
9
|
+
Features are automatically registered to the active FeatureGraph when their
|
|
10
|
+
containing modules are imported (via the Feature metaclass).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import importlib
|
|
14
|
+
import os
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from metaxy.models.feature import FeatureGraph
|
|
19
|
+
|
|
20
|
+
from importlib.metadata import entry_points # type: ignore[import-not-found]
|
|
21
|
+
|
|
22
|
+
# Default entry point group name for package-based discovery
|
|
23
|
+
DEFAULT_ENTRY_POINT_GROUP = "metaxy.project"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class EntrypointLoadError(Exception):
    """Signal that an entrypoint could not be imported or executed."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_module_entrypoint(
    module_path: str,
    *,
    graph: "FeatureGraph | None" = None,
) -> None:
    """Load a single module entrypoint.

    Imports ``module_path`` while the target graph is made active through
    ``graph.use()``. Feature classes defined in the imported module are
    expected to register themselves to the active graph on import (via the
    Feature metaclass, per this module's docstring).

    Args:
        module_path: Fully qualified module path (e.g., "myapp.features.video")
        graph: Target graph. If None, uses FeatureGraph.get_active()

    Raises:
        EntrypointLoadError: If the module cannot be imported, or if any other
            exception is raised while it is being imported

    Example:
        ```py
        from metaxy.entrypoints import load_module_entrypoint
        load_module_entrypoint("myapp.features.core")
        # Features from myapp.features.core are now registered
        ```
    """
    # Compare against None explicitly: an explicitly supplied graph must be
    # honoured even if the graph object evaluates as falsy.
    if graph is None:
        # Function-local import, only needed when the active graph is looked up.
        from metaxy.models.feature import FeatureGraph

        graph = FeatureGraph.get_active()

    try:
        # Set graph as active during import so Features register to it
        with graph.use():
            importlib.import_module(module_path)
    except ImportError as e:
        raise EntrypointLoadError(
            f"Failed to import entrypoint module '{module_path}': {e}"
        ) from e
    except Exception as e:
        # Any other failure raised by the module's top-level code.
        raise EntrypointLoadError(
            f"Error loading entrypoint module '{module_path}': {e}"
        ) from e
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def load_entrypoints(
    entrypoints: list[str],
    *,
    graph: "FeatureGraph | None" = None,
) -> None:
    """Load multiple module entrypoints from a list.

    The target graph is resolved once, before the first import, and every
    module in ``entrypoints`` is then loaded into that same graph in order.

    Args:
        entrypoints: List of module paths to import
        graph: Target graph. If None, uses FeatureGraph.get_active()

    Raises:
        EntrypointLoadError: If any module import fails

    Example:
        ```py
        from metaxy.entrypoints import load_entrypoints
        load_entrypoints([
            "myapp.features.video",
            "myapp.features.audio",
            "myapp.features.text"
        ])
        ```
    """
    # Compare against None explicitly so a supplied graph is never replaced
    # by the active one just because it evaluates as falsy.
    if graph is None:
        # Function-local import, only needed when the active graph is looked up.
        from metaxy.models.feature import FeatureGraph

        graph = FeatureGraph.get_active()

    for module_path in entrypoints:
        load_module_entrypoint(module_path, graph=graph)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def load_package_entrypoints(
    group: str = DEFAULT_ENTRY_POINT_GROUP,
    *,
    graph: "FeatureGraph | None" = None,
) -> None:
    """Load entrypoints from installed packages using importlib.metadata.

    Discovers and loads all entry points registered in the specified group.
    This is the package-based entrypoint mechanism using standard Python
    packaging infrastructure.

    Packages declare entrypoints in their pyproject.toml:
        [project.entry-points."metaxy.project"]
        my-project = "mypackage:init"
        # or point directly to a module
        my-project = "mypackage.features"

    The entry point can reference either:
    - A callable (module:function syntax) that is invoked with no arguments
      after being loaded
    - A module (module syntax) that contains Feature definitions (importing
      registers them)

    Args:
        group: Entry point group name (default: "metaxy.project")
        graph: Target graph. If None, uses FeatureGraph.get_active()

    Raises:
        EntrypointLoadError: If any entrypoint fails to load or its callable
            raises

    Example:
        ```py
        from metaxy.entrypoints import load_package_entrypoints
        # Discover and load all installed plugins
        load_package_entrypoints()
        ```
    """
    # Compare against None explicitly so a supplied graph is never replaced
    # by the active one just because it evaluates as falsy.
    if graph is None:
        # Function-local import, only needed when the active graph is looked up.
        from metaxy.models.feature import FeatureGraph

        graph = FeatureGraph.get_active()

    discovered = entry_points()

    # Results that expose select() are filtered by group through it; the
    # dict-style lookup is kept as a fallback for results without select().
    if hasattr(discovered, "select"):
        eps = discovered.select(group=group)
    else:
        eps = discovered.get(group, [])

    # An empty group simply yields no iterations.
    for ep in eps:
        try:
            # Both the import and the optional call run with the target graph
            # active, so features registered by either land in that graph.
            with graph.use():
                loaded = ep.load()
                # module:function syntax yields a callable that must be invoked;
                # plain module syntax already registered features on import.
                if callable(loaded):
                    loaded()
        except Exception as e:
            raise EntrypointLoadError(
                f"Failed to load package entrypoint '{ep.name}' ({ep.value}): {e}"
            ) from e
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def load_env_entrypoints(*, graph: "FeatureGraph | None" = None) -> None:
    """Load entrypoints from environment variables.

    Discovers and loads all entry points from environment variables whose
    name starts with METAXY_ENTRYPOINT. Each variable should contain a
    comma-separated list of module paths; surrounding whitespace is stripped
    and empty items are ignored. Variables are processed in sorted name
    order, and all module paths are collected before any module is loaded.

    Environment variables:
        METAXY_ENTRYPOINT="myapp.features.core,myapp.features.extra"
        METAXY_ENTRYPOINT_PLUGINS="plugin1.features,plugin2.features"

    Args:
        graph: Target graph, forwarded to load_module_entrypoint for every
            module. If None, uses FeatureGraph.get_active()

    Raises:
        EntrypointLoadError: If any entrypoint fails to load

    Example:
        ```py
        import os
        os.environ["METAXY_ENTRYPOINT"] = "myapp.features.core"
        from metaxy.entrypoints import load_env_entrypoints
        load_env_entrypoints()
        ```
    """
    # Sorting the (name, value) pairs orders them by variable name, which
    # keeps the load order deterministic across runs.
    matching = sorted(
        (key, value)
        for key, value in os.environ.items()
        if key.startswith("METAXY_ENTRYPOINT")
    )

    module_paths: list[str] = []
    for _name, raw_value in matching:
        stripped = (part.strip() for part in raw_value.split(","))
        module_paths.extend(path for path in stripped if path)

    # No matching variables, or only empty values: the loop does nothing.
    for module_path in module_paths:
        load_module_entrypoint(module_path, graph=graph)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def load_features(
    entrypoints: list[str] | None = None,
    package_entrypoint_group: str = DEFAULT_ENTRY_POINT_GROUP,
    *,
    load_config: bool = True,
    load_packages: bool = True,
    load_env: bool = True,
) -> "FeatureGraph":
    """Discover and load entrypoints from every supported source.

    Main entry point for loading features. Sources are processed in a fixed
    order: explicitly passed module paths, then the entrypoints listed in the
    Metaxy config, then installed-package entry points, then environment
    variables.

    Args:
        entrypoints: List of module paths (optional); skipped when empty or None
        package_entrypoint_group: Entry point group for package discovery
        load_config: Whether to load config-based entrypoints (default: True)
        load_packages: Whether to load package-based entrypoints (default: True)
        load_env: Whether to load environment-based entrypoints (default: True)

    Returns:
        The graph that was active when this function was called (useful for
        testing/inspection)

    Raises:
        EntrypointLoadError: If any entrypoint fails to load

    Example:
        ```py
        from metaxy.entrypoints import load_features

        # Load from all sources
        graph = load_features(
            entrypoints=["myapp.features.core"],
            load_packages=True,
            load_env=True
        )

        # Load only from config
        graph = load_features(
            entrypoints=["myapp.features.core"],
            load_packages=False,
            load_env=False
        )
        ```
    """
    from metaxy.config import MetaxyConfig
    from metaxy.models.feature import FeatureGraph

    # Captured up front; this is the object handed back to the caller.
    active_graph = FeatureGraph.get_active()

    # 1. Explicit module paths
    if entrypoints:
        load_entrypoints(entrypoints)

    # 2. Module paths declared in the discovered config file
    if load_config:
        load_entrypoints(MetaxyConfig.load(search_parents=True).entrypoints)

    # 3. Entry points advertised by installed packages
    if load_packages:
        load_package_entrypoints(package_entrypoint_group)

    # 4. METAXY_ENTRYPOINT* environment variables
    if load_env:
        load_env_entrypoints()

    return active_graph
|
metaxy/ext/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Integrations with third-party software."""
|
|
metaxy/ext/dagster/__init__.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from metaxy.ext.dagster.constants import (
|
|
2
|
+
DAGSTER_METAXY_FEATURE_METADATA_KEY,
|
|
3
|
+
DAGSTER_METAXY_INFO_METADATA_KEY,
|
|
4
|
+
DAGSTER_METAXY_KIND,
|
|
5
|
+
DAGSTER_METAXY_PARTITION_KEY,
|
|
6
|
+
DAGSTER_METAXY_PROJECT_TAG_KEY,
|
|
7
|
+
METAXY_DAGSTER_METADATA_KEY,
|
|
8
|
+
)
|
|
9
|
+
from metaxy.ext.dagster.dagster_type import feature_to_dagster_type
|
|
10
|
+
from metaxy.ext.dagster.io_manager import MetaxyIOManager, MetaxyOutput
|
|
11
|
+
from metaxy.ext.dagster.metaxify import metaxify
|
|
12
|
+
from metaxy.ext.dagster.observable import observable_metaxy_asset
|
|
13
|
+
from metaxy.ext.dagster.resources import MetaxyStoreFromConfigResource
|
|
14
|
+
from metaxy.ext.dagster.selection import select_metaxy_assets
|
|
15
|
+
from metaxy.ext.dagster.table_metadata import (
|
|
16
|
+
build_column_lineage,
|
|
17
|
+
build_column_schema,
|
|
18
|
+
build_table_preview_metadata,
|
|
19
|
+
)
|
|
20
|
+
from metaxy.ext.dagster.utils import (
|
|
21
|
+
FeatureStats,
|
|
22
|
+
build_partition_filter,
|
|
23
|
+
compute_feature_stats,
|
|
24
|
+
compute_stats_from_lazy_frame,
|
|
25
|
+
generate_materialize_results,
|
|
26
|
+
generate_observe_results,
|
|
27
|
+
get_partition_filter,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# Public API of the Dagster integration package.
__all__ = [
    # Asset decoration and typing
    "metaxify",
    "feature_to_dagster_type",
    # Table metadata builders
    "build_column_schema",
    "build_column_lineage",
    "build_table_preview_metadata",
    # Asset helpers
    "observable_metaxy_asset",
    "select_metaxy_assets",
    # Result generation, statistics and partition filters
    "generate_materialize_results",
    "generate_observe_results",
    "compute_feature_stats",
    "compute_stats_from_lazy_frame",
    "get_partition_filter",
    "build_partition_filter",
    "FeatureStats",
    # Resources and IO
    "MetaxyStoreFromConfigResource",
    "MetaxyIOManager",
    "MetaxyOutput",
    # Metadata / tag key constants
    "METAXY_DAGSTER_METADATA_KEY",
    "DAGSTER_METAXY_FEATURE_METADATA_KEY",
    "DAGSTER_METAXY_INFO_METADATA_KEY",
    "DAGSTER_METAXY_KIND",
    "DAGSTER_METAXY_PARTITION_KEY",
    "DAGSTER_METAXY_PROJECT_TAG_KEY",
]
|
|
metaxy/ext/dagster/constants.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Keys and values in the "metaxy" namespace.
DAGSTER_METAXY_FEATURE_METADATA_KEY = "metaxy/feature"
DAGSTER_METAXY_KIND = "metaxy"
DAGSTER_METAXY_PARTITION_KEY = "partition_by"
DAGSTER_METAXY_PROJECT_TAG_KEY = "metaxy/project"

# Metadata keys for feature info and Dagster column schema / lineage.
DAGSTER_METAXY_INFO_METADATA_KEY = "metaxy/info"
DAGSTER_COLUMN_SCHEMA_METADATA_KEY = "dagster/column_schema"
DAGSTER_COLUMN_LINEAGE_METADATA_KEY = "dagster/column_lineage"

# Key in the "dagster" namespace exposed under a METAXY_-prefixed name.
METAXY_DAGSTER_METADATA_KEY = "dagster/attributes"
|
|
metaxy/ext/dagster/dagster_type.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""DagsterType builder for Metaxy features.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for creating Dagster types that validate
|
|
4
|
+
Metaxy feature outputs with proper metadata injection (table schema, etc.).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from collections.abc import Callable, Mapping
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import dagster as dg
|
|
11
|
+
import narwhals as nw
|
|
12
|
+
|
|
13
|
+
import metaxy as mx
|
|
14
|
+
from metaxy.ext.dagster.constants import (
|
|
15
|
+
DAGSTER_COLUMN_LINEAGE_METADATA_KEY,
|
|
16
|
+
DAGSTER_COLUMN_SCHEMA_METADATA_KEY,
|
|
17
|
+
DAGSTER_METAXY_INFO_METADATA_KEY,
|
|
18
|
+
)
|
|
19
|
+
from metaxy.ext.dagster.table_metadata import build_column_lineage, build_column_schema
|
|
20
|
+
from metaxy.ext.dagster.utils import build_feature_info_metadata
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _create_type_check_fn(
    feature_key: mx.FeatureKey,
) -> Callable[[dg.TypeCheckContext, Any], dg.TypeCheck]:
    """Build the DagsterType check callback for one Metaxy feature.

    The returned callback accepts a value when it is either:
    - None (allowed for MetaxyOutput)
    - An object that ``narwhals.from_native`` accepts without raising TypeError

    Args:
        feature_key: The Metaxy feature key, used only in the failure message.

    Returns:
        A callable type check function for DagsterType.
    """

    def type_check_fn(context: dg.TypeCheckContext, value: Any) -> dg.TypeCheck:
        # None means "no data to write" and is skipped; everything else is
        # validated by attempting the narwhals conversion.
        if value is not None:
            try:
                nw.from_native(value)
            except TypeError as e:
                failure_description = (
                    f"Expected a narwhals-compatible dataframe or None for "
                    f"Metaxy feature '{feature_key.to_string()}', "
                    f"but got {type(value).__name__}:\n{e}"
                )
                return dg.TypeCheck(success=False, description=failure_description)

        return dg.TypeCheck(success=True)

    return type_check_fn
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def feature_to_dagster_type(
    feature: mx.CoercibleToFeatureKey,
    *,
    name: str | None = None,
    description: str | None = None,
    inject_column_schema: bool = True,
    inject_column_lineage: bool = True,
    metadata: Mapping[str, Any] | None = None,
) -> dg.DagsterType:
    """Build a Dagster type from a Metaxy feature.

    Produces a `dagster.DagsterType` whose type check accepts
    [`MetaxyOutput`][metaxy.ext.dagster.MetaxyOutput] values (narwhals-compatible
    dataframes or `None`) and whose metadata carries the feature info plus,
    optionally, the column schema and column lineage built for the feature class.

    Args:
        feature: The Metaxy feature to create a type for. Anything accepted by
            `metaxy.coerce_to_feature_key`.
        name: Optional custom name for the DagsterType. When not provided (or
            empty), the feature key's table name is used
            (e.g., "project__feature_name").
        description: Optional custom description. When `None`, the cleaned-up
            feature class docstring is used, or a generated description if the
            class has no docstring.
        inject_column_schema: Whether to inject the column schema as metadata.
            Skipped when no schema can be built.
        inject_column_lineage: Whether to inject column lineage as metadata.
            Skipped when no lineage can be built.
        metadata: Optional custom metadata to inject into the DagsterType.
            Entries whose keys collide with the injected keys are overwritten.

    Returns:
        A DagsterType configured for the Metaxy feature with appropriate
        type checking and metadata.

    !!! tip
        This is automatically injected by [`@metaxify`][metaxy.ext.dagster.metaxify.metaxify]

    Example:
        ```python
        import dagster as dg
        import polars as pl
        import metaxy.ext.dagster as mxd
        from myproject.features import MyFeature  # Your Metaxy feature class

        @mxd.metaxify(feature=MyFeature)
        @dg.asset(dagster_type=mxd.feature_to_dagster_type(MyFeature))
        def my_asset():
            return pl.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})
        ```

    !!! info "See also"
        - [`metaxify`][metaxy.ext.dagster.metaxify.metaxify]: Decorator for injecting
          Metaxy metadata into Dagster assets.
        - [`MetaxyOutput`][metaxy.ext.dagster.MetaxyOutput]: The type alias for valid
          Metaxy outputs.
    """
    from metaxy.ext.dagster.io_manager import MetaxyOutput

    feature_key = mx.coerce_to_feature_key(feature)
    feature_cls = mx.get_feature_by_key(feature_key)

    # Name: an explicit value wins, otherwise the feature's table name.
    resolved_name = name or feature_key.table_name

    # Description: explicit value, else class docstring, else generated text.
    if description is None:
        class_doc = feature_cls.__doc__
        if class_doc:
            import inspect

            description = inspect.cleandoc(class_doc)
        else:
            description = f"Metaxy feature '{feature_key.to_string()}'."

    # Metadata: copy the caller's mapping first so the injected keys below
    # take precedence and the caller's object is never mutated.
    type_metadata: dict[str, Any] = dict(metadata) if metadata else {}
    type_metadata[DAGSTER_METAXY_INFO_METADATA_KEY] = build_feature_info_metadata(
        feature_key
    )

    if (
        inject_column_schema
        and (schema := build_column_schema(feature_cls)) is not None
    ):
        type_metadata[DAGSTER_COLUMN_SCHEMA_METADATA_KEY] = schema

    if (
        inject_column_lineage
        and (lineage := build_column_lineage(feature_cls)) is not None
    ):
        type_metadata[DAGSTER_COLUMN_LINEAGE_METADATA_KEY] = lineage

    return dg.DagsterType(
        type_check_fn=_create_type_check_fn(feature_key),
        name=resolved_name,
        description=description,
        typing_type=MetaxyOutput,
        metadata=type_metadata,
    )
|