metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from typing import Any, TypeVar, overload
|
|
3
|
+
|
|
4
|
+
import dagster as dg
|
|
5
|
+
from dagster._core.definitions.events import (
|
|
6
|
+
CoercibleToAssetKey,
|
|
7
|
+
CoercibleToAssetKeyPrefix,
|
|
8
|
+
)
|
|
9
|
+
from typing_extensions import Self
|
|
10
|
+
|
|
11
|
+
import metaxy as mx
|
|
12
|
+
from metaxy.ext.dagster.constants import (
|
|
13
|
+
DAGSTER_COLUMN_LINEAGE_METADATA_KEY,
|
|
14
|
+
DAGSTER_COLUMN_SCHEMA_METADATA_KEY,
|
|
15
|
+
DAGSTER_METAXY_FEATURE_METADATA_KEY,
|
|
16
|
+
DAGSTER_METAXY_INFO_METADATA_KEY,
|
|
17
|
+
DAGSTER_METAXY_KIND,
|
|
18
|
+
DAGSTER_METAXY_PROJECT_TAG_KEY,
|
|
19
|
+
METAXY_DAGSTER_METADATA_KEY,
|
|
20
|
+
)
|
|
21
|
+
from metaxy.ext.dagster.table_metadata import (
|
|
22
|
+
_get_type_string,
|
|
23
|
+
build_column_lineage,
|
|
24
|
+
)
|
|
25
|
+
from metaxy.ext.dagster.utils import (
|
|
26
|
+
build_feature_info_metadata,
|
|
27
|
+
get_asset_key_for_metaxy_feature_spec,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
_T = TypeVar("_T", dg.AssetsDefinition, dg.AssetSpec)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class metaxify:
    """Enrich a Dagster [`AssetsDefinition`][dg.AssetsDefinition] or [`AssetSpec`][dg.AssetSpec] with Metaxy metadata.

    Only assets that carry `metaxy/feature` metadata are enriched.

    Learn more about `@metaxify` and see example screenshots [here](metaxify.md).

    Usable both bare (`@metaxify`) and with arguments (`@metaxify(...)`): in the bare form
    the asset is transformed immediately, otherwise a configured decorator instance is returned.

    Args:
        key: Explicit asset key that overrides all other key resolution logic. Cannot be
            combined with `key_prefix`, nor used on multi-asset definitions that produce
            multiple outputs.
        key_prefix: Prefix to prepend to the resolved asset key. Also applied to upstream
            dependency keys. Cannot be combined with `key`.
        inject_metaxy_kind: Whether to inject `"metaxy"` kind into asset kinds.
            Currently, kinds count is limited by 3, and `metaxify` will skip kind injection
            if there are already 3 kinds on the asset.
        inject_code_version: Whether to inject the Metaxy feature code version into the asset's
            code version. The version is appended in the format `metaxy:<version>`.
        set_description: Whether to set the asset description from the feature class docstring
            if the asset doesn't already have a description.
        inject_column_schema: Whether to inject Pydantic field definitions as Dagster column schema.
            Field types are converted to strings, and field descriptions are used as column descriptions.
        inject_column_lineage: Whether to inject column-level lineage into the asset metadata under
            `dagster/column_lineage`. Uses Pydantic model fields to track
            column provenance via `FeatureDep.rename`, `FeatureSpec.lineage`, and direct pass-through.

    !!! tip
        Multiple Dagster assets can contribute to the same Metaxy feature by setting the same
        `"metaxy/feature"` metadata. This is a perfectly valid setup since Metaxy writes are append-only.

    !!! example
        ```py {hl_lines="8"}
        import dagster as dg
        import metaxy as mx
        import metaxy.ext.dagster as mxd

        @mxd.metaxify()
        @dg.asset(
            metadata={
                "metaxy/feature": "my/feature/key"
            },
        )
        def my_asset(store: mx.MetadataStore):
            with store:
                increment = store.resolve_update("my/feature/key")
            ...
        ```

    ??? example "With `@multi_asset`"
        Multiple Metaxy features can be produced by the same `@multi_asset`. (1)
        { .annotate }

        1. Typically, they are produced independently of each other

        ```python
        @mxd.metaxify()
        @dg.multi_asset(
            specs=[
                dg.AssetSpec("output_a", metadata={"metaxy/feature": "feature/a"}),
                dg.AssetSpec("output_b", metadata={"metaxy/feature": "feature/b"}),
            ]
        )
        def my_multi_asset():
            ...
        ```

    ??? example "With `dagster.AssetSpec`"
        ```py
        asset_spec = dg.AssetSpec(
            key="my_asset",
            metadata={"metaxy/feature": "my/feature/key"},
        )
        asset_spec = mxd.metaxify()(asset_spec)
        ```
    """

    key: dg.AssetKey | None
    key_prefix: dg.AssetKey | None
    inject_metaxy_kind: bool
    inject_code_version: bool
    set_description: bool
    inject_column_schema: bool
    inject_column_lineage: bool

    def __init__(
        self,
        _asset: "_T | None" = None,
        *,
        key: CoercibleToAssetKey | None = None,
        key_prefix: CoercibleToAssetKeyPrefix | None = None,
        inject_metaxy_kind: bool = True,
        inject_code_version: bool = True,
        set_description: bool = True,
        inject_column_schema: bool = True,
        inject_column_lineage: bool = True,
    ) -> None:
        # `__new__` has already populated the instance with the same values; the
        # assignments are repeated here so the attributes are visible to type checkers.
        normalized_key: dg.AssetKey | None = None
        if key is not None:
            normalized_key = dg.AssetKey.from_coercible(key)

        normalized_prefix: dg.AssetKey | None = None
        if key_prefix is not None:
            normalized_prefix = dg.AssetKey.from_coercible(key_prefix)

        self.key = normalized_key
        self.key_prefix = normalized_prefix
        self.inject_metaxy_kind = inject_metaxy_kind
        self.inject_code_version = inject_code_version
        self.set_description = set_description
        self.inject_column_schema = inject_column_schema
        self.inject_column_lineage = inject_column_lineage

    @overload
    def __new__(cls, _asset: _T) -> _T: ...

    @overload
    def __new__(
        cls,
        _asset: None = None,
        *,
        key: CoercibleToAssetKey | None = None,
        key_prefix: CoercibleToAssetKeyPrefix | None = None,
        inject_metaxy_kind: bool = True,
        inject_code_version: bool = True,
        set_description: bool = True,
        inject_column_schema: bool = True,
        inject_column_lineage: bool = True,
    ) -> Self: ...

    def __new__(
        cls,
        _asset: _T | None = None,
        *,
        key: CoercibleToAssetKey | None = None,
        key_prefix: CoercibleToAssetKeyPrefix | None = None,
        inject_metaxy_kind: bool = True,
        inject_code_version: bool = True,
        set_description: bool = True,
        inject_column_schema: bool = True,
        inject_column_lineage: bool = True,
    ) -> "Self | _T":
        # `key` and `key_prefix` are mutually exclusive.
        if not (key is None or key_prefix is None):
            raise ValueError("Cannot specify both `key` and `key_prefix`")

        # Normalised options, shared by both call styles below.
        settings: dict[str, Any] = {
            "key": None if key is None else dg.AssetKey.from_coercible(key),
            "key_prefix": (
                None if key_prefix is None else dg.AssetKey.from_coercible(key_prefix)
            ),
            "inject_metaxy_kind": inject_metaxy_kind,
            "inject_code_version": inject_code_version,
            "set_description": set_description,
            "inject_column_schema": inject_column_schema,
            "inject_column_lineage": inject_column_lineage,
        }

        if _asset is None:
            # `@metaxify(...)` with parentheses: hand back a configured instance
            # whose `__call__` performs the transformation later.
            decorator = object.__new__(cls)
            for attr_name, attr_value in settings.items():
                setattr(decorator, attr_name, attr_value)
            return decorator

        # Bare `@metaxify`: the asset was passed directly, transform it right away.
        return cls._transform(_asset, **settings)

    def __call__(self, asset: _T) -> _T:
        """Apply the configured transformation to `asset`."""
        return self._transform(
            asset,
            key=self.key,
            key_prefix=self.key_prefix,
            inject_metaxy_kind=self.inject_metaxy_kind,
            inject_code_version=self.inject_code_version,
            set_description=self.set_description,
            inject_column_schema=self.inject_column_schema,
            inject_column_lineage=self.inject_column_lineage,
        )

    @staticmethod
    def _transform(
        asset: _T,
        *,
        key: dg.AssetKey | None,
        key_prefix: dg.AssetKey | None,
        inject_metaxy_kind: bool,
        inject_code_version: bool,
        set_description: bool,
        inject_column_schema: bool,
        inject_column_lineage: bool,
    ) -> _T:
        """Return `asset` (an AssetsDefinition or AssetSpec) enriched with Metaxy metadata.

        Raises:
            ValueError: If `key` is given for an AssetsDefinition with more than one key.
        """
        spec_options: dict[str, Any] = {
            "key": key,
            "key_prefix": key_prefix,
            "inject_metaxy_kind": inject_metaxy_kind,
            "inject_code_version": inject_code_version,
            "set_description": set_description,
            "inject_column_schema": inject_column_schema,
            "inject_column_lineage": inject_column_lineage,
        }

        # A bare spec is transformed directly.
        if isinstance(asset, dg.AssetSpec):
            return _metaxify_spec(asset, **spec_options)

        # From here on `asset` is an AssetsDefinition. A single explicit key cannot
        # be assigned to several outputs at once.
        if key is not None:
            output_count = len(asset.keys)
            if output_count > 1:
                raise ValueError(
                    f"Cannot use `key` argument with multi-asset `{asset.node_def.name}` "
                    f"that produces {output_count} outputs. "
                    f"Use `key_prefix` instead to apply a common prefix to all outputs."
                )

        renamed_keys: dict[dg.AssetKey, dg.AssetKey] = {}
        updated_specs: list[dg.AssetSpec] = []
        for old_key, old_spec in asset.specs_by_key.items():
            updated_spec = _metaxify_spec(old_spec, **spec_options)
            updated_specs.append(updated_spec)
            # Remember every key that changed so the definition can be re-keyed.
            if updated_spec.key != old_key:
                renamed_keys[old_key] = updated_spec.key

        return _replace_specs_on_assets_definition(asset, updated_specs, renamed_keys)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _replace_specs_on_assets_definition(
    asset: dg.AssetsDefinition,
    new_specs: list[dg.AssetSpec],
    keys_to_replace: dict[dg.AssetKey, dg.AssetKey],
) -> dg.AssetsDefinition:
    """Rebuild an AssetsDefinition around new specs, sidestepping Dagster's InputDefinition bug.

    Dagster's `map_asset_specs` and `replace_specs_on_asset` fail on assets that have
    input definitions (the `ins=` parameter with `dg.AssetIn` objects):
    `OpDefinition.with_replaced_properties` builds an `ins` dict that mixes
    `InputDefinition` objects with `In` objects, and `OpDefinition.__init__` then calls
    `to_definition()` on the `InputDefinition` objects, which do not have that method.

    To avoid that code path the definition is re-created through `dagster_internal_init`,
    which swaps the specs and leaves the underlying node_def untouched. As a consequence,
    deps newly added to the specs do not become real inputs of the op, but Dagster's asset
    graph still tracks them for dependency purposes.

    Args:
        asset: The original AssetsDefinition to transform.
        new_specs: The transformed specs to use.
        keys_to_replace: A mapping of old keys to new keys for assets whose keys changed.

    Returns:
        A new AssetsDefinition with the transformed specs.
    """
    init_kwargs = asset.get_attributes_dict()
    init_kwargs["specs"] = new_specs

    # No key changed: swapping the specs is all that is needed.
    if not keys_to_replace:
        return asset.__class__.dagster_internal_init(**init_kwargs)

    def _renamed(original_key: dg.AssetKey) -> dg.AssetKey:
        # Keys without a replacement map to themselves.
        return keys_to_replace.get(original_key, original_key)

    # Keep the output-name mapping and the selection in sync with the renamed keys.
    init_kwargs["keys_by_output_name"] = {
        output_name: _renamed(output_key)
        for output_name, output_key in init_kwargs["keys_by_output_name"].items()
    }
    init_kwargs["selected_asset_keys"] = {
        _renamed(selected_key) for selected_key in init_kwargs["selected_asset_keys"]
    }

    # Going through `dagster_internal_init` bypasses the buggy code path in
    # Dagster's `replace_specs_on_asset`.
    rebuilt = asset.__class__.dagster_internal_init(**init_kwargs)

    # `with_attributes` updates the check specs; Dagster handles this when
    # `asset_key_replacements` is provided.
    return rebuilt.with_attributes(asset_key_replacements=keys_to_replace)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _prefix_asset_key(
    asset_key: dg.AssetKey, key_prefix: dg.AssetKey | None
) -> dg.AssetKey:
    """Return `asset_key` with `key_prefix` prepended, or unchanged when no prefix is given."""
    if key_prefix is None:
        return asset_key
    return dg.AssetKey([*key_prefix.path, *asset_key.path])


def _build_column_schema(spec: dg.AssetSpec, feature_cls: Any) -> dg.TableSchema | None:
    """Merge the user-defined column schema on `spec` with the feature's Pydantic fields.

    Columns already present in the spec metadata win over Pydantic fields of the same
    name. The result is sorted by column name; `None` is returned when there are no columns.
    """
    existing_schema = spec.metadata.get(DAGSTER_COLUMN_SCHEMA_METADATA_KEY)
    columns: list[dg.TableColumn] = (
        list(existing_schema.columns) if existing_schema is not None else []
    )
    user_column_names = {column.name for column in columns}

    # Add Metaxy columns that the user has not defined themselves.
    columns.extend(
        dg.TableColumn(
            name=field_name,
            type=_get_type_string(field_info.annotation),
            description=field_info.description,
        )
        for field_name, field_info in feature_cls.model_fields.items()
        if field_name not in user_column_names
    )

    if not columns:
        return None
    columns.sort(key=lambda column: column.name)
    return dg.TableSchema(columns=columns)


def _merge_column_lineage(
    spec: dg.AssetSpec, feature_cls: Any, feature_spec: Any
) -> dg.TableColumnLineage | None:
    """Combine the user-defined column lineage on `spec` with the lineage derived by Metaxy.

    For a column present on both sides the user-defined deps are appended after the
    Metaxy deps. Columns are sorted by name. Returns `None` when Metaxy derives no
    lineage and the spec carries none either.
    """
    existing_lineage = spec.metadata.get(DAGSTER_COLUMN_LINEAGE_METADATA_KEY)
    user_deps_by_column: dict[str, list[dg.TableColumnDep]] = {}
    if existing_lineage is not None:
        # Normalise to lists: the stored sequences are not guaranteed to be lists,
        # and they are concatenated below.
        user_deps_by_column = {
            column: list(deps)
            for column, deps in existing_lineage.deps_by_column.items()
        }

    metaxy_lineage = build_column_lineage(
        feature_cls=feature_cls,
        feature_spec=feature_spec,
    )

    if metaxy_lineage is None and not user_deps_by_column:
        return None

    merged: dict[str, list[dg.TableColumnDep]] = {}
    if metaxy_lineage is not None:
        merged = {
            column: list(deps)
            for column, deps in metaxy_lineage.deps_by_column.items()
        }
    for column, deps in user_deps_by_column.items():
        # User deps extend (never replace) the Metaxy deps of the same column.
        merged.setdefault(column, []).extend(deps)

    return dg.TableColumnLineage(
        deps_by_column={column: merged[column] for column in sorted(merged)}
    )


def _metaxify_spec(
    spec: dg.AssetSpec,
    *,
    key: dg.AssetKey | None,
    key_prefix: dg.AssetKey | None,
    inject_metaxy_kind: bool,
    inject_code_version: bool,
    set_description: bool,
    inject_column_schema: bool,
    inject_column_lineage: bool,
) -> dg.AssetSpec:
    """Transform a single AssetSpec with Metaxy metadata.

    Specs without `metaxy/feature` metadata are returned unchanged, except that
    `key_prefix` (when provided) is still prepended to their key.

    Args:
        spec: The spec to transform.
        key: Explicit asset key; overrides the key resolved from the feature spec.
        key_prefix: Prefix prepended to the resolved key and to upstream dependency keys.
            Ignored for the spec's own key when `key` is given.
        inject_metaxy_kind: Add the Metaxy kind, provided the spec has fewer than 3 kinds.
        inject_code_version: Append `metaxy:<feature code version>` to the code version.
        set_description: Use the feature class docstring when the spec has no description.
        inject_column_schema: Add a column schema built from the feature's Pydantic fields.
        inject_column_lineage: Add column lineage (only when the feature has deps).

    Returns:
        A new AssetSpec carrying the Metaxy key, deps, metadata, kinds and tags.

    Raises:
        ValueError: If the feature's Dagster attributes metadata is not a dict.
    """
    metadata_feature_key = spec.metadata.get(DAGSTER_METAXY_FEATURE_METADATA_KEY)

    # The feature key must come from metadata. Without it only the prefix applies.
    if metadata_feature_key is None:
        if key_prefix is None:
            return spec
        return spec.replace_attributes(key=_prefix_asset_key(spec.key, key_prefix))

    feature_key = mx.coerce_to_feature_key(metadata_feature_key)
    feature_cls = mx.get_feature_by_key(feature_key)
    feature_spec = feature_cls.spec()

    # Final asset key. Priority: key > key_prefix + resolved key > resolved key.
    if key is not None:
        final_key = key
    else:
        final_key = _prefix_asset_key(
            get_asset_key_for_metaxy_feature_spec(feature_spec), key_prefix
        )

    # Merge deps declared on the spec with deps derived from the feature.
    # An ordered list keyed by asset key is used instead of a set of `AssetDep`:
    # it does not require `AssetDep` to be hashable, keeps a deterministic order,
    # and lets a dep the user already declared on the same upstream key (possibly
    # with a partition mapping) take precedence instead of being duplicated.
    final_deps: list[dg.AssetDep] = list(spec.deps)
    known_dep_keys = {declared_dep.asset_key for declared_dep in final_deps}
    for dep in feature_spec.deps:
        upstream_feature_spec = mx.get_feature_by_key(dep.feature).spec()
        # key_prefix applies to upstream keys as well.
        upstream_key = _prefix_asset_key(
            get_asset_key_for_metaxy_feature_spec(upstream_feature_spec), key_prefix
        )
        if upstream_key not in known_dep_keys:
            known_dep_keys.add(upstream_key)
            final_deps.append(dg.AssetDep(asset=upstream_key))

    # Kinds: inject the Metaxy kind only while the spec has fewer than 3 kinds.
    kinds_to_add: set[str] = set()
    if inject_metaxy_kind and len(spec.kinds) < 3:
        kinds_to_add.add(DAGSTER_METAXY_KIND)

    # Dagster attributes stored on the feature spec ("asset_key" is excluded here
    # because the key is resolved separately above).
    dagster_attrs: dict[str, Any] = {}
    raw_dagster_attrs = feature_spec.metadata.get(METAXY_DAGSTER_METADATA_KEY)
    if raw_dagster_attrs is not None:
        if not isinstance(raw_dagster_attrs, dict):
            raise ValueError(
                f"Invalid metadata format for `{feature_spec.key}` "
                f"Metaxy feature metadata key {METAXY_DAGSTER_METADATA_KEY}: "
                f"expected dict, got {type(raw_dagster_attrs).__name__}"
            )
        dagster_attrs = {
            attr_name: attr_value
            for attr_name, attr_value in raw_dagster_attrs.items()
            if attr_name != "asset_key"
        }

    # Code version: append the Metaxy version to an existing code version if present.
    final_code_version = spec.code_version
    if inject_code_version:
        metaxy_code_version = f"metaxy:{feature_spec.code_version}"
        if spec.code_version:
            final_code_version = f"{spec.code_version},{metaxy_code_version}"
        else:
            final_code_version = metaxy_code_version

    # Description: fall back to the feature class docstring.
    final_description = spec.description
    if set_description and final_description is None and feature_cls.__doc__:
        final_description = inspect.cleandoc(feature_cls.__doc__)

    # Tags for project and feature.
    # Note: Dagster tag values only allow alpha-numeric, '_', '-', '.'
    # so we use table_name which uses '__' separator
    tags_to_add: dict[str, str] = {
        DAGSTER_METAXY_PROJECT_TAG_KEY: mx.MetaxyConfig.get().project,
        DAGSTER_METAXY_FEATURE_METADATA_KEY: feature_key.table_name,
    }

    column_schema: dg.TableSchema | None = None
    if inject_column_schema:
        column_schema = _build_column_schema(spec, feature_cls)

    # Lineage is only derived for features that have upstream deps.
    column_lineage: dg.TableColumnLineage | None = None
    if inject_column_lineage and feature_spec.deps:
        column_lineage = _merge_column_lineage(spec, feature_cls, feature_spec)

    metadata_to_add: dict[str, Any] = {
        **spec.metadata,
        DAGSTER_METAXY_FEATURE_METADATA_KEY: feature_key.to_string(),
        DAGSTER_METAXY_INFO_METADATA_KEY: build_feature_info_metadata(feature_key),
    }
    if column_schema is not None:
        metadata_to_add[DAGSTER_COLUMN_SCHEMA_METADATA_KEY] = column_schema
    if column_lineage is not None:
        metadata_to_add[DAGSTER_COLUMN_LINEAGE_METADATA_KEY] = column_lineage

    # `dagster_attrs` comes last so attributes stored on the feature spec can
    # override the computed ones; code version and description are applied after.
    replace_attrs: dict[str, Any] = {
        "key": final_key,
        "deps": final_deps,
        "metadata": metadata_to_add,
        "kinds": {*spec.kinds, *kinds_to_add},
        "tags": {**spec.tags, **tags_to_add},
        **dagster_attrs,
    }

    if final_code_version is not None:
        replace_attrs["code_version"] = final_code_version

    if final_description is not None:
        replace_attrs["description"] = final_description

    return spec.replace_attributes(**replace_attrs)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Observable source assets for Metaxy features."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import dagster as dg
|
|
7
|
+
|
|
8
|
+
import metaxy as mx
|
|
9
|
+
from metaxy.ext.dagster.constants import DAGSTER_METAXY_FEATURE_METADATA_KEY
|
|
10
|
+
from metaxy.ext.dagster.metaxify import metaxify
|
|
11
|
+
from metaxy.ext.dagster.utils import compute_stats_from_lazy_frame
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def observable_metaxy_asset(
    feature: mx.CoercibleToFeatureKey,
    *,
    store_resource_key: str = "store",
    # metaxify kwargs
    inject_metaxy_kind: bool = True,
    inject_code_version: bool = True,
    set_description: bool = True,
    # observable_source_asset kwargs
    **observable_kwargs: Any,
) -> Callable[[Callable[..., Any]], dg.SourceAsset]:
    """Decorator to create an observable source asset for a Metaxy feature.

    The observation reads the feature's metadata from the store, counts rows,
    and uses `mean(metaxy_created_at)` as the data version to track changes.
    Using mean ensures that both additions and deletions are detected.

    The decorated function receives `(context, store, lazy_df)` and can return
    a dict of additional metadata to include in the observation.

    Args:
        feature: The Metaxy feature to observe.
        store_resource_key: Resource key for the MetadataStore (default: `"store"`).
        inject_metaxy_kind: Whether to inject `"metaxy"` kind into asset kinds.
        inject_code_version: Whether to inject the Metaxy feature code version.
        set_description: Whether to set description from feature class docstring.
        **observable_kwargs: Passed to `@observable_source_asset`
            (key, group_name, tags, metadata, description, partitions_def, etc.)

    Returns:
        A decorator that turns the observation function into a source asset.
        The decorator does not modify `observable_kwargs`, so it can be applied
        to more than one function.

    Example:
        ```python
        import metaxy.ext.dagster as mxd
        from myproject.features import ExternalFeature

        @mxd.observable_metaxy_asset(feature=ExternalFeature)
        def external_data(context, store, lazy_df):
            pass

        # With custom metadata - return a dict
        @mxd.observable_metaxy_asset(feature=ExternalFeature)
        def external_data_with_metrics(context, store, lazy_df):
            # Run aggregations in the database
            total = lazy_df.select(nw.col("value").sum()).collect().item(0, 0)
            return {"custom/total": total}
        ```

    Note:
        `observable_source_asset` does not support `deps`. Upstream Metaxy feature
        dependencies from the feature spec are not propagated to the SourceAsset.
    """
    feature_key = mx.coerce_to_feature_key(feature)

    def decorator(fn: Callable[..., Any]) -> dg.SourceAsset:
        # Work on a copy: popping from the shared `observable_kwargs` would drop
        # key/metadata/tags/etc. for every later use of this decorator.
        source_kwargs = dict(observable_kwargs)

        # Build an AssetSpec from kwargs and enrich it with metaxify.
        # User metadata is merged with the `metaxy/feature` entry.
        user_metadata = source_kwargs.pop("metadata", None) or {}
        spec = dg.AssetSpec(
            key=source_kwargs.pop("key", None) or fn.__name__,
            group_name=source_kwargs.pop("group_name", None),
            tags=source_kwargs.pop("tags", None),
            metadata={
                **user_metadata,
                DAGSTER_METAXY_FEATURE_METADATA_KEY: feature_key.to_string(),
            },
            description=source_kwargs.pop("description", None),
        )
        enriched = metaxify(
            inject_metaxy_kind=inject_metaxy_kind,
            inject_code_version=inject_code_version,
            set_description=set_description,
        )(spec)

        def _observe(context: dg.AssetExecutionContext) -> dg.ObserveResult:
            store: mx.MetadataStore = getattr(context.resources, store_resource_key)

            with store:
                lazy_df = store.read_metadata(feature_key)
                stats = compute_stats_from_lazy_frame(lazy_df)

                # Call the user's function - it can return additional metadata.
                # It runs while the store is open so it can evaluate `lazy_df`.
                extra_metadata = fn(context, store, lazy_df) or {}

            # User-supplied entries override the built-in row count on key clash.
            metadata: dict[str, Any] = {"dagster/row_count": stats.row_count}
            metadata.update(extra_metadata)

            return dg.ObserveResult(
                data_version=stats.data_version,
                metadata=metadata,
            )

        # Apply observable_source_asset with the enriched attributes; the
        # remaining kwargs are forwarded untouched.
        return dg.observable_source_asset(
            key=enriched.key,
            description=enriched.description,
            group_name=enriched.group_name,
            tags=dict(enriched.tags) if enriched.tags else None,
            metadata=dict(enriched.metadata) if enriched.metadata else None,
            required_resource_keys={store_resource_key},
            **source_kwargs,
        )(_observe)

    return decorator
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import dagster as dg
|
|
2
|
+
|
|
3
|
+
import metaxy as mx
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MetaxyStoreFromConfigResource(dg.ConfigurableResource[mx.MetadataStore]):
    """This resource creates a [`metaxy.MetadataStore`][metaxy.MetadataStore] based on the current Metaxy configuration (`metaxy.toml`).

    If `name` is not provided, the default store will be used.
    It can be set with `store = "my_name"` in `metaxy.toml` or with `$METAXY_STORE` environment variable.
    """

    # Name of the store to create; `None` selects the default store.
    name: str | None = None

    def create_resource(self, context: dg.InitResourceContext) -> mx.MetadataStore:
        """Create a MetadataStore from the Metaxy configuration.

        Args:
            context: Dagster resource initialization context.

        Returns:
            A MetadataStore configured with the Dagster run ID as the materialization ID.

        Raises:
            dg.DagsterInvariantViolationError: If the resource is initialized
                without a Dagster run (`context.run` is `None`).
        """
        run = context.run
        # Explicit check instead of `assert`: assertions are stripped under
        # `python -O`, which would turn this into an opaque AttributeError.
        if run is None:
            raise dg.DagsterInvariantViolationError(
                "MetaxyStoreFromConfigResource requires a Dagster run: "
                "`context.run` is None, so no run ID is available to use "
                "as the materialization ID."
            )
        return mx.MetaxyConfig.get().get_store(
            self.name, materialization_id=run.run_id
        )
|