metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,512 @@
1
+ import inspect
2
+ from typing import Any, TypeVar, overload
3
+
4
+ import dagster as dg
5
+ from dagster._core.definitions.events import (
6
+ CoercibleToAssetKey,
7
+ CoercibleToAssetKeyPrefix,
8
+ )
9
+ from typing_extensions import Self
10
+
11
+ import metaxy as mx
12
+ from metaxy.ext.dagster.constants import (
13
+ DAGSTER_COLUMN_LINEAGE_METADATA_KEY,
14
+ DAGSTER_COLUMN_SCHEMA_METADATA_KEY,
15
+ DAGSTER_METAXY_FEATURE_METADATA_KEY,
16
+ DAGSTER_METAXY_INFO_METADATA_KEY,
17
+ DAGSTER_METAXY_KIND,
18
+ DAGSTER_METAXY_PROJECT_TAG_KEY,
19
+ METAXY_DAGSTER_METADATA_KEY,
20
+ )
21
+ from metaxy.ext.dagster.table_metadata import (
22
+ _get_type_string,
23
+ build_column_lineage,
24
+ )
25
+ from metaxy.ext.dagster.utils import (
26
+ build_feature_info_metadata,
27
+ get_asset_key_for_metaxy_feature_spec,
28
+ )
29
+
30
_T = TypeVar("_T", dg.AssetsDefinition, dg.AssetSpec)


class metaxify:
    """Inject Metaxy metadata into a Dagster [`AssetsDefinition`][dg.AssetsDefinition] or [`AssetSpec`][dg.AssetSpec].

    Affects assets with `metaxy/feature` metadata set.

    Learn more about `@metaxify` and see example screenshots [here](metaxify.md).

    Can be used both as `@metaxify` (the asset is transformed immediately) and as
    `@metaxify(...)` (an instance holding the options is returned and later called
    with the asset).

    Args:
        key: Explicit asset key that overrides all other key resolution logic. Cannot be used
            with `key_prefix` or with multi-asset definitions that produce multiple outputs.
        key_prefix: Prefix to prepend to the resolved asset key. Also applied to upstream
            dependency keys. Cannot be used with `key`.
        inject_metaxy_kind: Whether to inject `"metaxy"` kind into asset kinds.
            Currently, the number of kinds is limited to 3, and `metaxify` will skip kind injection
            if there are already 3 kinds on the asset.
        inject_code_version: Whether to inject the Metaxy feature code version into the asset's
            code version. The version is appended in the format `metaxy:<version>`.
        set_description: Whether to set the asset description from the feature class docstring
            if the asset doesn't already have a description.
        inject_column_schema: Whether to inject Pydantic field definitions as Dagster column schema.
            Field types are converted to strings, and field descriptions are used as column descriptions.
        inject_column_lineage: Whether to inject column-level lineage into the asset metadata under
            `dagster/column_lineage`. Uses Pydantic model fields to track
            column provenance via `FeatureDep.rename`, `FeatureSpec.lineage`, and direct pass-through.

    Raises:
        ValueError: If both `key` and `key_prefix` are given, or if `key` is used with a
            multi-asset that produces more than one output.
        TypeError: If the decorated object is neither an `AssetsDefinition` nor an `AssetSpec`.

    !!! tip
        Multiple Dagster assets can contribute to the same Metaxy feature by setting the same
        `"metaxy/feature"` metadata. This is a perfectly valid setup since Metaxy writes are append-only.

    !!! example
        ```py {hl_lines="8"}
        import dagster as dg
        import metaxy as mx
        import metaxy.ext.dagster as mxd

        @mxd.metaxify()
        @dg.asset(
            metadata={
                "metaxy/feature": "my/feature/key"
            },
        )
        def my_asset(store: mx.MetadataStore):
            with store:
                increment = store.resolve_update("my/feature/key")
                ...
        ```

    ??? example "With `@multi_asset`"
        Multiple Metaxy features can be produced by the same `@multi_asset`. (1)
        { .annotate }

        1. Typically, they are produced independently of each other

        ```python
        @mxd.metaxify()
        @dg.multi_asset(
            specs=[
                dg.AssetSpec("output_a", metadata={"metaxy/feature": "feature/a"}),
                dg.AssetSpec("output_b", metadata={"metaxy/feature": "feature/b"}),
            ]
        )
        def my_multi_asset():
            ...
        ```

    ??? example "With `dagster.AssetSpec`"
        ```py
        asset_spec = dg.AssetSpec(
            key="my_asset",
            metadata={"metaxy/feature": "my/feature/key"},
        )
        asset_spec = mxd.metaxify()(asset_spec)
        ```
    """

    key: dg.AssetKey | None
    key_prefix: dg.AssetKey | None
    inject_metaxy_kind: bool
    inject_code_version: bool
    set_description: bool
    inject_column_schema: bool
    inject_column_lineage: bool

    def __init__(
        self,
        _asset: "_T | None" = None,
        *,
        key: CoercibleToAssetKey | None = None,
        key_prefix: CoercibleToAssetKeyPrefix | None = None,
        inject_metaxy_kind: bool = True,
        inject_code_version: bool = True,
        set_description: bool = True,
        inject_column_schema: bool = True,
        inject_column_lineage: bool = True,
    ) -> None:
        # Actual initialization happens in __new__, but we set defaults here for type checkers.
        # Python only invokes __init__ when __new__ returned an instance of this class
        # (the `@metaxify(...)` form); it then receives the same arguments, so the
        # assignments below simply repeat what __new__ already stored.
        self.key = dg.AssetKey.from_coercible(key) if key is not None else None
        self.key_prefix = (
            dg.AssetKey.from_coercible(key_prefix) if key_prefix is not None else None
        )
        self.inject_metaxy_kind = inject_metaxy_kind
        self.inject_code_version = inject_code_version
        self.set_description = set_description
        self.inject_column_schema = inject_column_schema
        self.inject_column_lineage = inject_column_lineage

    @overload
    def __new__(cls, _asset: _T) -> _T: ...

    @overload
    def __new__(
        cls,
        _asset: None = None,
        *,
        key: CoercibleToAssetKey | None = None,
        key_prefix: CoercibleToAssetKeyPrefix | None = None,
        inject_metaxy_kind: bool = True,
        inject_code_version: bool = True,
        set_description: bool = True,
        inject_column_schema: bool = True,
        inject_column_lineage: bool = True,
    ) -> Self: ...

    def __new__(
        cls,
        _asset: _T | None = None,
        *,
        key: CoercibleToAssetKey | None = None,
        key_prefix: CoercibleToAssetKeyPrefix | None = None,
        inject_metaxy_kind: bool = True,
        inject_code_version: bool = True,
        set_description: bool = True,
        inject_column_schema: bool = True,
        inject_column_lineage: bool = True,
    ) -> "Self | _T":
        if key is not None and key_prefix is not None:
            raise ValueError("Cannot specify both `key` and `key_prefix`")

        coerced_key = dg.AssetKey.from_coercible(key) if key is not None else None
        coerced_key_prefix = (
            dg.AssetKey.from_coercible(key_prefix) if key_prefix is not None else None
        )

        if _asset is not None:
            # Called as @metaxify without parentheses: transform right away and
            # return the transformed asset instead of a `metaxify` instance.
            return cls._transform(
                _asset,
                key=coerced_key,
                key_prefix=coerced_key_prefix,
                inject_metaxy_kind=inject_metaxy_kind,
                inject_code_version=inject_code_version,
                set_description=set_description,
                inject_column_schema=inject_column_schema,
                inject_column_lineage=inject_column_lineage,
            )

        # Called as @metaxify() with parentheses - return instance for __call__
        instance = object.__new__(cls)
        instance.key = coerced_key
        instance.key_prefix = coerced_key_prefix
        instance.inject_metaxy_kind = inject_metaxy_kind
        instance.inject_code_version = inject_code_version
        instance.set_description = set_description
        instance.inject_column_schema = inject_column_schema
        instance.inject_column_lineage = inject_column_lineage
        return instance

    def __call__(self, asset: _T) -> _T:
        """Apply the stored options to `asset` and return the transformed asset."""
        return self._transform(
            asset,
            key=self.key,
            key_prefix=self.key_prefix,
            inject_metaxy_kind=self.inject_metaxy_kind,
            inject_code_version=self.inject_code_version,
            set_description=self.set_description,
            inject_column_schema=self.inject_column_schema,
            inject_column_lineage=self.inject_column_lineage,
        )

    @staticmethod
    def _transform(
        asset: _T,
        *,
        key: dg.AssetKey | None,
        key_prefix: dg.AssetKey | None,
        inject_metaxy_kind: bool,
        inject_code_version: bool,
        set_description: bool,
        inject_column_schema: bool,
        inject_column_lineage: bool,
    ) -> _T:
        """Transform an AssetsDefinition or AssetSpec with Metaxy metadata.

        Raises:
            TypeError: If `asset` is neither an `AssetSpec` nor an `AssetsDefinition`.
            ValueError: If `key` is used with a multi-asset that has several outputs.
        """
        if isinstance(asset, dg.AssetSpec):
            return _metaxify_spec(
                asset,
                key=key,
                key_prefix=key_prefix,
                inject_metaxy_kind=inject_metaxy_kind,
                inject_code_version=inject_code_version,
                set_description=set_description,
                inject_column_schema=inject_column_schema,
                inject_column_lineage=inject_column_lineage,
            )

        # Fail early with an actionable message instead of an AttributeError further
        # down, e.g. when `@metaxify` is placed below `@dg.asset` and so receives the
        # undecorated function.
        if not isinstance(asset, dg.AssetsDefinition):
            raise TypeError(
                "`metaxify` expects a `dagster.AssetsDefinition` or `dagster.AssetSpec`, "
                f"got `{type(asset).__name__}`. When decorating a function, make sure "
                "`@metaxify` is applied on top of `@dagster.asset` / `@dagster.multi_asset`."
            )

        # Handle AssetsDefinition
        # Validate that key argument is not used with multi-asset
        if key is not None and len(asset.keys) > 1:
            raise ValueError(
                f"Cannot use `key` argument with multi-asset `{asset.node_def.name}` "
                f"that produces {len(asset.keys)} outputs. "
                f"Use `key_prefix` instead to apply a common prefix to all outputs."
            )

        keys_to_replace: dict[dg.AssetKey, dg.AssetKey] = {}
        transformed_specs: list[dg.AssetSpec] = []

        for orig_key, asset_spec in asset.specs_by_key.items():
            new_spec = _metaxify_spec(
                asset_spec,
                key=key,
                key_prefix=key_prefix,
                inject_metaxy_kind=inject_metaxy_kind,
                inject_code_version=inject_code_version,
                set_description=set_description,
                inject_column_schema=inject_column_schema,
                inject_column_lineage=inject_column_lineage,
            )
            # Remember renamed keys so the definition's output mapping can follow.
            if new_spec.key != orig_key:
                keys_to_replace[orig_key] = new_spec.key
            transformed_specs.append(new_spec)

        return _replace_specs_on_assets_definition(
            asset, transformed_specs, keys_to_replace
        )
267
+
268
+
269
def _replace_specs_on_assets_definition(
    asset: dg.AssetsDefinition,
    new_specs: list[dg.AssetSpec],
    keys_to_replace: dict[dg.AssetKey, dg.AssetKey],
) -> dg.AssetsDefinition:
    """Rebuild an AssetsDefinition around new specs, sidestepping Dagster's InputDefinition bug.

    Dagster's `map_asset_specs` and `replace_specs_on_asset` fail on assets that declare
    input definitions (the `ins=` parameter with `dg.AssetIn` objects):
    `OpDefinition.with_replaced_properties` builds an `ins` dict that mixes
    `InputDefinition` and `In` objects, and `OpDefinition.__init__` then calls
    `to_definition()` on the `InputDefinition` objects, which do not have that method.

    To avoid that code path the definition is re-created through `dagster_internal_init`,
    which swaps the specs but leaves the underlying node_def untouched. As a consequence,
    deps newly added to the specs do not become real op inputs, but Dagster's asset graph
    still tracks them for dependency purposes.

    Args:
        asset: The original AssetsDefinition to transform.
        new_specs: The transformed specs to use.
        keys_to_replace: A mapping of old keys to new keys for assets whose keys changed.

    Returns:
        A new AssetsDefinition with the transformed specs.
    """
    init_kwargs = asset.get_attributes_dict()
    init_kwargs["specs"] = new_specs

    # Nothing was renamed: swapping the specs is all that is needed.
    if not keys_to_replace:
        return asset.__class__.dagster_internal_init(**init_kwargs)

    def _renamed(old_key: dg.AssetKey) -> dg.AssetKey:
        return keys_to_replace.get(old_key, old_key)

    # Keep the output-name mapping and the selection in sync with the renamed keys.
    init_kwargs["keys_by_output_name"] = {
        output_name: _renamed(output_key)
        for output_name, output_key in init_kwargs["keys_by_output_name"].items()
    }
    init_kwargs["selected_asset_keys"] = {
        _renamed(selected_key) for selected_key in init_kwargs["selected_asset_keys"]
    }

    # Re-create the definition directly; this bypasses the buggy code path in
    # Dagster's replace_specs_on_asset.
    rebuilt = asset.__class__.dagster_internal_init(**init_kwargs)

    # with_attributes updates the check specs when asset_key_replacements is provided.
    return rebuilt.with_attributes(asset_key_replacements=keys_to_replace)
321
+
322
+
323
def _merge_asset_deps(
    existing_deps: Any, upstream_keys: list[dg.AssetKey]
) -> list[dg.AssetDep]:
    """Merge the spec's own deps with Metaxy upstream keys, one dep per asset key."""
    # Deduplicate by asset key rather than by putting AssetDep objects in a set:
    # a dep may carry a partition mapping that is not hashable, and two different
    # deps on the same asset key are rejected by Dagster. Deps already declared on
    # the spec win over the plain deps derived from the feature spec.
    deps_by_key: dict[dg.AssetKey, dg.AssetDep] = {}
    for dep in existing_deps:
        deps_by_key.setdefault(dep.asset_key, dep)
    for upstream_key in upstream_keys:
        deps_by_key.setdefault(upstream_key, dg.AssetDep(asset=upstream_key))
    return list(deps_by_key.values())


def _build_column_schema(spec: dg.AssetSpec, feature_cls: Any) -> dg.TableSchema | None:
    """Combine the user-defined column schema with columns from the feature's Pydantic fields."""
    # Start with user-defined columns if present
    existing_schema = spec.metadata.get(DAGSTER_COLUMN_SCHEMA_METADATA_KEY)
    existing_columns: list[dg.TableColumn] = (
        list(existing_schema.columns) if existing_schema is not None else []
    )
    existing_column_names = {col.name for col in existing_columns}

    # Add Metaxy columns that aren't already defined by user
    # (user-defined columns take precedence)
    metaxy_columns = [
        dg.TableColumn(
            name=field_name,
            type=_get_type_string(field_info.annotation),
            description=field_info.description,
        )
        for field_name, field_info in feature_cls.model_fields.items()
        if field_name not in existing_column_names
    ]

    all_columns = existing_columns + metaxy_columns
    if not all_columns:
        return None
    # Sort columns alphabetically by name
    all_columns.sort(key=lambda col: col.name)
    return dg.TableSchema(columns=all_columns)


def _build_merged_column_lineage(
    spec: dg.AssetSpec, feature_cls: Any, feature_spec: Any
) -> dg.TableColumnLineage | None:
    """Combine the user-defined column lineage with the lineage derived from the feature spec."""
    # Start with user-defined lineage if present
    existing_lineage = spec.metadata.get(DAGSTER_COLUMN_LINEAGE_METADATA_KEY)
    existing_deps_by_column: dict[str, Any] = (
        dict(existing_lineage.deps_by_column) if existing_lineage is not None else {}
    )

    metaxy_lineage = build_column_lineage(
        feature_cls=feature_cls,
        feature_spec=feature_spec,
    )

    if metaxy_lineage is None:
        if not existing_deps_by_column:
            return None
        merged_deps_by_column: dict[str, Any] = existing_deps_by_column
    else:
        merged_deps_by_column = {
            col: list(deps) for col, deps in metaxy_lineage.deps_by_column.items()
        }
        # For columns present on both sides the user-defined deps are appended after
        # the Metaxy deps (the user can add extra lineage); other user columns are
        # carried over as they are. Unpacking keeps this working for any sequence type.
        for col, deps in existing_deps_by_column.items():
            merged_deps_by_column[col] = [*merged_deps_by_column.get(col, []), *deps]

    # Sort columns alphabetically
    sorted_deps = {k: merged_deps_by_column[k] for k in sorted(merged_deps_by_column)}
    return dg.TableColumnLineage(deps_by_column=sorted_deps)


def _metaxify_spec(
    spec: dg.AssetSpec,
    *,
    key: dg.AssetKey | None,
    key_prefix: dg.AssetKey | None,
    inject_metaxy_kind: bool,
    inject_code_version: bool,
    set_description: bool,
    inject_column_schema: bool,
    inject_column_lineage: bool,
) -> dg.AssetSpec:
    """Transform a single AssetSpec with Metaxy metadata.

    Returns the spec unchanged if `metaxy/feature` metadata is not set,
    unless `key_prefix` is provided (which applies to all specs).

    Args:
        spec: The asset spec to enrich.
        key: Explicit asset key; takes priority over the key resolved from the feature spec.
        key_prefix: Prefix prepended to the resolved asset key and to upstream dependency keys.
        inject_metaxy_kind: Add the Metaxy kind when the spec has fewer than 3 kinds.
        inject_code_version: Append `metaxy:<version>` to the spec's code version.
        set_description: Use the feature class docstring when the spec has no description.
        inject_column_schema: Add a column schema built from the feature's Pydantic fields.
        inject_column_lineage: Add column lineage when the feature has dependencies.

    Returns:
        A new AssetSpec with the Metaxy key, deps, metadata, kinds, tags and any Dagster
        attributes declared in the feature spec metadata.

    Raises:
        ValueError: If the Dagster attributes in the feature spec metadata are not a dict.
    """
    metadata_feature_key = spec.metadata.get(DAGSTER_METAXY_FEATURE_METADATA_KEY)

    # Feature key must come from metadata
    if metadata_feature_key is None:
        # No feature key set - but still apply key_prefix if provided
        if key_prefix is not None:
            new_key = dg.AssetKey([*key_prefix.path, *spec.key.path])
            return spec.replace_attributes(key=new_key)
        return spec

    feature_key = mx.coerce_to_feature_key(metadata_feature_key)
    feature_cls = mx.get_feature_by_key(feature_key)
    feature_spec = feature_cls.spec()

    # Determine the final asset key
    # Priority: key > key_prefix + resolved_key > resolved_key
    if key is not None:
        final_key = key
    else:
        resolved_key = get_asset_key_for_metaxy_feature_spec(feature_spec)
        if key_prefix is not None:
            final_key = dg.AssetKey([*key_prefix.path, *resolved_key.path])
        else:
            final_key = resolved_key

    # Collect upstream asset keys from feature dependencies
    upstream_keys: list[dg.AssetKey] = []
    for dep in feature_spec.deps:
        upstream_feature_spec = mx.get_feature_by_key(dep.feature).spec()
        upstream_key = get_asset_key_for_metaxy_feature_spec(upstream_feature_spec)
        # Apply key_prefix to upstream deps as well
        if key_prefix is not None:
            upstream_key = dg.AssetKey([*key_prefix.path, *upstream_key.path])
        upstream_keys.append(upstream_key)

    # Build kinds
    kinds_to_add: set[str] = set()
    if inject_metaxy_kind and len(spec.kinds) < 3:
        kinds_to_add.add(DAGSTER_METAXY_KIND)

    # Extract dagster attributes (excluding asset_key which is handled separately)
    dagster_attrs: dict[str, Any] = {}
    raw_dagster_attrs = feature_spec.metadata.get(METAXY_DAGSTER_METADATA_KEY)
    if raw_dagster_attrs is not None:
        if not isinstance(raw_dagster_attrs, dict):
            raise ValueError(
                f"Invalid metadata format for `{feature_spec.key}` "
                f"Metaxy feature metadata key {METAXY_DAGSTER_METADATA_KEY}: "
                f"expected dict, got {type(raw_dagster_attrs).__name__}"
            )
        dagster_attrs = {k: v for k, v in raw_dagster_attrs.items() if k != "asset_key"}

    # Build code version: append metaxy version to existing code version if present
    if inject_code_version:
        metaxy_code_version = f"metaxy:{feature_spec.code_version}"
        if spec.code_version:
            final_code_version = f"{spec.code_version},{metaxy_code_version}"
        else:
            final_code_version = metaxy_code_version
    else:
        final_code_version = spec.code_version

    # Use feature class docstring as description if not set on asset spec
    final_description = spec.description
    if set_description and final_description is None and feature_cls.__doc__:
        final_description = inspect.cleandoc(feature_cls.__doc__)

    # Build tags for project and feature
    # Note: Dagster tag values only allow alpha-numeric, '_', '-', '.'
    # so we use table_name which uses '__' separator
    tags_to_add: dict[str, str] = {
        DAGSTER_METAXY_PROJECT_TAG_KEY: mx.MetaxyConfig.get().project,
        DAGSTER_METAXY_FEATURE_METADATA_KEY: feature_key.table_name,
    }

    # Build column schema from Pydantic fields (includes inherited system columns)
    # Respects existing user-defined column schema and appends Metaxy columns
    column_schema: dg.TableSchema | None = None
    if inject_column_schema:
        column_schema = _build_column_schema(spec, feature_cls)

    # Build column lineage from upstream dependencies
    # Respects existing user-defined column lineage and merges with Metaxy lineage
    column_lineage: dg.TableColumnLineage | None = None
    if inject_column_lineage and feature_spec.deps:
        column_lineage = _build_merged_column_lineage(spec, feature_cls, feature_spec)

    # Build the replacement attributes
    metadata_to_add: dict[str, Any] = {
        **spec.metadata,
        DAGSTER_METAXY_FEATURE_METADATA_KEY: feature_key.to_string(),
        DAGSTER_METAXY_INFO_METADATA_KEY: build_feature_info_metadata(feature_key),
    }
    if column_schema is not None:
        metadata_to_add[DAGSTER_COLUMN_SCHEMA_METADATA_KEY] = column_schema
    if column_lineage is not None:
        metadata_to_add[DAGSTER_COLUMN_LINEAGE_METADATA_KEY] = column_lineage

    replace_attrs: dict[str, Any] = {
        "key": final_key,
        "deps": _merge_asset_deps(spec.deps, upstream_keys),
        "metadata": metadata_to_add,
        "kinds": {*spec.kinds, *kinds_to_add},
        "tags": {**spec.tags, **tags_to_add},
        # Attributes declared in the feature spec metadata come last, so they
        # override any of the entries above that share a name.
        **dagster_attrs,
    }

    if final_code_version is not None:
        replace_attrs["code_version"] = final_code_version

    if final_description is not None:
        replace_attrs["description"] = final_description

    return spec.replace_attributes(**replace_attrs)
@@ -0,0 +1,115 @@
1
+ """Observable source assets for Metaxy features."""
2
+
3
+ from collections.abc import Callable
4
+ from typing import Any
5
+
6
+ import dagster as dg
7
+
8
+ import metaxy as mx
9
+ from metaxy.ext.dagster.constants import DAGSTER_METAXY_FEATURE_METADATA_KEY
10
+ from metaxy.ext.dagster.metaxify import metaxify
11
+ from metaxy.ext.dagster.utils import compute_stats_from_lazy_frame
12
+
13
+
14
def observable_metaxy_asset(
    feature: mx.CoercibleToFeatureKey,
    *,
    store_resource_key: str = "store",
    # metaxify kwargs
    inject_metaxy_kind: bool = True,
    inject_code_version: bool = True,
    set_description: bool = True,
    # observable_source_asset kwargs
    **observable_kwargs: Any,
) -> Callable[[Callable[..., Any]], dg.SourceAsset]:
    """Decorator to create an observable source asset for a Metaxy feature.

    The observation reads the feature's metadata from the store, counts rows,
    and uses `mean(metaxy_created_at)` as the data version to track changes.
    Using mean ensures that both additions and deletions are detected.

    The decorated function receives `(context, store, lazy_df)` and can return
    a dict of additional metadata to include in the observation.

    Args:
        feature: The Metaxy feature to observe.
        store_resource_key: Resource key for the MetadataStore (default: `"store"`).
        inject_metaxy_kind: Whether to inject `"metaxy"` kind into asset kinds.
        inject_code_version: Whether to inject the Metaxy feature code version.
        set_description: Whether to set description from feature class docstring.
        **observable_kwargs: Passed to `@observable_source_asset`
            (key, group_name, tags, metadata, description, partitions_def, etc.)

    Returns:
        A decorator that turns the observation function into a `dagster.SourceAsset`.

    Example:
        ```python
        import metaxy.ext.dagster as mxd
        from myproject.features import ExternalFeature

        @mxd.observable_metaxy_asset(feature=ExternalFeature)
        def external_data(context, store, lazy_df):
            pass

        # With custom metadata - return a dict
        @mxd.observable_metaxy_asset(feature=ExternalFeature)
        def external_data_with_metrics(context, store, lazy_df):
            # Run aggregations in the database
            total = lazy_df.select(nw.col("value").sum()).collect().item(0, 0)
            return {"custom/total": total}
        ```

    Note:
        `observable_source_asset` does not support `deps`. Upstream Metaxy feature
        dependencies from the feature spec are not propagated to the SourceAsset.
    """
    feature_key = mx.coerce_to_feature_key(feature)

    def decorator(fn: Callable[..., Any]) -> dg.SourceAsset:
        # Work on a copy: popping from the shared closure dict would strip
        # key/metadata/tags/... for every later use of the same decorator object.
        source_kwargs = dict(observable_kwargs)

        # Build an AssetSpec from kwargs and enrich with metaxify
        # Merge user metadata with metaxy/feature
        user_metadata = source_kwargs.pop("metadata", None) or {}
        spec = dg.AssetSpec(
            key=source_kwargs.pop("key", None) or fn.__name__,
            group_name=source_kwargs.pop("group_name", None),
            tags=source_kwargs.pop("tags", None),
            metadata={
                **user_metadata,
                DAGSTER_METAXY_FEATURE_METADATA_KEY: feature_key.to_string(),
            },
            description=source_kwargs.pop("description", None),
        )
        # NOTE(review): no explicit `key` is forwarded to metaxify, so the final asset
        # key is the one metaxify resolves from the feature spec - confirm intended.
        enriched = metaxify(
            inject_metaxy_kind=inject_metaxy_kind,
            inject_code_version=inject_code_version,
            set_description=set_description,
        )(spec)

        def _observe(context: dg.AssetExecutionContext) -> dg.ObserveResult:
            store: mx.MetadataStore = getattr(context.resources, store_resource_key)

            with store:
                lazy_df = store.read_metadata(feature_key)
                stats = compute_stats_from_lazy_frame(lazy_df)

                # Call the user's function - it can return additional metadata
                extra_metadata = fn(context, store, lazy_df) or {}

            # User-provided entries override the built-in row count on key clashes.
            metadata: dict[str, Any] = {"dagster/row_count": stats.row_count}
            metadata.update(extra_metadata)

            return dg.ObserveResult(
                data_version=stats.data_version,
                metadata=metadata,
            )

        # Apply observable_source_asset decorator
        return dg.observable_source_asset(
            key=enriched.key,
            description=enriched.description,
            group_name=enriched.group_name,
            tags=dict(enriched.tags) if enriched.tags else None,
            metadata=dict(enriched.metadata) if enriched.metadata else None,
            required_resource_keys={store_resource_key},
            **source_kwargs,
        )(_observe)

    return decorator
@@ -0,0 +1,27 @@
1
+ import dagster as dg
2
+
3
+ import metaxy as mx
4
+
5
+
6
class MetaxyStoreFromConfigResource(dg.ConfigurableResource[mx.MetadataStore]):
    """This resource creates a [`metaxy.MetadataStore`][metaxy.MetadataStore] based on the current Metaxy configuration (`metaxy.toml`).

    If `name` is not provided, the default store will be used.
    It can be set with `store = "my_name"` in `metaxy.toml` or with the `$METAXY_STORE` environment variable.
    """

    # Name of the store to load from the Metaxy configuration; None selects the default store.
    name: str | None = None

    def create_resource(self, context: dg.InitResourceContext) -> mx.MetadataStore:
        """Create a MetadataStore from the Metaxy configuration.

        Args:
            context: Dagster resource initialization context.

        Returns:
            A MetadataStore configured with the Dagster run ID as the materialization ID.

        Raises:
            RuntimeError: If the resource is initialized without a Dagster run, since
                the run ID is needed as the materialization ID.
        """
        run = context.run
        # Explicit check instead of `assert`: assertions are stripped under `python -O`,
        # which would turn a missing run into an opaque AttributeError below.
        if run is None:
            raise RuntimeError(
                "MetaxyStoreFromConfigResource must be initialized within a Dagster run: "
                "the run ID is used as the Metaxy materialization ID."
            )
        return mx.MetaxyConfig.get().get_store(
            self.name, materialization_id=run.run_id
        )
+ )