metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,462 @@
1
+ from collections.abc import Iterable, Iterator
2
+ from typing import Any, NamedTuple
3
+
4
+ import dagster as dg
5
+ import narwhals as nw
6
+
7
+ import metaxy as mx
8
+ from metaxy.ext.dagster.constants import (
9
+ DAGSTER_METAXY_FEATURE_METADATA_KEY,
10
+ DAGSTER_METAXY_PARTITION_KEY,
11
+ METAXY_DAGSTER_METADATA_KEY,
12
+ )
13
+ from metaxy.ext.dagster.resources import MetaxyStoreFromConfigResource
14
+ from metaxy.metadata_store.exceptions import FeatureNotFoundError
15
+ from metaxy.models.constants import METAXY_CREATED_AT, METAXY_MATERIALIZATION_ID
16
+
17
+
18
class FeatureStats(NamedTuple):
    """Statistics about a feature's metadata for Dagster events."""

    # Number of rows in the (possibly partition-filtered) metadata frame.
    row_count: int
    # Data version derived from the metadata contents; Dagster uses it to
    # detect whether the asset's data changed between runs.
    data_version: dg.DataVersion
23
+
24
+
25
def build_partition_filter(
    partition_col: str | None,
    partition_key: str | None,
) -> list[nw.Expr]:
    """Construct partition filter expressions for a column/key pair.

    Args:
        partition_col: Column to filter on (taken from `partition_by` metadata).
        partition_key: Partition key value to match.

    Returns:
        A single-element list with the equality filter, or an empty list
        when either argument is None.
    """
    if partition_col is not None and partition_key is not None:
        return [nw.col(partition_col) == partition_key]
    return []
41
+
42
+
43
def get_partition_filter(
    context: dg.AssetExecutionContext,
    spec: dg.AssetSpec,
) -> list[nw.Expr]:
    """Resolve partition filter expressions for a partitioned asset.

    Args:
        context: The Dagster asset execution context.
        spec: The AssetSpec carrying `partition_by` metadata.

    Returns:
        Filter expressions for the current partition; empty when the run is
        not partitioned or the spec has no (string) partition column.
    """
    if context.has_partition_key:
        column = spec.metadata.get(DAGSTER_METAXY_PARTITION_KEY)
        if isinstance(column, str):
            return build_partition_filter(column, context.partition_key)
    return []
64
+
65
+
66
def compute_row_count(lazy_df: nw.LazyFrame) -> int:  # pyright: ignore[reportMissingTypeArgument]
    """Return the number of rows in a narwhals LazyFrame.

    Args:
        lazy_df: A narwhals LazyFrame.

    Returns:
        The row count of the collected frame.
    """
    counted = lazy_df.select(nw.len()).collect()
    return counted.item(0, 0)  # pyright: ignore[reportReturnType]
76
+
77
+
78
def compute_stats_from_lazy_frame(lazy_df: nw.LazyFrame) -> FeatureStats:  # pyright: ignore[reportMissingTypeArgument]
    """Derive row count and data version from a narwhals LazyFrame.

    The data version is based on mean(metaxy_created_at), which shifts on
    both row additions and deletions.

    Args:
        lazy_df: A narwhals LazyFrame carrying metaxy metadata columns.

    Returns:
        FeatureStats with row_count and data_version.
    """
    collected = lazy_df.select(
        nw.len().alias("__count"),
        nw.col(METAXY_CREATED_AT).mean().alias("__mean_ts"),
    ).collect()

    count: int = collected.item(0, "__count")
    if not count:
        # No rows: use a sentinel version rather than str(None).
        return FeatureStats(row_count=0, data_version=dg.DataVersion("empty"))

    version = dg.DataVersion(str(collected.item(0, "__mean_ts")))
    return FeatureStats(row_count=count, data_version=version)
102
+
103
+
104
def compute_feature_stats(
    store: mx.MetadataStore,
    feature: mx.CoercibleToFeatureKey,
) -> FeatureStats:
    """Read a feature's metadata and summarize it for Dagster.

    Opens the store, reads the feature's metadata, and computes row count
    plus a data version based on mean(metaxy_created_at) so that both
    additions and deletions are detected.

    Args:
        store: The Metaxy metadata store to read from.
        feature: The feature to compute stats for.

    Returns:
        FeatureStats with row_count and data_version.
    """
    with store:
        return compute_stats_from_lazy_frame(store.read_metadata(feature))
124
+
125
+
126
def get_asset_key_for_metaxy_feature_spec(
    feature_spec: mx.FeatureSpec,
) -> dg.AssetKey:
    """Resolve the Dagster asset key for a Metaxy feature spec.

    Args:
        feature_spec: The Metaxy feature spec.

    Returns:
        The Dagster asset key, determined as follows:

        1. If feature spec has `dagster/attributes.asset_key` set, that value is used.

        2. Otherwise, the feature key is used.
    """
    dagster_attrs = feature_spec.metadata.get(METAXY_DAGSTER_METADATA_KEY)
    if isinstance(dagster_attrs, dict):
        # An explicit, truthy asset_key override wins over the feature key.
        custom_asset_key = dagster_attrs.get("asset_key")
        if custom_asset_key:
            return dg.AssetKey(custom_asset_key)  # pyright: ignore[reportArgumentType]

    # Default: mirror the feature key parts as the asset key path.
    return dg.AssetKey(list(feature_spec.key.parts))
150
+
151
+
152
def generate_materialize_results(
    context: dg.AssetExecutionContext,
    store: mx.MetadataStore | MetaxyStoreFromConfigResource,
    specs: Iterable[dg.AssetSpec] | None = None,
) -> Iterator[dg.MaterializeResult[None]]:
    """Generate `dagster.MaterializeResult` events for assets in topological order.

    Yields a `MaterializeResult` for each asset spec, sorted by their associated
    Metaxy features in topological order (dependencies before dependents).
    Each result includes the row count as `"dagster/row_count"` metadata.

    Args:
        context: The Dagster asset execution context.
        store: The Metaxy metadata store to read from.
        specs: Optional, concrete Dagster asset specs.
            If missing, specs will be taken from the context.

    Yields:
        Materialization result for each asset in topological order.

    Example:
        ```python
        specs = [
            dg.AssetSpec("output_a", metadata={"metaxy/feature": "my/feature/a"}),
            dg.AssetSpec("output_b", metadata={"metaxy/feature": "my/feature/b"}),
        ]

        @metaxify
        @dg.multi_asset(specs=specs)
        def my_multi_asset(context: dg.AssetExecutionContext, store: mx.MetadataStore):
            # ... compute and write data ...
            yield from generate_materialize_results(context, store)
        ```
    """
    # Build mapping from feature key to asset spec; only specs tagged with
    # the "metaxy/feature" metadata key participate.
    spec_by_feature_key: dict[mx.FeatureKey, dg.AssetSpec] = {}
    specs = specs or context.assets_def.specs
    for spec in specs:
        if feature_key_raw := spec.metadata.get(DAGSTER_METAXY_FEATURE_METADATA_KEY):
            feature_key = mx.coerce_to_feature_key(feature_key_raw)
            spec_by_feature_key[feature_key] = spec

    # Sort by topological order of feature keys
    graph = mx.FeatureGraph.get_active()
    sorted_keys = graph.topological_sort_features(list(spec_by_feature_key.keys()))

    for key in sorted_keys:
        asset_spec = spec_by_feature_key[key]
        partition_filters = get_partition_filter(context, asset_spec)

        with store:
            try:
                lazy_df = store.read_metadata(key, filters=partition_filters)
            except FeatureNotFoundError:
                # A missing feature is logged and skipped rather than failing
                # the whole multi-asset run.
                context.log.exception(
                    f"Feature {key.to_string()} not found in store, skipping materialization result"
                )
                continue

            stats = compute_stats_from_lazy_frame(lazy_df)

            # Build runtime metadata using shared function, passing pre-computed row count
            metadata = build_runtime_feature_metadata(
                key, store, lazy_df, context, partition_row_count=stats.row_count
            )

            # Get materialized-in-run count if materialization_id is set
            if store.materialization_id is not None:
                mat_filters = partition_filters + [
                    nw.col(METAXY_MATERIALIZATION_ID) == store.materialization_id
                ]
                mat_df = store.read_metadata(key, filters=mat_filters)
                metadata["metaxy/materialized_in_run"] = (
                    mat_df.select(nw.len()).collect().item(0, 0)
                )

        # NOTE(review): block nesting reconstructed from a diff with flattened
        # indentation — the yield is assumed to happen outside the `with store:`
        # context so the store is not held open while the consumer runs; confirm
        # against the original file.
        yield dg.MaterializeResult(
            value=None,
            asset_key=asset_spec.key,
            metadata=metadata,
            data_version=stats.data_version,
        )
234
+
235
+
236
def build_feature_info_metadata(
    feature: mx.CoercibleToFeatureKey,
) -> dict[str, Any]:
    """Build feature info metadata dict for Dagster assets.

    Creates a dictionary with information about the Metaxy feature that can be
    used as Dagster asset metadata under the `"metaxy/feature_info"` key.

    Args:
        feature: The Metaxy feature (class, key, or string).

    Returns:
        A nested dictionary containing:

        - `feature`: Feature information
            - `project`: The project name
            - `spec`: The full feature spec as a dict (via `model_dump()`)
            - `version`: The feature version string
            - `type`: The feature class module path
        - `metaxy`: Metaxy library information
            - `version`: The metaxy library version

    !!! tip
        This is automatically injected by [`@metaxify`][metaxy.ext.dagster.metaxify.metaxify]

    Example:
        ```python
        from metaxy.ext.dagster.utils import build_feature_info_metadata

        info = build_feature_info_metadata(MyFeature)
        # {
        #     "feature": {
        #         "project": "my_project",
        #         "spec": {...},  # Full FeatureSpec model_dump()
        #         "version": "my__feature@abc123",
        #         "type": "myproject.features",
        #     },
        #     "metaxy": {
        #         "version": "0.1.0",
        #     },
        # }
        ```
    """
    key = mx.coerce_to_feature_key(feature)
    feature_cls = mx.get_feature_by_key(key)

    feature_info = {
        "project": feature_cls.project,
        "spec": feature_cls.spec().model_dump(mode="json"),
        "version": feature_cls.feature_version(),
        "type": feature_cls.__module__,
    }
    library_info = {
        "version": mx.__version__,
        "plugins": mx.MetaxyConfig.get().plugins,
    }
    return {"feature": feature_info, "metaxy": library_info}
294
+
295
+
296
def build_runtime_feature_metadata(
    feature_key: mx.FeatureKey,
    store: mx.MetadataStore | MetaxyStoreFromConfigResource,
    lazy_df: nw.LazyFrame[Any],
    context: dg.AssetExecutionContext | dg.OutputContext,
    *,
    partition_row_count: int | None = None,
) -> dict[str, Any]:
    """Build runtime metadata for a Metaxy feature in Dagster.

    This function consolidates all runtime metadata construction for Dagster events.
    It is used by the IOManager, generate_materialize_results, and generate_observe_results.

    Args:
        feature_key: The Metaxy feature key.
        store: The metadata store (used for store-specific metadata like URI, table_name).
        lazy_df: The LazyFrame containing the feature data (for stats and preview).
            For partitioned assets, this should be filtered to the current partition.
        context: Dagster context for determining partition state and logging errors.
        partition_row_count: Optional pre-computed partition row count to avoid re-computing.

    Returns:
        A dictionary containing all runtime metadata:
        - `metaxy/feature`: Feature key as string
        - `metaxy/info`: Feature and metaxy library information (from `build_feature_info_metadata`)
        - `metaxy/store`: Store type and configuration
        - `dagster/row_count`: Total row count (across all partitions)
        - `dagster/partition_row_count`: Row count for current partition (only if partitioned)
        - `dagster/table_name`: Table name from store (if available)
        - `dagster/uri`: URI from store (if available)
        - `dagster/table`: Table preview

        Returns empty dict if an error occurs during metadata collection.

    Example:
        ```python
        with store:
            lazy_df = store.read_metadata(feature_key)
            metadata = build_runtime_feature_metadata(feature_key, store, lazy_df, context)
        context.add_output_metadata(metadata)
        ```
    """
    # Import here to avoid circular import
    from metaxy.ext.dagster.table_metadata import (
        build_column_schema,
        build_table_preview_metadata,
    )

    try:
        # Use pre-computed partition_row_count if provided, otherwise compute
        if partition_row_count is None:
            partition_row_count = compute_row_count(lazy_df)

        # Get store metadata
        store_metadata = store.get_store_metadata(feature_key)

        # Build metadata dict with metaxy info and store info
        store_cls = store.__class__
        metadata: dict[str, Any] = {
            "metaxy/feature": feature_key.to_string(),
            "metaxy/info": build_feature_info_metadata(feature_key),
            "metaxy/store": {
                "type": f"{store_cls.__module__}.{store_cls.__qualname__}",
                "display": store.display(),
                # Private attribute access; presumably stable within this
                # package since the store classes live in the same project.
                "versioning_engine": store._versioning_engine,
                **store_metadata,
            },
        }

        # For partitioned assets, compute total row count by re-reading without filters.
        # NOTE(review): `context` is not Optional per the signature, so the
        # `context is not None` guard looks redundant — confirm before removing.
        if context is not None and context.has_partition_key:
            # Read entire feature (no partition filter) for total count
            full_lazy_df = store.read_metadata(feature_key)
            metadata["dagster/row_count"] = compute_row_count(full_lazy_df)
            metadata["dagster/partition_row_count"] = partition_row_count
        else:
            metadata["dagster/row_count"] = partition_row_count

        # Map store metadata to dagster standard keys
        if "table_name" in store_metadata:
            metadata["dagster/table_name"] = store_metadata["table_name"]

        if "uri" in store_metadata:
            metadata["dagster/uri"] = dg.MetadataValue.path(store_metadata["uri"])

        # Build table preview
        feature_cls = mx.get_feature_by_key(feature_key)
        schema = build_column_schema(feature_cls)
        metadata["dagster/table"] = build_table_preview_metadata(lazy_df, schema)

        return metadata
    except Exception:
        # Metadata collection is best-effort: log the failure and return an
        # empty dict rather than failing the materialization/observation.
        context.log.exception(
            f"Failed to build runtime metadata for feature {feature_key.to_string()}"
        )
        return {}
392
+
393
+
394
def generate_observe_results(
    context: dg.AssetExecutionContext,
    store: mx.MetadataStore | MetaxyStoreFromConfigResource,
    specs: Iterable[dg.AssetSpec] | None = None,
) -> Iterator[dg.ObserveResult]:
    """Generate `dagster.ObserveResult` events for assets in topological order.

    Yields an `ObserveResult` for each asset spec that has `"metaxy/feature"` metadata key set, sorted by their associated
    Metaxy features in topological order.
    Each result includes the row count as `"dagster/row_count"` metadata.

    Args:
        context: The Dagster asset execution context.
        store: The Metaxy metadata store to read from.
        specs: Optional, concrete Dagster asset specs.
            If missing, this function will take the current specs from the context.

    Yields:
        Observation result for each asset in topological order.

    Example:
        ```python
        specs = [
            dg.AssetSpec("output_a", metadata={"metaxy/feature": "my/feature/a"}),
            dg.AssetSpec("output_b", metadata={"metaxy/feature": "my/feature/b"}),
        ]

        @metaxify
        @dg.multi_observable_source_asset(specs=specs)
        def my_observable_assets(context: dg.AssetExecutionContext, store: mx.MetadataStore):
            yield from generate_observe_results(context, store)
        ```
    """
    # Build mapping from feature key to asset spec; specs without the
    # "metaxy/feature" metadata key are ignored.
    spec_by_feature_key: dict[mx.FeatureKey, dg.AssetSpec] = {}
    specs = specs or context.assets_def.specs

    for spec in specs:
        if feature_key_raw := spec.metadata.get(DAGSTER_METAXY_FEATURE_METADATA_KEY):
            feature_key = mx.coerce_to_feature_key(feature_key_raw)
            spec_by_feature_key[feature_key] = spec

    # Sort by topological order of feature keys
    graph = mx.FeatureGraph.get_active()
    sorted_keys = graph.topological_sort_features(list(spec_by_feature_key.keys()))

    for key in sorted_keys:
        asset_spec = spec_by_feature_key[key]
        partition_filters = get_partition_filter(context, asset_spec)

        with store:
            try:
                lazy_df = store.read_metadata(key, filters=partition_filters)
            except FeatureNotFoundError:
                # Missing features are logged and skipped, not fatal.
                context.log.exception(
                    f"Feature {key.to_string()} not found in store, skipping observation result"
                )
                continue

            stats = compute_stats_from_lazy_frame(lazy_df)
            metadata = build_runtime_feature_metadata(
                key, store, lazy_df, context, partition_row_count=stats.row_count
            )

        # NOTE(review): block nesting reconstructed from a diff with flattened
        # indentation — the yield is assumed to happen outside the
        # `with store:` context; confirm against the original file.
        yield dg.ObserveResult(
            asset_key=asset_spec.key,
            metadata=metadata,
            data_version=stats.data_version,
        )
@@ -0,0 +1,23 @@
1
"""SQLAlchemy integration for metaxy.

This module provides SQLAlchemy table definitions and helpers for metaxy.
These can be used with migration tools like Alembic.

The main functions return tuples of (sqlalchemy_url, metadata) for easy
integration with migration tools:

- `get_system_slqa_metadata`: Get URL and system table metadata for a store
- `filter_feature_sqla_metadata`: Get URL and feature table metadata for a store
"""

# NOTE(review): "slqa" in `get_system_slqa_metadata` looks like a transposed
# "sqla", but it is the actual public name defined in the plugin module, so it
# must be re-exported with this spelling to stay importable.
from metaxy.ext.sqlalchemy.config import SQLAlchemyConfig
from metaxy.ext.sqlalchemy.plugin import (
    filter_feature_sqla_metadata,
    get_system_slqa_metadata,
)

# Explicit public API of this package.
__all__ = [
    "SQLAlchemyConfig",
    "get_system_slqa_metadata",
    "filter_feature_sqla_metadata",
]
@@ -0,0 +1,29 @@
1
+ """Configuration for SQLAlchemy integration."""
2
+
3
+ from pydantic import Field as PydanticField
4
+ from pydantic_settings import SettingsConfigDict
5
+
6
+ from metaxy.config import PluginConfig
7
+
8
+
9
class SQLAlchemyConfig(PluginConfig):
    """Configuration for SQLAlchemy integration.

    This plugin provides helpers for working with SQLAlchemy metadata
    and table definitions.

    Settings may also be supplied via environment variables prefixed with
    ``METAXY_EXT__SQLALCHEMY_`` (see ``model_config``).
    """

    # extra="forbid" turns unknown configuration keys into validation errors
    # instead of silently ignoring typos.
    model_config = SettingsConfigDict(
        env_prefix="METAXY_EXT__SQLALCHEMY_",
        extra="forbid",
    )

    # Both toggles default to False: injecting constraints/indexes changes the
    # emitted DDL for user tables, so they are opt-in.
    inject_primary_key: bool = PydanticField(
        default=False,
        description="Automatically inject composite primary key constraints on user-defined feature tables. The key is composed of ID columns, `metaxy_created_at`, and `metaxy_data_version`.",
    )

    inject_index: bool = PydanticField(
        default=False,
        description="Automatically inject composite index on user-defined feature tables. The index covers ID columns, `metaxy_created_at`, and `metaxy_data_version`.",
    )