metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111):
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,200 @@
1
+ from typing import Any
2
+
3
+ import dagster as dg
4
+ import narwhals as nw
5
+ import pydantic
6
+ from narwhals.typing import IntoFrame
7
+
8
+ import metaxy as mx
9
+ from metaxy.ext.dagster.constants import (
10
+ DAGSTER_METAXY_FEATURE_METADATA_KEY,
11
+ DAGSTER_METAXY_PARTITION_KEY,
12
+ )
13
+ from metaxy.ext.dagster.resources import MetaxyStoreFromConfigResource
14
+ from metaxy.ext.dagster.utils import (
15
+ build_partition_filter,
16
+ build_runtime_feature_metadata,
17
+ )
18
+ from metaxy.metadata_store.exceptions import FeatureNotFoundError
19
+ from metaxy.models.constants import METAXY_MATERIALIZATION_ID
20
+ from metaxy.models.types import ValidatedFeatureKey
21
+
22
+ #: Type alias for MetaxyIOManager output - any narwhals-compatible dataframe or None
23
+ MetaxyOutput = IntoFrame | None
24
+
25
+
26
class MetaxyIOManager(dg.ConfigurableIOManager):
    """MetaxyIOManager is a Dagster IOManager that reads and writes data to/from Metaxy's [`MetadataStore`][metaxy.MetadataStore].

    It automatically attaches Metaxy feature and store metadata to Dagster materialization events and handles partitioned assets.

    !!! warning "Always set `"metaxy/feature"` Dagster metadata"

        This IOManager is using `"metaxy/feature"` Dagster metadata key to map Dagster assets into Metaxy features.
        It expects it to be set on the assets being loaded or materialized.

        ??? example

            ```py
            import dagster as dg

            @dg.asset(
                metadata={
                    "metaxy/feature": "my/feature/key",
                }
            )
            def my_asset():
                ...
            ```

    !!! tip "Defining Partitioned Assets"

        To tell Metaxy which column to use when filtering partitioned assets, set `"partition_by"` Dagster metadata key.

        ??? example
            ```py
            import dagster as dg

            @dg.asset(
                metadata={
                    "metaxy/feature": "my/feature/key",
                    "partition_by": "date",
                }
            )
            def my_partitioned_asset():
                ...
            ```

        This key is commonly used to configure partitioning behavior by various Dagster IO managers.

    """

    # `default_factory` must be a zero-argument callable. Passing an already
    # constructed resource instance would make pydantic try to *call* that
    # instance when `store` is omitted, so the construction is deferred with a
    # lambda instead.
    store: dg.ResourceDependency[MetaxyStoreFromConfigResource] = pydantic.Field(
        default_factory=lambda: MetaxyStoreFromConfigResource(name="dev")
    )

    @property
    def metadata_store(
        self,
    ) -> mx.MetadataStore:  # this property mostly exists to fix the type annotation
        """Return the `store` resource, typed as a Metaxy `MetadataStore`."""
        return self.store  # pyright: ignore[reportReturnType]

    def _feature_key_from_context(
        self, context: dg.InputContext | dg.OutputContext
    ) -> ValidatedFeatureKey:
        """Resolve the Metaxy feature key attached to a Dagster context.

        For an input context the key is read from the upstream output's
        metadata; for an output context it is read from the output's
        definition metadata. In both cases the raw value is validated with
        `mx.ValidatedFeatureKeyAdapter`.

        Args:
            context: Dagster input or output context.

        Returns:
            The validated feature key.

        Raises:
            ValueError: If `context` is neither an input nor an output context.
        """
        if isinstance(context, dg.InputContext):
            upstream = context.upstream_output
            # These asserts also narrow the optional types for the type checker.
            assert upstream is not None
            assert upstream.metadata is not None
            raw_key = upstream.metadata[DAGSTER_METAXY_FEATURE_METADATA_KEY]
        elif isinstance(context, dg.OutputContext):
            raw_key = context.definition_metadata[DAGSTER_METAXY_FEATURE_METADATA_KEY]
        else:
            raise ValueError(f"Unexpected context type: {type(context)}")

        return mx.ValidatedFeatureKeyAdapter.validate_python(raw_key)

    def load_input(self, context: dg.InputContext) -> nw.LazyFrame[Any]:
        """Load feature metadata from [`MetadataStore`][metaxy.MetadataStore].

        Reads metadata for the feature specified in the asset's `"metaxy/feature"` metadata.
        For partitioned assets, filters to the current partition using the column specified
        in `"partition_by"` metadata.

        Args:
            context: Dagster input context containing asset metadata.

        Returns:
            A narwhals LazyFrame with the feature metadata.
        """
        # Resolve the key once, before opening the store, so a misconfigured
        # asset fails without touching the store.
        feature_key = self._feature_key_from_context(context)

        with self.metadata_store:
            context.log.debug(
                f"Reading metadata for Metaxy feature {feature_key.to_string()} from {self.metadata_store.display()}"
            )

            # Build partition filter if applicable
            if context.has_asset_partitions:
                partition_col = context.definition_metadata.get(
                    DAGSTER_METAXY_PARTITION_KEY
                )
                partition_key = context.asset_partition_key
            else:
                partition_col = None
                partition_key = None

            filters = build_partition_filter(
                partition_col,  # pyright: ignore[reportArgumentType]
                partition_key,
            )

            return self.metadata_store.read_metadata(
                feature=feature_key,
                filters=filters,
            )

    def handle_output(self, context: dg.OutputContext, obj: MetaxyOutput) -> None:
        """Write feature metadata to [`MetadataStore`][metaxy.MetadataStore].

        Writes the output dataframe to the metadata store for the feature specified
        in the asset's `"metaxy/feature"` metadata. Also logs metadata about the
        feature and store to Dagster's materialization events.

        If `obj` is `None`, only metadata logging is performed (no data is written).

        Args:
            context: Dagster output context containing asset metadata.
            obj: A narwhals-compatible dataframe to write, or None to skip writing.

        Raises:
            AssertionError: If the asset metadata lacks the `"metaxy/feature"` key.
        """
        if DAGSTER_METAXY_FEATURE_METADATA_KEY not in context.definition_metadata:
            # Raised explicitly (rather than via `assert`) so the check is not
            # stripped under `python -O`; the exception type is kept unchanged.
            raise AssertionError(
                f'Missing `"{DAGSTER_METAXY_FEATURE_METADATA_KEY}"` key in asset metadata'
            )

        key = self._feature_key_from_context(context)
        # Looked up even when `obj` is None, so the key is always resolved
        # against the registered features before anything is logged.
        feature = mx.get_feature_by_key(key)

        if obj is None:
            context.log.debug(
                f'The output corresponds to Metaxy feature "{key.to_string()}" stored in {self.metadata_store.display()}'
            )
        else:
            context.log.debug(
                f'Writing metadata for Metaxy feature "{key.to_string()}" into {self.metadata_store.display()}'
            )
            with self.metadata_store.open("write"):
                self.metadata_store.write_metadata(feature=feature, df=obj)
            context.log.debug(
                f'Metadata written for Metaxy feature "{key.to_string()}" into {self.metadata_store.display()}'
            )

        self._log_output_metadata(context)

    def _log_output_metadata(self, context: dg.OutputContext) -> None:
        """Attach runtime Metaxy metadata to the Dagster output.

        Reads the feature's metadata back from the store, adds the metadata
        produced by `build_runtime_feature_metadata`, and then adds a
        `"metaxy/materialized_in_run"` entry counting the unique ID-column
        combinations whose materialization ID equals the current run ID.

        This is best-effort: a `FeatureNotFoundError` is logged at debug level
        and otherwise ignored.

        Args:
            context: Dagster output context to attach the metadata to.
        """
        with self.metadata_store:
            key = self._feature_key_from_context(context)

            try:
                feature = mx.get_feature_by_key(key)

                # Build runtime metadata from data (includes metaxy/feature, metaxy/info,
                # metaxy/store, row count, table preview, etc.)
                lazy_df = self.metadata_store.read_metadata(feature)
                runtime_metadata = build_runtime_feature_metadata(
                    key, self.metadata_store, lazy_df, context
                )
                context.add_output_metadata(runtime_metadata)

                # Get materialized-in-run count
                run_filter = nw.col(METAXY_MATERIALIZATION_ID) == context.run_id
                mat_lazy_df = self.metadata_store.read_metadata(
                    feature,
                    filters=[run_filter],
                )
                id_columns = feature.spec().id_columns
                materialized_in_run = (
                    mat_lazy_df.select(id_columns).unique().collect().to_native()
                )
                context.add_output_metadata(
                    {"metaxy/materialized_in_run": len(materialized_in_run)}
                )
            except FeatureNotFoundError:
                # Deliberately best-effort: leave a trace instead of failing
                # the materialization.
                context.log.debug(
                    f'Metaxy feature "{key.to_string()}" was not found in {self.metadata_store.display()}; skipping remaining output metadata'
                )