metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import dagster as dg
|
|
4
|
+
import narwhals as nw
|
|
5
|
+
import pydantic
|
|
6
|
+
from narwhals.typing import IntoFrame
|
|
7
|
+
|
|
8
|
+
import metaxy as mx
|
|
9
|
+
from metaxy.ext.dagster.constants import (
|
|
10
|
+
DAGSTER_METAXY_FEATURE_METADATA_KEY,
|
|
11
|
+
DAGSTER_METAXY_PARTITION_KEY,
|
|
12
|
+
)
|
|
13
|
+
from metaxy.ext.dagster.resources import MetaxyStoreFromConfigResource
|
|
14
|
+
from metaxy.ext.dagster.utils import (
|
|
15
|
+
build_partition_filter,
|
|
16
|
+
build_runtime_feature_metadata,
|
|
17
|
+
)
|
|
18
|
+
from metaxy.metadata_store.exceptions import FeatureNotFoundError
|
|
19
|
+
from metaxy.models.constants import METAXY_MATERIALIZATION_ID
|
|
20
|
+
from metaxy.models.types import ValidatedFeatureKey
|
|
21
|
+
|
|
22
|
+
#: Type alias for MetaxyIOManager output - any narwhals-compatible dataframe or None
|
|
23
|
+
# `None` is part of the alias so that an asset may return nothing:
# `MetaxyIOManager.handle_output` then skips the write and only logs metadata.
MetaxyOutput = IntoFrame | None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MetaxyIOManager(dg.ConfigurableIOManager):
    """MetaxyIOManager is a Dagster IOManager that reads and writes data to/from Metaxy's [`MetadataStore`][metaxy.MetadataStore].

    It automatically attaches Metaxy feature and store metadata to Dagster materialization events and handles partitioned assets.

    !!! warning "Always set `"metaxy/feature"` Dagster metadata"

        This IOManager uses the `"metaxy/feature"` Dagster metadata key to map Dagster assets into Metaxy features.
        It expects the key to be set on the assets being loaded or materialized.

        ??? example

            ```py
            import dagster as dg

            @dg.asset(
                metadata={
                    "metaxy/feature": "my/feature/key",
                }
            )
            def my_asset():
                ...
            ```

    !!! tip "Defining Partitioned Assets"

        To tell Metaxy which column to use when filtering partitioned assets, set the `"partition_by"` Dagster metadata key.

        ??? example
            ```py
            import dagster as dg

            @dg.asset(
                metadata={
                    "metaxy/feature": "my/feature/key",
                    "partition_by": "date",
                }
            )
            def my_partitioned_asset():
                ...
            ```

        This key is commonly used to configure partitioning behavior by various Dagster IO managers.

    """

    # Resource that provides the Metaxy metadata store. When it is not supplied,
    # a `MetaxyStoreFromConfigResource(name="dev")` is built for the instance.
    store: dg.ResourceDependency[MetaxyStoreFromConfigResource] = pydantic.Field(
        # `default_factory` must be a zero-argument callable, so the resource is
        # constructed lazily inside a lambda instead of being passed as an instance.
        default_factory=lambda: MetaxyStoreFromConfigResource(name="dev")
    )

    @property
    def metadata_store(
        self,
    ) -> mx.MetadataStore:  # this property mostly exists to fix the type annotation
        """Return `self.store`, annotated as a [`MetadataStore`][metaxy.MetadataStore]."""
        return self.store  # pyright: ignore[reportReturnType]

    def _feature_key_from_context(
        self, context: dg.InputContext | dg.OutputContext
    ) -> ValidatedFeatureKey:
        """Resolve the Metaxy feature key for the asset behind `context`.

        For an input context the key is read from the metadata of the upstream
        output; for an output context it is read from the definition metadata.
        In both cases it is looked up under `DAGSTER_METAXY_FEATURE_METADATA_KEY`
        and validated with `mx.ValidatedFeatureKeyAdapter`.

        Args:
            context: Dagster input or output context.

        Returns:
            The validated feature key.

        Raises:
            ValueError: If an input context has no upstream output or that
                output carries no metadata, or if `context` is neither an
                input nor an output context.
        """
        if isinstance(context, dg.InputContext):
            upstream = context.upstream_output
            # Explicit checks instead of `assert`, which is stripped under `python -O`.
            if upstream is None:
                raise ValueError(
                    "Cannot resolve Metaxy feature key: input context has no upstream output"
                )
            if upstream.metadata is None:
                raise ValueError(
                    "Cannot resolve Metaxy feature key: upstream output has no metadata"
                )
            return mx.ValidatedFeatureKeyAdapter.validate_python(
                upstream.metadata[DAGSTER_METAXY_FEATURE_METADATA_KEY]
            )
        elif isinstance(context, dg.OutputContext):
            return mx.ValidatedFeatureKeyAdapter.validate_python(
                context.definition_metadata[DAGSTER_METAXY_FEATURE_METADATA_KEY]
            )
        else:
            raise ValueError(f"Unexpected context type: {type(context)}")

    def load_input(self, context: "dg.InputContext") -> nw.LazyFrame[Any]:
        """Load feature metadata from [`MetadataStore`][metaxy.MetadataStore].

        Reads metadata for the feature specified in the asset's `"metaxy/feature"` metadata.
        For partitioned assets, filters to the current partition using the column specified
        in `"partition_by"` metadata.

        Args:
            context: Dagster input context containing asset metadata.

        Returns:
            A narwhals LazyFrame with the feature metadata.
        """
        with self.metadata_store:
            # Resolve the key once; it is used for both logging and reading.
            feature_key = self._feature_key_from_context(context)
            context.log.debug(
                f"Reading metadata for Metaxy feature {feature_key.to_string()} from {self.metadata_store.display()}"
            )

            # Build partition filter if applicable
            if context.has_asset_partitions:
                partition_col = context.definition_metadata.get(
                    DAGSTER_METAXY_PARTITION_KEY
                )
                partition_key = context.asset_partition_key
            else:
                partition_col = None
                partition_key = None
            filters = build_partition_filter(
                partition_col,  # pyright: ignore[reportArgumentType]
                partition_key,
            )

            return self.metadata_store.read_metadata(
                feature=feature_key,
                filters=filters,
            )

    def handle_output(self, context: "dg.OutputContext", obj: MetaxyOutput) -> None:
        """Write feature metadata to [`MetadataStore`][metaxy.MetadataStore].

        Writes the output dataframe to the metadata store for the feature specified
        in the asset's `"metaxy/feature"` metadata. Also logs metadata about the
        feature and store to Dagster's materialization events.

        If `obj` is `None`, only metadata logging is performed (no data is written).

        Args:
            context: Dagster output context containing asset metadata.
            obj: A narwhals-compatible dataframe to write, or None to skip writing.

        Raises:
            ValueError: If the asset metadata has no `"metaxy/feature"` key.
        """
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if DAGSTER_METAXY_FEATURE_METADATA_KEY not in context.definition_metadata:
            raise ValueError(
                f'Missing `"{DAGSTER_METAXY_FEATURE_METADATA_KEY}"` key in asset metadata'
            )
        key = self._feature_key_from_context(context)
        feature = mx.get_feature_by_key(key)

        if obj is not None:
            context.log.debug(
                f'Writing metadata for Metaxy feature "{key.to_string()}" into {self.metadata_store.display()}'
            )
            with self.metadata_store.open("write"):
                self.metadata_store.write_metadata(feature=feature, df=obj)
            context.log.debug(
                f'Metadata written for Metaxy feature "{key.to_string()}" into {self.metadata_store.display()}'
            )
        else:
            context.log.debug(
                f'The output corresponds to Metaxy feature "{key.to_string()}" stored in {self.metadata_store.display()}'
            )

        self._log_output_metadata(context)

    def _log_output_metadata(self, context: dg.OutputContext) -> None:
        """Attach Metaxy runtime metadata to the Dagster output.

        Adds the metadata produced by `build_runtime_feature_metadata`, plus a
        `"metaxy/materialized_in_run"` entry: the number of distinct id-column
        combinations whose `METAXY_MATERIALIZATION_ID` equals the current run id.

        This is best-effort: a `FeatureNotFoundError` is logged at debug level
        and otherwise ignored, so the materialization itself does not fail.

        Args:
            context: Dagster output context to attach the metadata to.
        """
        with self.metadata_store:
            key = self._feature_key_from_context(context)

            try:
                feature = mx.get_feature_by_key(key)

                # Build runtime metadata from data (includes metaxy/feature, metaxy/info,
                # metaxy/store, row count, table preview, etc.)
                lazy_df = self.metadata_store.read_metadata(feature)
                runtime_metadata = build_runtime_feature_metadata(
                    key, self.metadata_store, lazy_df, context
                )
                context.add_output_metadata(runtime_metadata)

                # Get materialized-in-run count
                mat_lazy_df = self.metadata_store.read_metadata(
                    feature,
                    filters=[nw.col(METAXY_MATERIALIZATION_ID) == context.run_id],
                )
                materialized_in_run = (
                    mat_lazy_df.select(feature.spec().id_columns)
                    .unique()
                    .collect()
                    .to_native()
                )
                context.add_output_metadata(
                    {"metaxy/materialized_in_run": len(materialized_in_run)}
                )
            except FeatureNotFoundError as exc:
                # Still best-effort, but leave a trace instead of swallowing silently.
                context.log.debug(
                    f'Skipping output metadata for Metaxy feature "{key.to_string()}": {exc}'
                )
|