metaxy-0.0.1.dev3-py3-none-any.whl
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
metaxy/models/lineage.py
ADDED
"""Lineage relationship types for feature dependencies.

This module defines how features relate to their upstream dependencies in terms of
cardinality and transformation patterns. These types make explicit the relationship
between parent and child features, enabling proper provenance aggregation.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from collections.abc import Sequence
from enum import Enum
from typing import Literal

from pydantic import BaseModel, ConfigDict
from pydantic import Field as PydanticField
from typing_extensions import Self


class LineageRelationshipType(str, Enum):
    """Type of lineage relationship between features."""

    IDENTITY = "1:1"
    AGGREGATION = "N:1"
    EXPANSION = "1:N"


class BaseLineageRelationship(BaseModel, ABC):  # pyright: ignore[reportUnsafeMultipleInheritance]
    """Base class for lineage relationship configurations.

    Lineage relationships define the cardinality and transformation pattern
    between a child feature and its upstream dependencies.
    """

    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_aggregation_columns(
        self,
        target_id_columns: Sequence[str],
    ) -> Sequence[str] | None:
        """Get columns to aggregate on for this relationship type.

        Args:
            target_id_columns: The target feature's ID columns.

        Returns:
            Columns to group by for aggregation, or None if no aggregation needed.
        """
        raise NotImplementedError


class IdentityRelationship(BaseLineageRelationship):
    """One-to-one relationship where each child row maps to exactly one parent row.

    This is the default relationship type. Parent and child features share the same
    ID columns and have the same cardinality. No aggregation is performed.

    Examples:
        >>> # Default 1:1 relationship
        >>> IdentityRelationship()

        >>> # Or use the classmethod
        >>> LineageRelationship.identity()
    """

    type: Literal[LineageRelationshipType.IDENTITY] = LineageRelationshipType.IDENTITY

    def get_aggregation_columns(
        self,
        target_id_columns: Sequence[str],
    ) -> Sequence[str] | None:
        """No aggregation needed for identity relationships."""
        return None


class AggregationRelationship(BaseLineageRelationship):
    """Many-to-one relationship where multiple parent rows aggregate to one child row.

    Parent features have more granular ID columns than the child. The child aggregates
    multiple parent rows by grouping on a subset of the parent's ID columns.

    Attributes:
        on: Columns to group by for aggregation. These should be a subset of the
            target feature's ID columns. If not specified, uses all target ID columns.

    Examples:
        >>> # Aggregate sensor readings by hour
        >>> AggregationRelationship(on=["sensor_id", "hour"])
        >>> # Parent has: sensor_id, hour, minute
        >>> # Child has: sensor_id, hour

        >>> # Or use the classmethod
        >>> LineageRelationship.aggregation(on=["user_id", "session_id"])
    """

    type: Literal[LineageRelationshipType.AGGREGATION] = (
        LineageRelationshipType.AGGREGATION
    )
    on: Sequence[str] | None = PydanticField(
        default=None,
        description="Columns to group by for aggregation. Defaults to all target ID columns.",
    )

    def get_aggregation_columns(
        self,
        target_id_columns: Sequence[str],
    ) -> Sequence[str]:
        """Get columns to aggregate on."""
        return self.on if self.on is not None else target_id_columns


class ExpansionRelationship(BaseLineageRelationship):
    """One-to-many relationship where one parent row expands to multiple child rows.

    Child features have more granular ID columns than the parent. Each parent row
    generates multiple child rows with additional ID columns.

    Attributes:
        on: Parent ID columns that identify the parent record. Child records with
            the same parent IDs will share the same upstream provenance.
            Required: the columns linking parent to child must be specified explicitly.
        id_generation_pattern: Optional pattern for generating child IDs.
            Can be "sequential", "hash", or a custom pattern. If not specified,
            the feature's load_input() method is responsible for ID generation.

    Examples:
        >>> # Video frames from video
        >>> ExpansionRelationship(
        ...     on=["video_id"],  # Parent ID
        ...     id_generation_pattern="sequential"
        ... )
        >>> # Parent has: video_id
        >>> # Child has: video_id, frame_id (generated)

        >>> # Text chunks from document
        >>> ExpansionRelationship(on=["doc_id"])
        >>> # Parent has: doc_id
        >>> # Child has: doc_id, chunk_id (generated in load_input)
    """

    type: Literal[LineageRelationshipType.EXPANSION] = LineageRelationshipType.EXPANSION
    on: Sequence[str] = PydanticField(
        ...,
        description="Parent ID columns for grouping. Child records with same parent IDs share provenance. Required for expansion relationships.",
    )
    id_generation_pattern: str | None = PydanticField(
        default=None,
        description="Pattern for generating child IDs. If None, handled by load_input().",
    )

    def get_aggregation_columns(
        self,
        target_id_columns: Sequence[str],
    ) -> Sequence[str] | None:
        """Get aggregation columns for the joiner phase.

        For expansion relationships, returns None because aggregation
        happens during diff resolution, not during joining. The joiner
        should pass through all parent records without aggregation.

        Args:
            target_id_columns: The target (child) feature's ID columns.

        Returns:
            None - no aggregation during join phase for expansion relationships.
        """
        # Expansion relationships don't aggregate during join phase
        # Aggregation happens later during diff resolution
        return None


# Discriminated union type for all lineage relationships
LineageRelationshipUnion = (
    IdentityRelationship | AggregationRelationship | ExpansionRelationship
)


class LineageRelationship(BaseModel):
    """Wrapper class for lineage relationship configurations with convenient constructors.

    This provides a cleaner API for creating lineage relationships while maintaining
    type safety through discriminated unions.
    """

    model_config = ConfigDict(frozen=True)

    relationship: LineageRelationshipUnion = PydanticField(..., discriminator="type")

    @classmethod
    def identity(cls) -> Self:
        """Create an identity (1:1) relationship.

        Returns:
            Configured LineageRelationship for 1:1 relationship.

        Examples:
            >>> spec = FeatureSpec(
            ...     key="feature",
            ...     lineage=LineageRelationship.identity()
            ... )
        """
        return cls(relationship=IdentityRelationship())

    @classmethod
    def aggregation(cls, on: Sequence[str] | None = None) -> Self:
        """Create an aggregation (N:1) relationship.

        Args:
            on: Columns to group by for aggregation. If None, uses all target ID columns.

        Returns:
            Configured LineageRelationship for N:1 relationship.

        Examples:
            >>> # Aggregate on specific columns
            >>> spec = FeatureSpec(
            ...     key="hourly_stats",
            ...     id_columns=["sensor_id", "hour"],
            ...     lineage=LineageRelationship.aggregation(on=["sensor_id", "hour"])
            ... )

            >>> # Aggregate on all ID columns (default)
            >>> spec = FeatureSpec(
            ...     key="user_summary",
            ...     id_columns=["user_id"],
            ...     lineage=LineageRelationship.aggregation()
            ... )
        """
        return cls(relationship=AggregationRelationship(on=on))

    @classmethod
    def expansion(
        cls,
        on: Sequence[str],
        id_generation_pattern: str | None = None,
    ) -> Self:
        """Create an expansion (1:N) relationship.

        Args:
            on: Parent ID columns that identify the parent record. Child records with
                the same parent IDs will share the same upstream provenance.
                Required - must explicitly specify which columns link parent to child.
            id_generation_pattern: Pattern for generating child IDs.
                Can be "sequential", "hash", or custom. If None, handled by load_input().

        Returns:
            Configured LineageRelationship for 1:N relationship.

        Examples:
            >>> # Sequential ID generation with explicit parent ID
            >>> spec = FeatureSpec(
            ...     key="video_frames",
            ...     id_columns=["video_id", "frame_id"],
            ...     lineage=LineageRelationship.expansion(
            ...         on=["video_id"],
            ...         id_generation_pattern="sequential"
            ...     )
            ... )

            >>> # Custom ID generation in load_input()
            >>> spec = FeatureSpec(
            ...     key="text_chunks",
            ...     id_columns=["doc_id", "chunk_id"],
            ...     lineage=LineageRelationship.expansion(on=["doc_id"])
            ... )
        """
        return cls(
            relationship=ExpansionRelationship(
                on=on, id_generation_pattern=id_generation_pattern
            )
        )

    def get_aggregation_columns(
        self, target_id_columns: Sequence[str]
    ) -> Sequence[str] | None:
        """Get columns to aggregate on for this relationship.

        Args:
            target_id_columns: The target feature's ID columns.

        Returns:
            Columns to group by for aggregation, or None if no aggregation needed.
        """
        return self.relationship.get_aggregation_columns(target_id_columns)
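
A minimal usage sketch (not part of the package) exercising the get_aggregation_columns contract documented above. It assumes only this module; the FeatureSpec wiring shown in the docstrings is omitted:

from metaxy.models.lineage import AggregationRelationship, LineageRelationship

# 1:1 -- no grouping is ever requested.
identity = LineageRelationship.identity()
assert identity.get_aggregation_columns(["user_id"]) is None

# N:1 -- groups on `on` when given, otherwise on all target ID columns.
agg = LineageRelationship.aggregation(on=["sensor_id", "hour"])
assert list(agg.get_aggregation_columns(["sensor_id", "hour"])) == ["sensor_id", "hour"]
assert AggregationRelationship().get_aggregation_columns(["user_id"]) == ["user_id"]

# 1:N -- returns None here; per the docstring, aggregation is deferred to diff resolution.
exp = LineageRelationship.expansion(on=["video_id"], id_generation_pattern="sequential")
assert exp.get_aggregation_columns(["video_id", "frame_id"]) is None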
metaxy/models/plan.py
ADDED
from collections.abc import Mapping
from functools import cached_property

import pydantic

from metaxy.models.bases import FrozenBaseModel
from metaxy.models.feature_spec import FeatureDep, FeatureKey, FeatureSpec
from metaxy.models.field import (
    FieldDep,
    FieldKey,
    FieldSpec,
    SpecialFieldDep,
)
from metaxy.models.fields_mapping import FieldsMappingResolutionContext
from metaxy.models.types import CoercibleToFieldKey, ValidatedFieldKeyAdapter

# Rebuild the model now that FeatureSpec is available
FieldsMappingResolutionContext.model_rebuild()


class FQFieldKey(FrozenBaseModel):
    field: FieldKey
    feature: FeatureKey

    def to_string(self) -> str:
        return f"{self.feature.to_string()}.{self.field.to_string()}"

    def __repr__(self) -> str:
        return self.to_string()

    def __lt__(self, other: "FQFieldKey") -> bool:
        """Enable sorting of FQFieldKey objects."""
        return self.to_string() < other.to_string()

    def __le__(self, other: "FQFieldKey") -> bool:
        """Enable sorting of FQFieldKey objects."""
        return self.to_string() <= other.to_string()

    def __gt__(self, other: "FQFieldKey") -> bool:
        """Enable sorting of FQFieldKey objects."""
        return self.to_string() > other.to_string()

    def __ge__(self, other: "FQFieldKey") -> bool:
        """Enable sorting of FQFieldKey objects."""
        return self.to_string() >= other.to_string()


class FeaturePlan(FrozenBaseModel):
    """Slice of the feature graph that includes a given feature and its parents."""

    feature: pydantic.SkipValidation[FeatureSpec]
    deps: pydantic.SkipValidation[list[FeatureSpec] | None]
    feature_deps: list[FeatureDep] | None = (
        None  # The actual dependency specifications with field mappings
    )

    @cached_property
    def parent_features_by_key(
        self,
    ) -> Mapping[FeatureKey, FeatureSpec]:
        return {feature.key: feature for feature in self.deps or []}

    @cached_property
    def all_parent_fields_by_key(self) -> Mapping[FQFieldKey, FieldSpec]:
        res: dict[FQFieldKey, FieldSpec] = {}

        for feature in self.deps or []:
            for field in feature.fields:
                res[FQFieldKey(field=field.key, feature=feature.key)] = field

        return res

    @cached_property
    def parent_fields_by_key(self) -> Mapping[FQFieldKey, FieldSpec]:
        res: dict[FQFieldKey, FieldSpec] = {}

        for field in self.feature.fields:
            res.update(self.get_parent_fields_for_field(field.key))

        return res

    @cached_property
    def parent_fields_by_feature_key(self) -> Mapping[FeatureKey, set[FieldKey]]:
        res: dict[FeatureKey, set[FieldKey]] = {}

        if self.deps:
            for feature in self.deps:
                res[feature.key] = set([f.key for f in feature.fields])

        return res

    def get_parent_fields_for_field(
        self, key: CoercibleToFieldKey
    ) -> Mapping[FQFieldKey, FieldSpec]:
        """Get parent fields for a given field key.

        Args:
            key: Field key to get parent fields for. Accepts string, sequence, or FieldKey.

        Returns:
            Mapping of fully qualified field keys to their specs.
        """
        # Validate and coerce the key
        validated_key = ValidatedFieldKeyAdapter.validate_python(key)

        res = {}

        field = self.feature.fields_by_key[validated_key]

        # Get resolved dependencies (combining automatic mapping and explicit deps)
        resolved_deps = self._resolve_field_deps(field)

        for field_dep in resolved_deps:
            if field_dep.fields == SpecialFieldDep.ALL:
                # we depend on all fields of the corresponding upstream feature
                for parent_field in self.parent_features_by_key[
                    field_dep.feature
                ].fields:
                    res[
                        FQFieldKey(
                            field=parent_field.key,
                            feature=field_dep.feature,
                        )
                    ] = parent_field

            elif isinstance(field_dep, FieldDep):
                # Specific upstream fields were declared for this dependency
                for field_key in field_dep.fields:
                    fq_key = FQFieldKey(
                        field=field_key,
                        feature=field_dep.feature,
                    )
                    res[fq_key] = self.all_parent_fields_by_key[fq_key]
            else:
                raise ValueError(f"Unsupported dependency type: {type(field_dep)}")

        return res

    def _resolve_field_deps(self, field: FieldSpec) -> list[FieldDep]:
        """Resolve field dependencies by combining explicit deps and automatic mapping.

        Apply field mappings from the FeatureDep and add explicit deps.
        """

        if not self.feature_deps:
            return []

        # Check if field has explicit deps
        if field.deps and field.deps != []:  # Check for non-empty list
            if isinstance(field.deps, SpecialFieldDep):
                # If it's SpecialFieldDep.ALL, return ALL for all upstream features
                return [
                    FieldDep(feature=dep.key, fields=SpecialFieldDep.ALL)
                    for dep in (self.deps or [])
                ]
            else:
                # Use only the explicit deps, no automatic mapping
                return field.deps

        # No explicit deps - use automatic mapping
        field_deps = []

        for feature_dep in self.feature_deps:
            # Resolve field mapping for this specific upstream feature
            # Get the upstream feature spec
            upstream_feature = self.parent_features_by_key.get(feature_dep.feature)
            if not upstream_feature:
                continue

            # Create resolution context
            context = FieldsMappingResolutionContext(
                field_key=field.key, upstream_feature=upstream_feature
            )

            mapped_deps = feature_dep.fields_mapping.resolve_field_deps(context)

            if mapped_deps:
                # Add a single FieldDep with all mapped fields
                field_deps.append(
                    FieldDep(feature=feature_dep.feature, fields=list(mapped_deps))
                )
            # Note: If mapped_deps is empty (e.g., feature excluded),
            # we don't add any dependency for this feature

        if field_deps:
            return field_deps
        else:
            raise RuntimeError(
                f"No upstream fields found for field {field} of feature {self.feature}. Please either specify explicit dependencies on its FieldSpec or ensure that at least one FeatureDep on the FeatureSpec has a valid field mapping."
            )

    @cached_property
    def field_dependencies(
        self,
    ) -> Mapping[FieldKey, Mapping[FeatureKey, list[FieldKey]]]:
        """Get dependencies for each field in this feature.

        Returns a mapping from field key to its upstream dependencies.
        Each dependency maps an upstream feature key to a list of field keys
        that this field depends on.

        This is the format needed by DataVersionResolver.

        Returns:
            Mapping of field keys to their dependency specifications.
            Format: {field_key: {upstream_feature_key: [upstream_field_keys]}}
        """
        result: dict[FieldKey, dict[FeatureKey, list[FieldKey]]] = {}

        for field in self.feature.fields:
            field_deps: dict[FeatureKey, list[FieldKey]] = {}

            # Get resolved dependencies (combining automatic mapping and explicit deps)
            resolved_deps = self._resolve_field_deps(field)

            # Specific dependencies defined
            for field_dep in resolved_deps:
                feature_key = field_dep.feature

                if field_dep.fields == SpecialFieldDep.ALL:
                    # All fields from this upstream feature
                    upstream_feature_spec = self.parent_features_by_key[feature_key]
                    field_deps[feature_key] = [
                        c.key for c in upstream_feature_spec.fields
                    ]
                elif isinstance(field_dep.fields, list):
                    # Specific fields
                    field_deps[feature_key] = field_dep.fields

            result[field.key] = field_deps

        return result
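
For orientation, a hedged sketch (not part of the package) of consuming a FeaturePlan. The FeatureSpec and FeatureDep values are taken as parameters because their construction lives in metaxy.models.feature_spec, which this diff does not show:

from metaxy.models.feature_spec import FeatureDep, FeatureSpec
from metaxy.models.plan import FeaturePlan


def describe_plan(
    child: FeatureSpec,
    parents: list[FeatureSpec],
    deps: list[FeatureDep],
) -> None:
    """Print each field's upstream dependencies for one slice of the feature graph."""
    plan = FeaturePlan(feature=child, deps=parents, feature_deps=deps)
    # field_dependencies yields {field_key: {upstream_feature_key: [field_keys]}},
    # the format the docstring above says DataVersionResolver expects.
    for field_key, upstream in plan.field_dependencies.items():
        for feature_key, upstream_fields in upstream.items():
            print(f"{field_key} <- {feature_key}: {upstream_fields}")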