metaxy 0.0.1.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaxy/__init__.py +170 -0
- metaxy/_packaging.py +96 -0
- metaxy/_testing/__init__.py +55 -0
- metaxy/_testing/config.py +43 -0
- metaxy/_testing/metaxy_project.py +780 -0
- metaxy/_testing/models.py +111 -0
- metaxy/_testing/parametric/__init__.py +13 -0
- metaxy/_testing/parametric/metadata.py +664 -0
- metaxy/_testing/pytest_helpers.py +74 -0
- metaxy/_testing/runbook.py +533 -0
- metaxy/_utils.py +35 -0
- metaxy/_version.py +1 -0
- metaxy/cli/app.py +97 -0
- metaxy/cli/console.py +13 -0
- metaxy/cli/context.py +167 -0
- metaxy/cli/graph.py +610 -0
- metaxy/cli/graph_diff.py +290 -0
- metaxy/cli/list.py +46 -0
- metaxy/cli/metadata.py +317 -0
- metaxy/cli/migrations.py +999 -0
- metaxy/cli/utils.py +268 -0
- metaxy/config.py +680 -0
- metaxy/entrypoints.py +296 -0
- metaxy/ext/__init__.py +1 -0
- metaxy/ext/dagster/__init__.py +54 -0
- metaxy/ext/dagster/constants.py +10 -0
- metaxy/ext/dagster/dagster_type.py +156 -0
- metaxy/ext/dagster/io_manager.py +200 -0
- metaxy/ext/dagster/metaxify.py +512 -0
- metaxy/ext/dagster/observable.py +115 -0
- metaxy/ext/dagster/resources.py +27 -0
- metaxy/ext/dagster/selection.py +73 -0
- metaxy/ext/dagster/table_metadata.py +417 -0
- metaxy/ext/dagster/utils.py +462 -0
- metaxy/ext/sqlalchemy/__init__.py +23 -0
- metaxy/ext/sqlalchemy/config.py +29 -0
- metaxy/ext/sqlalchemy/plugin.py +353 -0
- metaxy/ext/sqlmodel/__init__.py +13 -0
- metaxy/ext/sqlmodel/config.py +29 -0
- metaxy/ext/sqlmodel/plugin.py +499 -0
- metaxy/graph/__init__.py +29 -0
- metaxy/graph/describe.py +325 -0
- metaxy/graph/diff/__init__.py +21 -0
- metaxy/graph/diff/diff_models.py +446 -0
- metaxy/graph/diff/differ.py +769 -0
- metaxy/graph/diff/models.py +443 -0
- metaxy/graph/diff/rendering/__init__.py +18 -0
- metaxy/graph/diff/rendering/base.py +323 -0
- metaxy/graph/diff/rendering/cards.py +188 -0
- metaxy/graph/diff/rendering/formatter.py +805 -0
- metaxy/graph/diff/rendering/graphviz.py +246 -0
- metaxy/graph/diff/rendering/mermaid.py +326 -0
- metaxy/graph/diff/rendering/rich.py +169 -0
- metaxy/graph/diff/rendering/theme.py +48 -0
- metaxy/graph/diff/traversal.py +247 -0
- metaxy/graph/status.py +329 -0
- metaxy/graph/utils.py +58 -0
- metaxy/metadata_store/__init__.py +32 -0
- metaxy/metadata_store/_ducklake_support.py +419 -0
- metaxy/metadata_store/base.py +1792 -0
- metaxy/metadata_store/bigquery.py +354 -0
- metaxy/metadata_store/clickhouse.py +184 -0
- metaxy/metadata_store/delta.py +371 -0
- metaxy/metadata_store/duckdb.py +446 -0
- metaxy/metadata_store/exceptions.py +61 -0
- metaxy/metadata_store/ibis.py +542 -0
- metaxy/metadata_store/lancedb.py +391 -0
- metaxy/metadata_store/memory.py +292 -0
- metaxy/metadata_store/system/__init__.py +57 -0
- metaxy/metadata_store/system/events.py +264 -0
- metaxy/metadata_store/system/keys.py +9 -0
- metaxy/metadata_store/system/models.py +129 -0
- metaxy/metadata_store/system/storage.py +957 -0
- metaxy/metadata_store/types.py +10 -0
- metaxy/metadata_store/utils.py +104 -0
- metaxy/metadata_store/warnings.py +36 -0
- metaxy/migrations/__init__.py +32 -0
- metaxy/migrations/detector.py +291 -0
- metaxy/migrations/executor.py +516 -0
- metaxy/migrations/generator.py +319 -0
- metaxy/migrations/loader.py +231 -0
- metaxy/migrations/models.py +528 -0
- metaxy/migrations/ops.py +447 -0
- metaxy/models/__init__.py +0 -0
- metaxy/models/bases.py +12 -0
- metaxy/models/constants.py +139 -0
- metaxy/models/feature.py +1335 -0
- metaxy/models/feature_spec.py +338 -0
- metaxy/models/field.py +263 -0
- metaxy/models/fields_mapping.py +307 -0
- metaxy/models/filter_expression.py +297 -0
- metaxy/models/lineage.py +285 -0
- metaxy/models/plan.py +232 -0
- metaxy/models/types.py +475 -0
- metaxy/py.typed +0 -0
- metaxy/utils/__init__.py +1 -0
- metaxy/utils/constants.py +2 -0
- metaxy/utils/exceptions.py +23 -0
- metaxy/utils/hashing.py +230 -0
- metaxy/versioning/__init__.py +31 -0
- metaxy/versioning/engine.py +656 -0
- metaxy/versioning/feature_dep_transformer.py +151 -0
- metaxy/versioning/ibis.py +249 -0
- metaxy/versioning/lineage_handler.py +205 -0
- metaxy/versioning/polars.py +189 -0
- metaxy/versioning/renamed_df.py +35 -0
- metaxy/versioning/types.py +63 -0
- metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
- metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
- metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
- metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""System table components for metadata store.
|
|
2
|
+
|
|
3
|
+
This package provides system table functionality for Metaxy:
|
|
4
|
+
- events: Migration event types with builder pattern
|
|
5
|
+
- keys: System table keys and constants
|
|
6
|
+
- models: Pydantic models and schemas for system tables
|
|
7
|
+
- storage: Storage layer for system tables
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from metaxy.metadata_store.system.events import (
|
|
11
|
+
COL_EVENT_TYPE,
|
|
12
|
+
COL_EXECUTION_ID,
|
|
13
|
+
COL_FEATURE_KEY,
|
|
14
|
+
COL_PAYLOAD,
|
|
15
|
+
COL_PROJECT,
|
|
16
|
+
COL_TIMESTAMP,
|
|
17
|
+
Event,
|
|
18
|
+
EventType,
|
|
19
|
+
MigrationStatus,
|
|
20
|
+
PayloadType,
|
|
21
|
+
)
|
|
22
|
+
from metaxy.metadata_store.system.keys import (
|
|
23
|
+
EVENTS_KEY,
|
|
24
|
+
FEATURE_VERSIONS_KEY,
|
|
25
|
+
METAXY_SYSTEM_KEY_PREFIX,
|
|
26
|
+
)
|
|
27
|
+
from metaxy.metadata_store.system.models import (
|
|
28
|
+
FEATURE_VERSIONS_SCHEMA,
|
|
29
|
+
FeatureVersionsModel,
|
|
30
|
+
)
|
|
31
|
+
from metaxy.metadata_store.system.storage import (
|
|
32
|
+
SystemTableStorage,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Names re-exported from the events, keys, models and storage submodules
# imported above, grouped by category.
__all__ = [
    # Events
    "Event",
    "EventType",
    "MigrationStatus",
    "PayloadType",
    # Column names
    "COL_PROJECT",
    "COL_EXECUTION_ID",
    "COL_EVENT_TYPE",
    "COL_TIMESTAMP",
    "COL_FEATURE_KEY",
    "COL_PAYLOAD",
    # Keys
    "METAXY_SYSTEM_KEY_PREFIX",
    "FEATURE_VERSIONS_KEY",
    "EVENTS_KEY",
    # Models
    "FEATURE_VERSIONS_SCHEMA",
    "FeatureVersionsModel",
    # Storage
    "SystemTableStorage",
]
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Annotated, Literal
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class EventType(str, Enum):
    """Metaxy event types.

    Members mix in ``str``, so each member compares equal to its string value.
    Two families are defined: migration-wide events and per-feature migration
    events, each with started / completed / failed variants. EVENTS_SCHEMA
    builds the dtype of its event_type column from this enum.
    """

    # Migration-level events
    MIGRATION_STARTED = "migration_started"
    MIGRATION_COMPLETED = "migration_completed"
    MIGRATION_FAILED = "migration_failed"
    # Feature-level events within a migration
    FEATURE_MIGRATION_STARTED = "feature_migration_started"
    FEATURE_MIGRATION_COMPLETED = "feature_migration_completed"
    FEATURE_MIGRATION_FAILED = "feature_migration_failed"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PayloadType(str, Enum):
    """Payload types for event payloads.

    Each member is used as the ``Literal`` value of the ``type`` field on one
    payload model (EmptyPayload, ErrorPayload, RowsAffectedPayload), which is
    the field Event.payload discriminates on.
    """

    EMPTY = "empty"
    ERROR = "error"
    ROWS_AFFECTED = "rows_affected"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MigrationStatus(str, Enum):
    """Migration execution status.

    Members mix in ``str``, so each member compares equal to its string value.
    """

    NOT_STARTED = "not_started"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Column name constants (to avoid drift between Event model and storage).
# Both EVENTS_SCHEMA and Event.to_polars key their data by these names.
COL_PROJECT = "project"
COL_EXECUTION_ID = "execution_id"
COL_EVENT_TYPE = "event_type"
COL_TIMESTAMP = "timestamp"
COL_FEATURE_KEY = "feature_key"
COL_PAYLOAD = "payload"
|
|
46
|
+
|
|
47
|
+
# Events schema (for Polars storage).
# Maps each events-table column name to its Polars dtype; Event.to_polars
# passes this mapping as the DataFrame schema.
EVENTS_SCHEMA = {
    COL_PROJECT: pl.String,
    COL_EXECUTION_ID: pl.String,
    COL_EVENT_TYPE: pl.Enum(EventType),  # Enum dtype built from the EventType class
    COL_TIMESTAMP: pl.Datetime("us"),  # microsecond precision, no time zone declared
    COL_FEATURE_KEY: pl.String,
    COL_PAYLOAD: pl.String,  # JSON string with arbitrary event data
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class EmptyPayload(BaseModel):
    """Empty payload for events with no additional data.

    The model is frozen (immutable after construction) and carries only the
    ``type`` tag, fixed to PayloadType.EMPTY.
    """

    model_config = ConfigDict(frozen=True)
    # Tag value identifying this payload variant.
    type: Literal[PayloadType.EMPTY] = PayloadType.EMPTY
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class ErrorPayload(BaseModel):
    """Payload for events with error information.

    The model is frozen (immutable after construction); its ``type`` tag is
    fixed to PayloadType.ERROR.
    """

    model_config = ConfigDict(frozen=True)
    # Tag value identifying this payload variant.
    type: Literal[PayloadType.ERROR] = PayloadType.ERROR
    # Required description of the failure.
    error_message: str
    rows_affected: int | None = None  # Optional: rows processed before failure
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class RowsAffectedPayload(BaseModel):
    """Payload for events tracking rows affected.

    The model is frozen (immutable after construction); its ``type`` tag is
    fixed to PayloadType.ROWS_AFFECTED.
    """

    model_config = ConfigDict(frozen=True)
    # Tag value identifying this payload variant.
    type: Literal[PayloadType.ROWS_AFFECTED] = PayloadType.ROWS_AFFECTED
    # Required row count.
    rows_affected: int
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Union of every payload model. On its own this is a plain union; the
# Event.payload field declares discriminator="type" so the `type` tag selects
# the concrete model.
Payload = EmptyPayload | ErrorPayload | RowsAffectedPayload
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class Event(BaseModel):
    """A single migration event carrying a typed payload.

    One class represents every kind of event; instances are told apart by
    ``event_type`` and by the ``type`` tag of their payload. The model is
    frozen, so instances cannot be modified after construction.
    """

    model_config = ConfigDict(frozen=True)

    event_type: EventType
    project: str
    # Generic ID for the execution (migration, job, etc.).
    execution_id: str
    # Defaults to the current UTC time when the event is constructed.
    # NOTE(review): this default is timezone-aware while EVENTS_SCHEMA declares
    # pl.Datetime("us") without a time zone - confirm Polars handles this as intended.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    # Feature key for feature-level events; None for execution-level events.
    feature_key: str | None = None
    # Defaults to an EmptyPayload; the concrete model is selected by `type`.
    payload: Annotated[
        Payload, Field(default_factory=EmptyPayload, discriminator="type")
    ]

    def to_polars(self) -> pl.DataFrame:
        """Render this event as a one-row Polars DataFrame.

        The payload is stored as its JSON serialization.

        Returns:
            Polars DataFrame with a single row, built with EVENTS_SCHEMA
        """
        row = {
            COL_PROJECT: self.project,
            COL_EXECUTION_ID: self.execution_id,
            COL_EVENT_TYPE: self.event_type,
            COL_TIMESTAMP: self.timestamp,
            COL_FEATURE_KEY: self.feature_key,
            COL_PAYLOAD: self.payload.model_dump_json(),
        }
        return pl.DataFrame([row], schema=EVENTS_SCHEMA)

    @classmethod
    def migration_started(cls, project: str, migration_id: str) -> Event:
        """Build the event marking the start of a migration.

        Args:
            project: Project name
            migration_id: Migration ID (stored as execution_id)

        Returns:
            Event of type MIGRATION_STARTED with an empty payload
        """
        no_payload = EmptyPayload()
        return cls(
            event_type=EventType.MIGRATION_STARTED,
            project=project,
            execution_id=migration_id,
            payload=no_payload,
        )

    @classmethod
    def migration_completed(cls, project: str, migration_id: str) -> Event:
        """Build the event marking the completion of a migration.

        Args:
            project: Project name
            migration_id: Migration ID (stored as execution_id)

        Returns:
            Event of type MIGRATION_COMPLETED with an empty payload
        """
        no_payload = EmptyPayload()
        return cls(
            event_type=EventType.MIGRATION_COMPLETED,
            project=project,
            execution_id=migration_id,
            payload=no_payload,
        )

    @classmethod
    def migration_failed(
        cls,
        project: str,
        migration_id: str,
        error_message: str,
        rows_affected: int | None = None,
    ) -> Event:
        """Build the event marking the failure of a migration.

        Args:
            project: Project name
            migration_id: Migration ID (stored as execution_id)
            error_message: Error message describing the failure
            rows_affected: Optional number of rows processed before failure

        Returns:
            Event of type MIGRATION_FAILED with an error payload
        """
        failure = ErrorPayload(
            error_message=error_message, rows_affected=rows_affected
        )
        return cls(
            event_type=EventType.MIGRATION_FAILED,
            project=project,
            execution_id=migration_id,
            payload=failure,
        )

    @classmethod
    def feature_migration_started(
        cls, project: str, migration_id: str, feature_key: str
    ) -> Event:
        """Build the event marking the start of one feature's migration.

        Args:
            project: Project name
            migration_id: Migration ID (stored as execution_id)
            feature_key: Feature key being processed

        Returns:
            Event of type FEATURE_MIGRATION_STARTED with an empty payload
        """
        no_payload = EmptyPayload()
        return cls(
            event_type=EventType.FEATURE_MIGRATION_STARTED,
            project=project,
            execution_id=migration_id,
            feature_key=feature_key,
            payload=no_payload,
        )

    @classmethod
    def feature_migration_completed(
        cls, project: str, migration_id: str, feature_key: str, rows_affected: int
    ) -> Event:
        """Build the event marking the successful migration of one feature.

        Args:
            project: Project name
            migration_id: Migration ID (stored as execution_id)
            feature_key: Feature key that was processed
            rows_affected: Number of rows affected

        Returns:
            Event of type FEATURE_MIGRATION_COMPLETED with a rows_affected payload
        """
        row_count = RowsAffectedPayload(rows_affected=rows_affected)
        return cls(
            event_type=EventType.FEATURE_MIGRATION_COMPLETED,
            project=project,
            execution_id=migration_id,
            feature_key=feature_key,
            payload=row_count,
        )

    @classmethod
    def feature_migration_failed(
        cls,
        project: str,
        migration_id: str,
        feature_key: str,
        error_message: str,
        rows_affected: int | None = None,
    ) -> Event:
        """Build the event marking the failed migration of one feature.

        Args:
            project: Project name
            migration_id: Migration ID (stored as execution_id)
            feature_key: Feature key that failed
            error_message: Error message describing the failure
            rows_affected: Optional number of rows processed before failure

        Returns:
            Event of type FEATURE_MIGRATION_FAILED with an error payload
        """
        failure = ErrorPayload(
            error_message=error_message, rows_affected=rows_affected
        )
        return cls(
            event_type=EventType.FEATURE_MIGRATION_FAILED,
            project=project,
            execution_id=migration_id,
            feature_key=feature_key,
            payload=failure,
        )

    # Short alternative names bound to the same feature-level factories.
    feature_started = feature_migration_started
    feature_completed = feature_migration_completed
    feature_failed = feature_migration_failed
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""System table keys and constants."""
|
|
2
|
+
|
|
3
|
+
from metaxy.models.types import FeatureKey
|
|
4
|
+
|
|
5
|
+
# First part shared by every system table key defined below.
METAXY_SYSTEM_KEY_PREFIX = "metaxy-system"

# System table keys: each is a FeatureKey built from the shared prefix
# followed by the table name.
FEATURE_VERSIONS_KEY = FeatureKey([METAXY_SYSTEM_KEY_PREFIX, "feature_versions"])
EVENTS_KEY = FeatureKey([METAXY_SYSTEM_KEY_PREFIX, "events"])
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Pydantic models for system tables."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
from pydantic import BaseModel, Field, field_validator
|
|
9
|
+
|
|
10
|
+
from metaxy.metadata_store.system import FEATURE_VERSIONS_KEY
|
|
11
|
+
from metaxy.metadata_store.system.events import EVENTS_SCHEMA
|
|
12
|
+
from metaxy.metadata_store.system.keys import EVENTS_KEY
|
|
13
|
+
from metaxy.models.constants import (
|
|
14
|
+
METAXY_FEATURE_SPEC_VERSION,
|
|
15
|
+
METAXY_FEATURE_VERSION,
|
|
16
|
+
METAXY_FULL_DEFINITION_VERSION,
|
|
17
|
+
METAXY_SNAPSHOT_VERSION,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Common Polars schemas for system tables
# Note: tags is declared as a plain String column; FeatureVersionsModel
# serializes the tags mapping to a JSON string before it reaches Polars.
FEATURE_VERSIONS_SCHEMA = {
    "project": pl.String,
    "feature_key": pl.String,
    METAXY_FEATURE_VERSION: pl.String,
    METAXY_FEATURE_SPEC_VERSION: pl.String,  # Hash of complete FeatureSpec (all properties)
    METAXY_FULL_DEFINITION_VERSION: pl.String,  # Hash of feature_spec_version + project (for migration detection) # TODO: this is probably not needed, we can just use a combination of project and metaxy_feature_version instead
    "recorded_at": pl.Datetime("us"),
    "feature_spec": pl.String,  # Full serialized FeatureSpec
    "feature_schema": pl.String,  # Full Pydantic model schema as JSON
    "feature_class_path": pl.String,
    METAXY_SNAPSHOT_VERSION: pl.String,
    "tags": pl.String,  # JSON string of key-value tags
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Reserved key inside the tags mapping; FeatureVersionsModel.serialize_tags
# stores a JSON object under it.
METAXY_TAG = "metaxy"
# Key inside that JSON object holding the metaxy package version.
METAXY_VERSION_KEY = "version"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class FeatureVersionsModel(BaseModel):
    """Pydantic model for one row of the feature_versions system table.

    This table records when feature specifications are pushed to production,
    tracking the evolution of feature definitions over time.
    """

    project: str
    feature_key: str
    metaxy_feature_version: str = Field(
        ...,
        description="Hash of versioned feature topology (combined versions of fields on this feature)",
    )
    metaxy_feature_spec_version: str = Field(
        ..., description="Hash of complete FeatureSpec (all properties)"
    )
    metaxy_full_definition_version: str = Field(
        ..., description="Hash of feature_spec_version + project"
    )
    recorded_at: datetime = Field(
        ..., description="Timestamp when feature version was recorded"
    )
    feature_spec: str = Field(
        ..., description="Full serialized FeatureSpec as JSON string"
    )
    feature_schema: str = Field(
        ..., description="Full Pydantic model schema as JSON string"
    )
    feature_class_path: str = Field(
        ..., description="Python import path to Feature class"
    )
    metaxy_snapshot_version: str = Field(
        ..., description="Deterministic hash of entire Metaxy project"
    )
    # validate_default=True makes the "{}" default go through serialize_tags
    # too, so the metaxy version is stamped even when no tags are supplied.
    tags: dict[str, str] | str = Field(
        default="{}",
        description="Snapshot tags as JSON string (key-value pairs). The metaxy tag is reserved for internal use.",
        validate_default=True,
    )

    @field_validator("tags", mode="before")
    @classmethod
    def serialize_tags(cls, v: dict[str, str] | str | None) -> str:
        """Normalize tags to a JSON string and stamp the metaxy version.

        Accepts a mapping, a JSON string, or None. The reserved ``metaxy`` tag
        is stored as a JSON object string; its ``version`` entry is set to the
        installed metaxy version unless one is already present.

        The caller's mapping is never modified: all work happens on a copy.

        Args:
            v: Tags as a dict, a JSON-encoded string, or None.

        Returns:
            JSON string of the tags, always containing the ``metaxy`` tag.

        Raises:
            ValueError: If ``v`` is a non-empty value that is neither a dict
                nor a string (surfaced by Pydantic as a validation error).
        """
        import json

        # Imported lazily, as in the original implementation.
        from metaxy._version import __version__

        tags_dict: dict[str, str]
        if isinstance(v, str):
            try:
                parsed = json.loads(v)
            except json.JSONDecodeError:
                parsed = None
            # Best effort: malformed JSON and valid JSON that is not an object
            # (null, list, number, ...) are both treated as "no tags".
            tags_dict = dict(parsed) if isinstance(parsed, dict) else {}
        elif isinstance(v, dict):
            # Copy so that adding the reserved tag does not mutate the caller's dict.
            tags_dict = dict(v)
        elif not v:
            # None (and other empty values) mean "no tags".
            tags_dict = {}
        else:
            raise ValueError(
                f"tags must be a dict, a JSON string or None, got {type(v).__name__}"
            )

        # Ensure metaxy.version is set
        metaxy_tag_value = tags_dict.get(METAXY_TAG, "{}")
        if isinstance(metaxy_tag_value, str):
            try:
                metaxy_tag_dict = json.loads(metaxy_tag_value)
            except json.JSONDecodeError:
                # An unparsable reserved tag is reset, like a non-object one below.
                metaxy_tag_dict = {}
        else:
            metaxy_tag_dict = metaxy_tag_value
        if isinstance(metaxy_tag_dict, dict):
            # Copy: the value may be a nested dict owned by the caller.
            metaxy_tag_dict = dict(metaxy_tag_dict)
        else:
            metaxy_tag_dict = {}
        # Keep an existing version entry; only fill it in when missing.
        metaxy_tag_dict.setdefault(METAXY_VERSION_KEY, __version__)
        tags_dict[METAXY_TAG] = json.dumps(metaxy_tag_dict)

        return json.dumps(tags_dict)

    def to_polars(self) -> pl.DataFrame:
        """Convert this model instance to a single-row Polars DataFrame.

        Returns:
            Polars DataFrame with one row matching FEATURE_VERSIONS_SCHEMA
        """
        # tags is already a JSON string (see serialize_tags), no need to serialize
        return pl.DataFrame([self.model_dump()], schema=FEATURE_VERSIONS_SCHEMA)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# Maps each system table key to the Polars schema of that table.
POLARS_SCHEMAS = {
    FEATURE_VERSIONS_KEY: FEATURE_VERSIONS_SCHEMA,
    EVENTS_KEY: EVENTS_SCHEMA,
}
|