metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,57 @@
1
+ """System table components for metadata store.
2
+
3
+ This package provides system table functionality for Metaxy:
4
+ - events: Migration event types with builder pattern
5
+ - keys: System table keys and constants
6
+ - models: Pydantic models and schemas for system tables
7
+ - storage: Storage layer for system tables
8
+ """
9
+
10
+ from metaxy.metadata_store.system.events import (
11
+ COL_EVENT_TYPE,
12
+ COL_EXECUTION_ID,
13
+ COL_FEATURE_KEY,
14
+ COL_PAYLOAD,
15
+ COL_PROJECT,
16
+ COL_TIMESTAMP,
17
+ Event,
18
+ EventType,
19
+ MigrationStatus,
20
+ PayloadType,
21
+ )
22
+ from metaxy.metadata_store.system.keys import (
23
+ EVENTS_KEY,
24
+ FEATURE_VERSIONS_KEY,
25
+ METAXY_SYSTEM_KEY_PREFIX,
26
+ )
27
+ from metaxy.metadata_store.system.models import (
28
+ FEATURE_VERSIONS_SCHEMA,
29
+ FeatureVersionsModel,
30
+ )
31
+ from metaxy.metadata_store.system.storage import (
32
+ SystemTableStorage,
33
+ )
34
+
35
# Names re-exported by this package, grouped by the submodule they are
# imported from above.
__all__ = [
    # Events (from .events)
    "Event",
    "EventType",
    "MigrationStatus",
    "PayloadType",
    # Column names (from .events)
    "COL_PROJECT",
    "COL_EXECUTION_ID",
    "COL_EVENT_TYPE",
    "COL_TIMESTAMP",
    "COL_FEATURE_KEY",
    "COL_PAYLOAD",
    # Keys (from .keys)
    "METAXY_SYSTEM_KEY_PREFIX",
    "FEATURE_VERSIONS_KEY",
    "EVENTS_KEY",
    # Models (from .models)
    "FEATURE_VERSIONS_SCHEMA",
    "FeatureVersionsModel",
    # Storage (from .storage)
    "SystemTableStorage",
]
@@ -0,0 +1,264 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timezone
4
+ from enum import Enum
5
+ from typing import Annotated, Literal
6
+
7
+ import polars as pl
8
+ from pydantic import BaseModel, ConfigDict, Field
9
+
10
+
11
class EventType(str, Enum):
    """Metaxy event types.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Events about a migration as a whole.
    MIGRATION_STARTED = "migration_started"
    MIGRATION_COMPLETED = "migration_completed"
    MIGRATION_FAILED = "migration_failed"
    # Events about a single feature within a migration.
    FEATURE_MIGRATION_STARTED = "feature_migration_started"
    FEATURE_MIGRATION_COMPLETED = "feature_migration_completed"
    FEATURE_MIGRATION_FAILED = "feature_migration_failed"
20
+
21
+
22
class PayloadType(str, Enum):
    """Payload types for event payloads.

    Each member is used as the ``type`` tag of one payload model in this
    module (EmptyPayload, ErrorPayload, RowsAffectedPayload).
    """

    EMPTY = "empty"
    ERROR = "error"
    ROWS_AFFECTED = "rows_affected"
28
+
29
+
30
class MigrationStatus(str, Enum):
    """Migration execution status.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    NOT_STARTED = "not_started"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
37
+
38
+
39
# Column name constants (to avoid drift between Event model and storage).
# Event.to_polars() and EVENTS_SCHEMA below both key their data by these names.
COL_PROJECT = "project"
COL_EXECUTION_ID = "execution_id"
COL_EVENT_TYPE = "event_type"
COL_TIMESTAMP = "timestamp"
COL_FEATURE_KEY = "feature_key"
COL_PAYLOAD = "payload"

# Events schema (for Polars storage)
EVENTS_SCHEMA = {
    COL_PROJECT: pl.String,
    COL_EXECUTION_ID: pl.String,
    COL_EVENT_TYPE: pl.Enum(EventType),  # categories come from the EventType enum
    COL_TIMESTAMP: pl.Datetime("us"),  # microsecond precision, no time zone in the dtype
    COL_FEATURE_KEY: pl.String,  # Event.feature_key, which may be None
    COL_PAYLOAD: pl.String,  # JSON string with arbitrary event data
}
56
+
57
+
58
class EmptyPayload(BaseModel):
    """Empty payload for events with no additional data."""

    model_config = ConfigDict(frozen=True)
    # Tag that identifies this payload variant within the Payload union.
    type: Literal[PayloadType.EMPTY] = PayloadType.EMPTY


class ErrorPayload(BaseModel):
    """Payload for events with error information."""

    model_config = ConfigDict(frozen=True)
    # Tag that identifies this payload variant within the Payload union.
    type: Literal[PayloadType.ERROR] = PayloadType.ERROR
    error_message: str
    rows_affected: int | None = None  # Optional: rows processed before failure


class RowsAffectedPayload(BaseModel):
    """Payload for events tracking rows affected."""

    model_config = ConfigDict(frozen=True)
    # Tag that identifies this payload variant within the Payload union.
    type: Literal[PayloadType.ROWS_AFFECTED] = PayloadType.ROWS_AFFECTED
    rows_affected: int


# Union of all payload variants. Event.payload declares ``type`` as the
# discriminator, so each variant's ``type`` literal selects the model.
Payload = EmptyPayload | ErrorPayload | RowsAffectedPayload
84
+
85
+
86
class Event(BaseModel):
    """Immutable record of a single migration lifecycle event.

    One model covers every kind of event: ``event_type`` says what happened
    and ``payload`` (discriminated on its ``type`` field) carries any extra
    data. The classmethods below are convenience constructors, one per
    ``EventType`` member.
    """

    model_config = ConfigDict(frozen=True)

    event_type: EventType
    project: str
    # Generic ID for the execution (migration, job, etc.)
    execution_id: str
    # Defaults to the current time in UTC when the event is constructed.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    # Set for feature-level events; None for execution-level events.
    feature_key: str | None = None
    payload: Annotated[
        Payload, Field(default_factory=EmptyPayload, discriminator="type")
    ]

    def to_polars(self) -> pl.DataFrame:
        """Render this event as a one-row Polars DataFrame.

        The payload is stored as its JSON serialization.

        Returns:
            Single-row DataFrame built with EVENTS_SCHEMA.
        """
        payload_json = self.payload.model_dump_json()
        row = {
            COL_PROJECT: self.project,
            COL_EXECUTION_ID: self.execution_id,
            COL_EVENT_TYPE: self.event_type,
            COL_TIMESTAMP: self.timestamp,
            COL_FEATURE_KEY: self.feature_key,
            COL_PAYLOAD: payload_json,
        }
        return pl.DataFrame([row], schema=EVENTS_SCHEMA)

    @classmethod
    def migration_started(cls, project: str, migration_id: str) -> Event:
        """Build the event marking the start of a migration.

        Args:
            project: Project name
            migration_id: Migration ID (stored as ``execution_id``)

        Returns:
            Event of type ``EventType.MIGRATION_STARTED`` with an empty payload
        """
        return cls(
            event_type=EventType.MIGRATION_STARTED,
            project=project,
            execution_id=migration_id,
            payload=EmptyPayload(),
        )

    @classmethod
    def migration_completed(cls, project: str, migration_id: str) -> Event:
        """Build the event marking successful completion of a migration.

        Args:
            project: Project name
            migration_id: Migration ID (stored as ``execution_id``)

        Returns:
            Event of type ``EventType.MIGRATION_COMPLETED`` with an empty payload
        """
        return cls(
            event_type=EventType.MIGRATION_COMPLETED,
            project=project,
            execution_id=migration_id,
            payload=EmptyPayload(),
        )

    @classmethod
    def migration_failed(
        cls,
        project: str,
        migration_id: str,
        error_message: str,
        rows_affected: int | None = None,
    ) -> Event:
        """Build the event marking a failed migration.

        Args:
            project: Project name
            migration_id: Migration ID (stored as ``execution_id``)
            error_message: Error message describing the failure
            rows_affected: Optional number of rows processed before failure

        Returns:
            Event of type ``EventType.MIGRATION_FAILED`` with an error payload
        """
        failure = ErrorPayload(
            error_message=error_message, rows_affected=rows_affected
        )
        return cls(
            event_type=EventType.MIGRATION_FAILED,
            project=project,
            execution_id=migration_id,
            payload=failure,
        )

    @classmethod
    def feature_migration_started(
        cls, project: str, migration_id: str, feature_key: str
    ) -> Event:
        """Build the event marking the start of one feature's migration.

        Args:
            project: Project name
            migration_id: Migration ID (stored as ``execution_id``)
            feature_key: Feature key being processed

        Returns:
            Event of type ``EventType.FEATURE_MIGRATION_STARTED`` with an
            empty payload
        """
        return cls(
            event_type=EventType.FEATURE_MIGRATION_STARTED,
            project=project,
            execution_id=migration_id,
            feature_key=feature_key,
            payload=EmptyPayload(),
        )

    @classmethod
    def feature_migration_completed(
        cls, project: str, migration_id: str, feature_key: str, rows_affected: int
    ) -> Event:
        """Build the event marking successful migration of one feature.

        Args:
            project: Project name
            migration_id: Migration ID (stored as ``execution_id``)
            feature_key: Feature key that was processed
            rows_affected: Number of rows affected

        Returns:
            Event of type ``EventType.FEATURE_MIGRATION_COMPLETED`` with a
            rows_affected payload
        """
        outcome = RowsAffectedPayload(rows_affected=rows_affected)
        return cls(
            event_type=EventType.FEATURE_MIGRATION_COMPLETED,
            project=project,
            execution_id=migration_id,
            feature_key=feature_key,
            payload=outcome,
        )

    @classmethod
    def feature_migration_failed(
        cls,
        project: str,
        migration_id: str,
        feature_key: str,
        error_message: str,
        rows_affected: int | None = None,
    ) -> Event:
        """Build the event marking a failed migration of one feature.

        Args:
            project: Project name
            migration_id: Migration ID (stored as ``execution_id``)
            feature_key: Feature key that failed
            error_message: Error message describing the failure
            rows_affected: Optional number of rows processed before failure

        Returns:
            Event of type ``EventType.FEATURE_MIGRATION_FAILED`` with an
            error payload
        """
        failure = ErrorPayload(
            error_message=error_message, rows_affected=rows_affected
        )
        return cls(
            event_type=EventType.FEATURE_MIGRATION_FAILED,
            project=project,
            execution_id=migration_id,
            feature_key=feature_key,
            payload=failure,
        )

    # Shorter aliases for convenience
    feature_started = feature_migration_started
    feature_completed = feature_migration_completed
    feature_failed = feature_migration_failed
@@ -0,0 +1,9 @@
1
+ """System table keys and constants."""
2
+
3
+ from metaxy.models.types import FeatureKey
4
+
5
# First part of every system table key defined below.
METAXY_SYSTEM_KEY_PREFIX = "metaxy-system"

# System table keys: each is a FeatureKey built from [prefix, table name].
FEATURE_VERSIONS_KEY = FeatureKey([METAXY_SYSTEM_KEY_PREFIX, "feature_versions"])
EVENTS_KEY = FeatureKey([METAXY_SYSTEM_KEY_PREFIX, "events"])
@@ -0,0 +1,129 @@
1
+ """Pydantic models for system tables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+
7
+ import polars as pl
8
+ from pydantic import BaseModel, Field, field_validator
9
+
10
from metaxy.metadata_store.system.events import EVENTS_SCHEMA

# Import both keys straight from the defining module. Importing
# FEATURE_VERSIONS_KEY from the package __init__ (which itself imports this
# module) only works while __init__ happens to import `keys` before `models`.
from metaxy.metadata_store.system.keys import EVENTS_KEY, FEATURE_VERSIONS_KEY
13
+ from metaxy.models.constants import (
14
+ METAXY_FEATURE_SPEC_VERSION,
15
+ METAXY_FEATURE_VERSION,
16
+ METAXY_FULL_DEFINITION_VERSION,
17
+ METAXY_SNAPSHOT_VERSION,
18
+ )
19
+
20
# Common Polars schemas for system tables
# Note: tags is stored as a JSON-encoded string column (pl.String), not as a
# Struct; FeatureVersionsModel's tags validator produces that string.
FEATURE_VERSIONS_SCHEMA = {
    "project": pl.String,
    "feature_key": pl.String,
    METAXY_FEATURE_VERSION: pl.String,
    METAXY_FEATURE_SPEC_VERSION: pl.String,  # Hash of complete FeatureSpec (all properties)
    METAXY_FULL_DEFINITION_VERSION: pl.String,  # Hash of feature_spec_version + project (for migration detection)  # TODO: this is probably not needed, we can just use a combination of project and metaxy_feature_version instead
    "recorded_at": pl.Datetime("us"),
    "feature_spec": pl.String,  # Full serialized FeatureSpec
    "feature_schema": pl.String,  # Full Pydantic model schema as JSON
    "feature_class_path": pl.String,
    METAXY_SNAPSHOT_VERSION: pl.String,
    "tags": pl.String,  # JSON string of key-value tags
}


# Reserved top-level tag key; its value is a JSON object managed by the tags
# validator on FeatureVersionsModel.
METAXY_TAG = "metaxy"
# Key inside the metaxy tag object that holds the metaxy package version.
METAXY_VERSION_KEY = "version"
39
+
40
+
41
class FeatureVersionsModel(BaseModel):
    """Pydantic model for feature_versions system table.

    This table records when feature specifications are pushed to production,
    tracking the evolution of feature definitions over time.
    """

    project: str
    feature_key: str
    metaxy_feature_version: str = Field(
        ...,
        description="Hash of versioned feature topology (combined versions of fields on this feature)",
    )
    metaxy_feature_spec_version: str = Field(
        ..., description="Hash of complete FeatureSpec (all properties)"
    )
    metaxy_full_definition_version: str = Field(
        ..., description="Hash of feature_spec_version + project"
    )
    recorded_at: datetime = Field(
        ..., description="Timestamp when feature version was recorded"
    )
    feature_spec: str = Field(
        ..., description="Full serialized FeatureSpec as JSON string"
    )
    feature_schema: str = Field(
        ..., description="Full Pydantic model schema as JSON string"
    )
    feature_class_path: str = Field(
        ..., description="Python import path to Feature class"
    )
    metaxy_snapshot_version: str = Field(
        ..., description="Deterministic hash of entire Metaxy project"
    )
    tags: dict[str, str] | str = Field(
        default="{}",
        description="Snapshot tags as JSON string (key-value pairs). The metaxy tag is reserved for internal use.",
        validate_default=True,
    )

    @field_validator("tags", mode="before")
    @classmethod
    def serialize_tags(cls, v: dict[str, str] | str | None) -> str:
        """Normalize tags to a JSON string and stamp the metaxy version.

        Runs before field validation (and on the default value), so ``tags``
        always ends up as a JSON object string whose reserved ``metaxy`` entry
        is itself a JSON object string containing a ``version`` key.

        Args:
            v: Tags as a mapping, a JSON string, or None. A string that is not
                valid JSON, or that decodes to something other than an object,
                is treated as having no tags.

        Returns:
            JSON string of the tags. An existing ``metaxy.version`` value is
            kept; otherwise the installed metaxy version is recorded.

        Raises:
            ValueError: If ``v`` is a non-empty value of an unsupported type.
                Pydantic reports this as a validation error.
        """
        import json
        from collections.abc import Mapping

        from metaxy._version import __version__

        tags_dict: dict[str, str]
        if isinstance(v, str):
            try:
                decoded = json.loads(v)
            except json.JSONDecodeError:
                decoded = None
            # Valid JSON that is not an object ("[]", "null", "1") previously
            # crashed on .get(); treat it like unparseable input.
            tags_dict = decoded if isinstance(decoded, dict) else {}
        elif isinstance(v, Mapping):
            # Copy so the caller's mapping is never mutated by the stamping below.
            tags_dict = dict(v)
        elif not v:
            # None (and other empty values) mean "no tags".
            tags_dict = {}
        else:
            raise ValueError(
                f"tags must be a mapping, a JSON string or None, got {type(v).__name__}"
            )

        # Ensure metaxy.version is set
        metaxy_tag_value = tags_dict.get(METAXY_TAG, "{}")
        if isinstance(metaxy_tag_value, str):
            try:
                metaxy_tag_dict = json.loads(metaxy_tag_value)
            except json.JSONDecodeError:
                # Same leniency as for the outer string: a corrupt reserved
                # tag is rebuilt rather than failing validation.
                metaxy_tag_dict = {}
        else:
            metaxy_tag_dict = metaxy_tag_value
        # Work on a copy so a nested dict supplied by the caller stays untouched.
        metaxy_tag_dict = (
            dict(metaxy_tag_dict) if isinstance(metaxy_tag_dict, dict) else {}
        )
        metaxy_tag_dict.setdefault(METAXY_VERSION_KEY, __version__)
        tags_dict[METAXY_TAG] = json.dumps(metaxy_tag_dict)

        return json.dumps(tags_dict)

    def to_polars(self) -> pl.DataFrame:
        """Convert this model instance to a single-row Polars DataFrame.

        Returns:
            Polars DataFrame with one row matching FEATURE_VERSIONS_SCHEMA
        """
        # tags is already a JSON string, no need to serialize
        return pl.DataFrame([self.model_dump()], schema=FEATURE_VERSIONS_SCHEMA)
124
+
125
+
126
# Maps each system table key to the Polars schema used for that table.
POLARS_SCHEMAS = {
    FEATURE_VERSIONS_KEY: FEATURE_VERSIONS_SCHEMA,
    EVENTS_KEY: EVENTS_SCHEMA,
}
+ }