metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,446 @@
1
+ """Graph diff models for migration system.
2
+
3
+ Provides GraphDiff with struct serialization for storage in migration tables.
4
+ """
5
+
6
+ from typing import Any
7
+
8
+ from pydantic import Field
9
+
10
+ from metaxy.models.bases import FrozenBaseModel
11
+ from metaxy.models.types import FeatureKey, FieldKey
12
+ from metaxy.utils.constants import DEFAULT_CODE_VERSION
13
+ from metaxy.utils.exceptions import MetaxyEmptyCodeVersionError
14
+
15
+
16
class FieldChange(FrozenBaseModel):
    """Version delta of a single field between two graph snapshots.

    Each side (old/new) carries a version and a code version; a version left
    as ``None`` marks the side on which the field does not exist.
    """

    field_key: FieldKey
    old_version: str | None = None  # None if field was added
    new_version: str | None = None  # None if field was removed
    old_code_version: str | None = None
    new_code_version: str | None = None

    @property
    def is_added(self) -> bool:
        """True when there is no old version, i.e. the field was added."""
        return self.old_version is None

    @property
    def is_removed(self) -> bool:
        """True when there is no new version, i.e. the field was removed."""
        return self.new_version is None

    @property
    def is_changed(self) -> bool:
        """True when both versions are present and they differ."""
        # A missing side means "added" or "removed", never "changed".
        if self.old_version is None or self.new_version is None:
            return False
        return self.old_version != self.new_version
43
+
44
+
45
class NodeChange(FrozenBaseModel):
    """Version delta of a node (feature) between two graph snapshots.

    Besides the node-level versions, the field-level differences are kept in
    three separate lists: added, removed and changed fields.
    """

    feature_key: FeatureKey
    old_version: str | None = None  # None if node was added
    new_version: str | None = None  # None if node was removed
    old_code_version: str | None = None
    new_code_version: str | None = None
    added_fields: list[FieldChange] = Field(default_factory=list)
    removed_fields: list[FieldChange] = Field(default_factory=list)
    changed_fields: list[FieldChange] = Field(default_factory=list)

    @property
    def is_added(self) -> bool:
        """True when there is no old version, i.e. the node was added."""
        return self.old_version is None

    @property
    def is_removed(self) -> bool:
        """True when there is no new version, i.e. the node was removed."""
        return self.new_version is None

    @property
    def is_changed(self) -> bool:
        """True when both versions are present and they differ."""
        # A missing side means "added" or "removed", never "changed".
        if self.old_version is None or self.new_version is None:
            return False
        return self.old_version != self.new_version

    @property
    def field_changes(self) -> list[FieldChange]:
        """Return every field change as one new list: added, removed, changed.

        Backward compatibility property for old API.
        """
        return [*self.added_fields, *self.removed_fields, *self.changed_fields]

    @property
    def has_field_changes(self) -> bool:
        """True when at least one of the three field-change lists is non-empty.

        Backward compatibility property for old API.
        """
        return any((self.added_fields, self.removed_fields, self.changed_fields))
91
+
92
+
93
class AddedNode(FrozenBaseModel):
    """A node that is reported as added by a graph diff."""

    feature_key: FeatureKey
    version: str
    code_version: str | None = None
    # One dict per field, shaped as {key, version, code_version}.
    fields: list[dict[str, Any]] = Field(default_factory=list)
    dependencies: list[FeatureKey] = Field(default_factory=list)
103
+
104
+
105
class RemovedNode(FrozenBaseModel):
    """A node that is reported as removed by a graph diff."""

    feature_key: FeatureKey
    version: str
    code_version: str | None = None
    # One dict per field, shaped as {key, version, code_version}.
    fields: list[dict[str, Any]] = Field(default_factory=list)
    dependencies: list[FeatureKey] = Field(default_factory=list)
115
+
116
+
117
def _is_blank_code_version(value: Any) -> bool:
    """Return True for a missing, empty, or default-placeholder code version."""
    return not value or value == DEFAULT_CODE_VERSION


def _node_fields_to_struct(fields: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Serialize the field dicts of an added/removed node to plain values."""
    serialized = []
    for field in fields:
        key = field["key"]
        serialized.append(
            {
                # Keys may be plain strings or key objects exposing to_string().
                "key": key if isinstance(key, str) else key.to_string(),
                # A missing or None version is stored as "" so the value is
                # always a string; from_struct maps "" back to None.
                "version": field.get("version") or "",
                "code_version": field["code_version"],
            }
        )
    return serialized


def _whole_node_to_struct(node: "AddedNode | RemovedNode") -> dict[str, Any]:
    """Serialize an added or removed node; both share the same struct layout."""
    fields_list = _node_fields_to_struct(node.fields)
    if not node.code_version:
        raise MetaxyEmptyCodeVersionError(
            f"Node {node.feature_key.to_string()} has empty code_version."
        )
    return {
        "key": node.feature_key.to_string(),
        "version": node.version,
        "code_version": node.code_version,
        "fields": fields_list,
        "dependencies": [dep.to_string() for dep in node.dependencies],
    }


def _changed_node_to_struct(node: "NodeChange") -> dict[str, Any]:
    """Serialize a changed node together with its three field-change lists."""
    node_key = node.feature_key.to_string()

    added_fields_list = []
    for field in node.added_fields:
        if not field.new_code_version:
            raise MetaxyEmptyCodeVersionError(
                f"Node {node_key} has empty code_version for field {field.field_key.to_string()}."
            )
        added_fields_list.append(
            {
                "key": field.field_key.to_string(),
                "version": field.new_version or "",
                "code_version": field.new_code_version,
            }
        )

    removed_fields_list = []
    for field in node.removed_fields:
        if not field.old_code_version:
            raise MetaxyEmptyCodeVersionError(
                f"Node {node_key} has empty code_version for field {field.field_key.to_string()}."
            )
        removed_fields_list.append(
            {
                "key": field.field_key.to_string(),
                "version": field.old_version or "",
                "code_version": field.old_code_version,
            }
        )

    changed_fields_list = []
    for field in node.changed_fields:
        if not (field.old_code_version and field.new_code_version):
            raise MetaxyEmptyCodeVersionError(
                f"Node {node_key} has empty code_version for field {field.field_key.to_string()}."
            )
        changed_fields_list.append(
            {
                "key": field.field_key.to_string(),
                "old_version": field.old_version or "",
                "new_version": field.new_version or "",
                "old_code_version": field.old_code_version,
                "new_code_version": field.new_code_version,
            }
        )

    # Node-level validation runs after the fields, so a field-level problem
    # is reported first.
    if not (node.old_code_version and node.new_code_version):
        raise MetaxyEmptyCodeVersionError(
            f"Node {node_key} has empty old/new code_version."
        )
    return {
        "key": node_key,
        "old_version": node.old_version or "",
        "new_version": node.new_version or "",
        "old_code_version": node.old_code_version,
        "new_code_version": node.new_code_version,
        "added_fields": added_fields_list,
        "removed_fields": removed_fields_list,
        "changed_fields": changed_fields_list,
    }


def _whole_node_from_struct(node_cls: Any, node_data: dict[str, Any]) -> Any:
    """Build an ``AddedNode``/``RemovedNode`` (``node_cls``) from its struct."""
    fields = [
        {
            "key": field_data["key"],
            # "" is the stored stand-in for "no version".
            "version": field_data["version"] or None,
            "code_version": field_data["code_version"],
        }
        for field_data in node_data.get("fields") or []
    ]
    if _is_blank_code_version(node_data["code_version"]):
        raise MetaxyEmptyCodeVersionError(
            f"Node {node_data['key']} has empty code_version."
        )
    return node_cls(
        feature_key=FeatureKey(node_data["key"].split("/")),
        version=node_data["version"],
        code_version=node_data["code_version"],
        fields=fields,
        dependencies=[
            FeatureKey(dep.split("/")) for dep in node_data.get("dependencies") or []
        ],
    )


def _changed_node_from_struct(node_data: dict[str, Any]) -> "NodeChange":
    """Build a ``NodeChange`` from its struct, validating all code versions."""

    def _empty_field_error(field_data: dict[str, Any]) -> MetaxyEmptyCodeVersionError:
        return MetaxyEmptyCodeVersionError(
            f"Field {field_data['key']} in feature {node_data['key']} has empty code_version."
        )

    added_fields = []
    for field_data in node_data.get("added_fields") or []:
        if _is_blank_code_version(field_data["code_version"]):
            raise _empty_field_error(field_data)
        added_fields.append(
            FieldChange(
                field_key=FieldKey(field_data["key"].split("/")),
                old_version=None,
                new_version=field_data["version"] or None,
                old_code_version=None,
                new_code_version=field_data["code_version"],
            )
        )

    removed_fields = []
    for field_data in node_data.get("removed_fields") or []:
        if _is_blank_code_version(field_data["code_version"]):
            raise _empty_field_error(field_data)
        removed_fields.append(
            FieldChange(
                field_key=FieldKey(field_data["key"].split("/")),
                old_version=field_data["version"] or None,
                new_version=None,
                old_code_version=field_data["code_version"],
                new_code_version=None,
            )
        )

    changed_fields = []
    for field_data in node_data.get("changed_fields") or []:
        if any(
            _is_blank_code_version(field_data.get(k))
            for k in ("old_code_version", "new_code_version")
        ):
            raise _empty_field_error(field_data)
        changed_fields.append(
            FieldChange(
                field_key=FieldKey(field_data["key"].split("/")),
                old_version=field_data["old_version"] or None,
                new_version=field_data["new_version"] or None,
                old_code_version=field_data["old_code_version"],
                new_code_version=field_data["new_code_version"],
            )
        )

    if any(
        _is_blank_code_version(node_data.get(k))
        for k in ("old_code_version", "new_code_version")
    ):
        raise MetaxyEmptyCodeVersionError(
            f"Node {node_data['key']} has empty old/new code_version."
        )
    return NodeChange(
        feature_key=FeatureKey(node_data["key"].split("/")),
        old_version=node_data["old_version"] or None,
        new_version=node_data["new_version"] or None,
        old_code_version=node_data["old_code_version"],
        new_code_version=node_data["new_code_version"],
        added_fields=added_fields,
        removed_fields=removed_fields,
        changed_fields=changed_fields,
    )


class GraphDiff(FrozenBaseModel):
    """Result of comparing two graph snapshots.

    Stores changes between two graph states for migration generation.
    """

    from_snapshot_version: str
    to_snapshot_version: str
    added_nodes: list[AddedNode] = Field(default_factory=list)
    removed_nodes: list[RemovedNode] = Field(default_factory=list)
    changed_nodes: list[NodeChange] = Field(default_factory=list)

    @property
    def has_changes(self) -> bool:
        """Check if diff contains any added, removed or changed nodes."""
        return bool(self.added_nodes or self.removed_nodes or self.changed_nodes)

    def to_struct(self) -> dict[str, Any]:
        """Serialize to struct (native Python types for storage).

        Keys are rendered with ``to_string()`` and optional versions are
        written as ``""`` instead of ``None``. The two snapshot versions are
        not part of the struct; ``from_struct`` takes them as arguments.

        Returns:
            Dict with the keys ``added_nodes``, ``removed_nodes`` and
            ``changed_nodes``, each holding a list of plain dicts.

        Raises:
            MetaxyEmptyCodeVersionError: If a node or a field change has an
                empty code version.
        """
        return {
            "added_nodes": [_whole_node_to_struct(node) for node in self.added_nodes],
            "removed_nodes": [
                _whole_node_to_struct(node) for node in self.removed_nodes
            ],
            "changed_nodes": [
                _changed_node_to_struct(node) for node in self.changed_nodes
            ],
        }

    @classmethod
    def from_struct(
        cls,
        struct_data: dict[str, Any],
        from_snapshot_version: str,
        to_snapshot_version: str,
    ) -> "GraphDiff":
        """Deserialize from struct.

        Keys are rebuilt by splitting the stored string on ``/`` and empty
        version strings are turned back into ``None``. A list entry that is
        absent or ``None`` is treated as empty.

        Args:
            struct_data: Dict with structure from to_struct()
            from_snapshot_version: Source snapshot version
            to_snapshot_version: Target snapshot version

        Returns:
            GraphDiff instance

        Raises:
            MetaxyEmptyCodeVersionError: If a stored code version is empty or
                equal to ``DEFAULT_CODE_VERSION``.
        """
        added_nodes = [
            _whole_node_from_struct(AddedNode, node_data)
            for node_data in struct_data.get("added_nodes") or []
        ]
        removed_nodes = [
            _whole_node_from_struct(RemovedNode, node_data)
            for node_data in struct_data.get("removed_nodes") or []
        ]
        changed_nodes = [
            _changed_node_from_struct(node_data)
            for node_data in struct_data.get("changed_nodes") or []
        ]
        return cls(
            from_snapshot_version=from_snapshot_version,
            to_snapshot_version=to_snapshot_version,
            added_nodes=added_nodes,
            removed_nodes=removed_nodes,
            changed_nodes=changed_nodes,
        )