metaxy 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of metaxy might be problematic; see the registry's advisory page for more details.

Files changed (75)
  1. metaxy/__init__.py +61 -0
  2. metaxy/_testing.py +542 -0
  3. metaxy/_utils.py +16 -0
  4. metaxy/_version.py +1 -0
  5. metaxy/cli/app.py +76 -0
  6. metaxy/cli/context.py +71 -0
  7. metaxy/cli/graph.py +576 -0
  8. metaxy/cli/graph_diff.py +290 -0
  9. metaxy/cli/list.py +42 -0
  10. metaxy/cli/metadata.py +271 -0
  11. metaxy/cli/migrations.py +862 -0
  12. metaxy/cli/push.py +55 -0
  13. metaxy/config.py +450 -0
  14. metaxy/data_versioning/__init__.py +24 -0
  15. metaxy/data_versioning/calculators/__init__.py +13 -0
  16. metaxy/data_versioning/calculators/base.py +97 -0
  17. metaxy/data_versioning/calculators/duckdb.py +186 -0
  18. metaxy/data_versioning/calculators/ibis.py +225 -0
  19. metaxy/data_versioning/calculators/polars.py +135 -0
  20. metaxy/data_versioning/diff/__init__.py +15 -0
  21. metaxy/data_versioning/diff/base.py +150 -0
  22. metaxy/data_versioning/diff/narwhals.py +108 -0
  23. metaxy/data_versioning/hash_algorithms.py +19 -0
  24. metaxy/data_versioning/joiners/__init__.py +9 -0
  25. metaxy/data_versioning/joiners/base.py +70 -0
  26. metaxy/data_versioning/joiners/narwhals.py +235 -0
  27. metaxy/entrypoints.py +309 -0
  28. metaxy/ext/__init__.py +1 -0
  29. metaxy/ext/alembic.py +326 -0
  30. metaxy/ext/sqlmodel.py +172 -0
  31. metaxy/ext/sqlmodel_system_tables.py +139 -0
  32. metaxy/graph/__init__.py +21 -0
  33. metaxy/graph/diff/__init__.py +21 -0
  34. metaxy/graph/diff/diff_models.py +399 -0
  35. metaxy/graph/diff/differ.py +740 -0
  36. metaxy/graph/diff/models.py +418 -0
  37. metaxy/graph/diff/rendering/__init__.py +18 -0
  38. metaxy/graph/diff/rendering/base.py +274 -0
  39. metaxy/graph/diff/rendering/cards.py +188 -0
  40. metaxy/graph/diff/rendering/formatter.py +805 -0
  41. metaxy/graph/diff/rendering/graphviz.py +246 -0
  42. metaxy/graph/diff/rendering/mermaid.py +320 -0
  43. metaxy/graph/diff/rendering/rich.py +165 -0
  44. metaxy/graph/diff/rendering/theme.py +48 -0
  45. metaxy/graph/diff/traversal.py +247 -0
  46. metaxy/graph/utils.py +58 -0
  47. metaxy/metadata_store/__init__.py +31 -0
  48. metaxy/metadata_store/_protocols.py +38 -0
  49. metaxy/metadata_store/base.py +1676 -0
  50. metaxy/metadata_store/clickhouse.py +161 -0
  51. metaxy/metadata_store/duckdb.py +167 -0
  52. metaxy/metadata_store/exceptions.py +43 -0
  53. metaxy/metadata_store/ibis.py +451 -0
  54. metaxy/metadata_store/memory.py +228 -0
  55. metaxy/metadata_store/sqlite.py +187 -0
  56. metaxy/metadata_store/system_tables.py +257 -0
  57. metaxy/migrations/__init__.py +34 -0
  58. metaxy/migrations/detector.py +153 -0
  59. metaxy/migrations/executor.py +208 -0
  60. metaxy/migrations/loader.py +260 -0
  61. metaxy/migrations/models.py +718 -0
  62. metaxy/migrations/ops.py +390 -0
  63. metaxy/models/__init__.py +0 -0
  64. metaxy/models/bases.py +6 -0
  65. metaxy/models/constants.py +24 -0
  66. metaxy/models/feature.py +665 -0
  67. metaxy/models/feature_spec.py +105 -0
  68. metaxy/models/field.py +25 -0
  69. metaxy/models/plan.py +155 -0
  70. metaxy/models/types.py +157 -0
  71. metaxy/py.typed +0 -0
  72. metaxy-0.0.0.dist-info/METADATA +247 -0
  73. metaxy-0.0.0.dist-info/RECORD +75 -0
  74. metaxy-0.0.0.dist-info/WHEEL +4 -0
  75. metaxy-0.0.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,161 @@
1
+ """ClickHouse metadata store - thin wrapper around IbisMetadataStore."""
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ if TYPE_CHECKING:
6
+ from metaxy.data_versioning.calculators.ibis import HashSQLGenerator
7
+ from metaxy.metadata_store.base import MetadataStore
8
+
9
+ from metaxy.data_versioning.hash_algorithms import HashAlgorithm
10
+ from metaxy.metadata_store.ibis import IbisMetadataStore
11
+
12
+
13
class ClickHouseMetadataStore(IbisMetadataStore):
    """
    ClickHouse metadata store using Ibis backend.

    Convenience wrapper that configures IbisMetadataStore for ClickHouse.

    Hash algorithm support:
    - MD5: Always available (built-in)
    - XXHASH32, XXHASH64: Available via ClickHouse's xxHash32/xxHash64 functions

    Components:
    - joiner: NarwhalsJoiner (works with any backend)
    - calculator: IbisDataVersionCalculator (native SQL hash computation with xxHash64/xxHash32/MD5)
    - diff_resolver: NarwhalsDiffResolver

    Examples:
        >>> # Local ClickHouse instance
        >>> with ClickHouseMetadataStore("clickhouse://localhost:9000/default") as store:
        ...     store.write_metadata(MyFeature, df)

        >>> # With authentication
        >>> with ClickHouseMetadataStore("clickhouse://user:pass@host:9000/db") as store:
        ...     store.write_metadata(MyFeature, df)

        >>> # Using connection params
        >>> store = ClickHouseMetadataStore(
        ...     backend="clickhouse",
        ...     connection_params={
        ...         "host": "localhost",
        ...         "port": 9000,
        ...         "database": "default",
        ...         "user": "default",
        ...         "password": ""
        ...     },
        ...     hash_algorithm=HashAlgorithm.XXHASH64
        ... )
        >>> with store:
        ...     store.write_metadata(MyFeature, df)
    """

    def __init__(
        self,
        connection_string: str | None = None,
        *,
        connection_params: dict[str, Any] | None = None,
        fallback_stores: list["MetadataStore"] | None = None,
        **kwargs,
    ):
        """
        Initialize ClickHouse metadata store.

        Args:
            connection_string: ClickHouse connection string.
                Format: "clickhouse://[user[:password]@]host[:port]/database[?param=value]"
                Examples:
                - "clickhouse://localhost:9000/default"
                - "clickhouse://user:pass@host:9000/db"
                - "clickhouse://host:9000/db?secure=true"
            connection_params: Alternative to connection_string, specify params as dict:
                - host: Server host (default: "localhost")
                - port: Server port (default: 9000)
                - database: Database name (default: "default")
                - user: Username (default: "default")
                - password: Password (default: "")
                - secure: Use secure connection (default: False)
            fallback_stores: Ordered list of read-only fallback stores.
            **kwargs: Passed to IbisMetadataStore (e.g., hash_algorithm, graph)

        Raises:
            ImportError: If ibis-clickhouse not installed
            ValueError: If neither connection_string nor connection_params provided
        """
        if connection_string is None and connection_params is None:
            raise ValueError(
                "Must provide either connection_string or connection_params. "
                "Example: connection_string='clickhouse://localhost:9000/default'"
            )

        # Initialize Ibis store with ClickHouse backend.
        # Only pass an explicit backend name when connecting via params;
        # a connection string already encodes the backend.
        super().__init__(
            connection_string=connection_string,
            backend="clickhouse" if connection_string is None else None,
            connection_params=connection_params,
            fallback_stores=fallback_stores,
            **kwargs,
        )

    def _get_default_hash_algorithm(self) -> HashAlgorithm:
        """Get default hash algorithm for ClickHouse stores.

        Uses XXHASH64 which is built-in to ClickHouse.
        """
        return HashAlgorithm.XXHASH64

    def _supports_native_components(self) -> bool:
        """ClickHouse stores support native data version calculations when connection is open."""
        return self._conn is not None

    def _get_hash_sql_generators(self) -> dict[HashAlgorithm, "HashSQLGenerator"]:
        """Get hash SQL generators for ClickHouse.

        ClickHouse supports:
        - MD5: Always available (built-in)
        - XXHASH32, XXHASH64: Always available (built-in xxHash32/xxHash64 functions)

        All three generators share the same SELECT-wrapping shape and differ
        only in the per-column hash expression, so a single factory builds them.

        Returns:
            Dictionary mapping HashAlgorithm to SQL generator functions
        """

        def _make_generator(make_hash_expr):
            """Build a generator that wraps the table SQL and appends one
            hashed column (``__hash_<field>``) per concat column, using
            ``make_hash_expr`` to produce the hash expression for a column."""

            def generator(table, concat_columns: dict[str, str]) -> str:
                hash_selects: list[str] = [
                    f"{make_hash_expr(concat_col)} as __hash_{field_key}"
                    for field_key, concat_col in concat_columns.items()
                ]
                hash_clause = ", ".join(hash_selects)
                table_sql = table.compile()
                return f"SELECT *, {hash_clause} FROM ({table_sql}) AS __metaxy_temp"

            return generator

        # MD5() in ClickHouse returns FixedString(16) binary; lower(hex(...))
        # converts it to lowercase hex to match DuckDB's md5() output.
        return {
            HashAlgorithm.MD5: _make_generator(
                lambda col: f"lower(hex(MD5({col})))"
            ),
            HashAlgorithm.XXHASH32: _make_generator(
                lambda col: f"CAST(xxHash32({col}) AS String)"
            ),
            HashAlgorithm.XXHASH64: _make_generator(
                lambda col: f"CAST(xxHash64({col}) AS String)"
            ),
        }
@@ -0,0 +1,167 @@
1
+ """DuckDB metadata store - thin wrapper around IbisMetadataStore."""
2
+
3
+ from pathlib import Path
4
+ from typing import TYPE_CHECKING, TypedDict
5
+
6
+ if TYPE_CHECKING:
7
+ from metaxy.metadata_store.base import MetadataStore
8
+
9
+ from metaxy.data_versioning.hash_algorithms import HashAlgorithm
10
+ from metaxy.metadata_store.ibis import IbisMetadataStore
11
+
12
+
13
class ExtensionSpec(TypedDict, total=False):
    """Specification of a DuckDB extension to install and load.

    Mirrors the forms accepted in TOML configuration:

        extensions = ["hashfuncs"]                          # string form, 'community' repo
        extensions = [{name = "hashfuncs"}]                 # dict form, 'community' repo
        extensions = [{name = "spatial", repository = "core_nightly"}]
        extensions = [{name = "my_ext", repository = "https://my-repo.com"}]
    """

    # Extension name as understood by DuckDB's INSTALL command.
    name: str
    # Source repository; treated as "community" when omitted.
    repository: str
26
+
27
+
28
class DuckDBMetadataStore(IbisMetadataStore):
    """
    DuckDB metadata store using Ibis backend.

    Convenience wrapper that configures IbisMetadataStore for DuckDB.

    Hash algorithm support is detected dynamically based on installed extensions:
    - MD5: Always available (built-in)
    - XXHASH32, XXHASH64: Available when 'hashfuncs' extension is loaded

    Components:
    - joiner: NarwhalsJoiner (works with any backend)
    - calculator: DuckDBDataVersionCalculator (native SQL hash computation with xxh64/xxh32/md5)
    - diff_resolver: NarwhalsDiffResolver

    Examples:
        >>> # Local file database
        >>> with DuckDBMetadataStore("metadata.db") as store:
        ...     store.write_metadata(MyFeature, df)

        >>> # In-memory database
        >>> with DuckDBMetadataStore(":memory:") as store:
        ...     store.write_metadata(MyFeature, df)

        >>> # MotherDuck
        >>> with DuckDBMetadataStore("md:my_database") as store:
        ...     store.write_metadata(MyFeature, df)

        >>> # With extensions
        >>> store = DuckDBMetadataStore(
        ...     "metadata.db",
        ...     hash_algorithm=HashAlgorithm.XXHASH64,
        ...     extensions=["hashfuncs"]
        ... )
        >>> with store:
        ...     store.write_metadata(MyFeature, df)
    """

    def __init__(
        self,
        database: str | Path,
        *,
        config: dict[str, str] | None = None,
        extensions: list[ExtensionSpec | str] | None = None,
        fallback_stores: list["MetadataStore"] | None = None,
        **kwargs,
    ):
        """
        Initialize DuckDB metadata store.

        Args:
            database: Database connection string or path.
                - File path: "metadata.db" or Path("metadata.db")
                - In-memory: ":memory:"
                - MotherDuck: "md:my_database" or "md:my_database?motherduck_token=..."
                - S3: "s3://bucket/path/database.duckdb" (read-only via ATTACH)
                - HTTPS: "https://example.com/database.duckdb" (read-only via ATTACH)
                - Any valid DuckDB connection string

                Note: Parent directories are NOT created automatically. Ensure paths exist
                before initializing the store.
            config: Optional DuckDB configuration settings (e.g., {'threads': '4', 'memory_limit': '4GB'})
            extensions: List of DuckDB extensions to install and load on open.
                Can be strings (installed from 'community' repository) or dicts
                specifying both name and repository. The list is copied; the
                caller's list is never mutated.

                Examples:
                    extensions=['hashfuncs']  # Install hashfuncs from community
                    extensions=[{'name': 'hashfuncs'}]  # Same as above
                    extensions=[{'name': 'spatial', 'repository': 'core_nightly'}]
                    extensions=[{'name': 'my_ext', 'repository': 'https://my-repo.com'}]
            fallback_stores: Ordered list of read-only fallback stores.
            **kwargs: Passed to IbisMetadataStore (e.g., hash_algorithm, graph)
        """
        database_str = str(database)

        # Build connection params for Ibis DuckDB backend
        # Ibis DuckDB backend accepts config params directly (not nested under 'config')
        connection_params: dict[str, str] = {"database": database_str}
        if config:
            connection_params.update(config)

        self.database = database_str
        # Copy the caller's list: we may append below, and mutating an
        # argument in place would surprise callers that reuse the same list
        # across several stores.
        self.extensions: list[ExtensionSpec | str] = list(extensions or [])

        # Auto-add hashfuncs extension if not present (needed for default XXHASH64)
        extension_names = [
            ext if isinstance(ext, str) else ext.get("name", "")
            for ext in self.extensions
        ]
        if "hashfuncs" not in extension_names:
            self.extensions.append("hashfuncs")

        # Initialize Ibis store with DuckDB backend
        super().__init__(
            backend="duckdb",
            connection_params=connection_params,
            fallback_stores=fallback_stores,
            **kwargs,
        )

    def _get_default_hash_algorithm(self) -> HashAlgorithm:
        """Get default hash algorithm for DuckDB stores.

        Uses XXHASH64 which requires the hashfuncs extension (lazily loaded).
        """
        return HashAlgorithm.XXHASH64

    def _supports_native_components(self) -> bool:
        """DuckDB stores support native data version calculations when connection is open."""
        return self._conn is not None

    def _create_native_components(self):
        """Create components for native SQL execution with DuckDB.

        Uses DuckDBDataVersionCalculator which handles extension loading lazily.
        Extensions are loaded when the calculator is created (on-demand), not on store open.

        Returns:
            Tuple of (joiner, calculator, diff_resolver).

        Raises:
            RuntimeError: If the store connection is not open.
        """
        from metaxy.data_versioning.calculators.duckdb import (
            DuckDBDataVersionCalculator,
        )
        from metaxy.data_versioning.diff.narwhals import NarwhalsDiffResolver
        from metaxy.data_versioning.joiners.narwhals import NarwhalsJoiner

        if self._conn is None:
            raise RuntimeError(
                "Cannot create native data version calculations: store is not open. "
                "Ensure store is used as context manager."
            )

        # All components accept/return Narwhals LazyFrames
        # DuckDBDataVersionCalculator loads extensions and generates SQL for hashing
        joiner = NarwhalsJoiner()
        calculator = DuckDBDataVersionCalculator(
            backend=self._conn,
            extensions=self.extensions,
        )
        diff_resolver = NarwhalsDiffResolver()

        return joiner, calculator, diff_resolver
@@ -0,0 +1,43 @@
1
+ """Exceptions for metadata store operations."""
2
+
3
+
4
class MetadataStoreError(Exception):
    """Root of the metadata-store exception hierarchy; catch this to handle any store error."""
8
+
9
+
10
class FeatureNotFoundError(MetadataStoreError):
    """Raised when a requested feature does not exist in the store."""
14
+
15
+
16
class FieldNotFoundError(MetadataStoreError):
    """Raised when a requested field does not exist for a feature."""
20
+
21
+
22
class MetadataSchemaError(MetadataStoreError):
    """Raised when a metadata DataFrame fails schema validation."""
26
+
27
+
28
class DependencyError(MetadataStoreError):
    """Raised when upstream dependencies are absent or invalid."""
32
+
33
+
34
class StoreNotOpenError(MetadataStoreError):
    """Raised when a store is used before it has been opened."""
38
+
39
+
40
class HashAlgorithmNotSupportedError(MetadataStoreError):
    """Raised when a hash algorithm is unsupported by the store or its components."""