rhizo 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rhizo-0.1.1/PKG-INFO +88 -0
- rhizo-0.1.1/README.md +59 -0
- rhizo-0.1.1/pyproject.toml +45 -0
- rhizo-0.1.1/rhizo/__init__.py +151 -0
- rhizo-0.1.1/rhizo/__init__.pyi +51 -0
- rhizo-0.1.1/rhizo/cache.py +481 -0
- rhizo-0.1.1/rhizo/engine.py +1413 -0
- rhizo-0.1.1/rhizo/olap_engine.py +808 -0
- rhizo-0.1.1/rhizo/py.typed +0 -0
- rhizo-0.1.1/rhizo/reader.py +701 -0
- rhizo-0.1.1/rhizo/subscriber.py +274 -0
- rhizo-0.1.1/rhizo/transaction.py +351 -0
- rhizo-0.1.1/rhizo/writer.py +334 -0
- rhizo-0.1.1/rhizo.egg-info/PKG-INFO +88 -0
- rhizo-0.1.1/rhizo.egg-info/SOURCES.txt +18 -0
- rhizo-0.1.1/rhizo.egg-info/dependency_links.txt +1 -0
- rhizo-0.1.1/rhizo.egg-info/requires.txt +9 -0
- rhizo-0.1.1/rhizo.egg-info/top_level.txt +1 -0
- rhizo-0.1.1/setup.cfg +4 -0
- rhizo-0.1.1/tests/test_cache.py +354 -0
rhizo-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rhizo
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Data, connected. Versioned data with SQL, time travel, and cross-table ACID transactions.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://rhizodata.dev
|
|
7
|
+
Project-URL: Repository, https://github.com/rhizodata/rhizo
|
|
8
|
+
Keywords: data,sql,duckdb,arrow,versioning,time-travel,transactions
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: rhizo-core>=0.1.0
|
|
22
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
23
|
+
Requires-Dist: duckdb>=0.9.0
|
|
24
|
+
Requires-Dist: pandas>=2.0.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
27
|
+
Requires-Dist: maturin>=1.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
29
|
+
|
|
30
|
+
# rhizo
|
|
31
|
+
|
|
32
|
+
Query layer for [Rhizo](https://rhizodata.dev) - versioned data with SQL, time travel, and cross-table ACID transactions.
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install rhizo
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### From Source
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
git clone https://github.com/rhizodata/rhizo.git
|
|
44
|
+
cd rhizo/python
|
|
45
|
+
pip install -e .
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Note: Installing from source requires `rhizo-core` (Rust) to be built first. See the main [README](https://github.com/rhizodata/rhizo) for full build instructions.
|
|
49
|
+
|
|
50
|
+
## Quick Start
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import rhizo
|
|
54
|
+
from rhizo import QueryEngine
|
|
55
|
+
import pandas as pd
|
|
56
|
+
|
|
57
|
+
# Initialize storage
|
|
58
|
+
store = rhizo.PyChunkStore("./data/chunks")
|
|
59
|
+
catalog = rhizo.PyCatalog("./data/catalog")
|
|
60
|
+
engine = QueryEngine(store, catalog)
|
|
61
|
+
|
|
62
|
+
# Write data
|
|
63
|
+
df = pd.DataFrame({"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]})
|
|
64
|
+
engine.write_table("users", df)
|
|
65
|
+
|
|
66
|
+
# SQL queries with DuckDB
|
|
67
|
+
result = engine.query("SELECT * FROM users WHERE id > 1")
|
|
68
|
+
print(result.to_pandas())
|
|
69
|
+
|
|
70
|
+
# Time travel to any version
|
|
71
|
+
result_v1 = engine.query("SELECT * FROM users", versions={"users": 1})
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Features
|
|
75
|
+
|
|
76
|
+
- **SQL Queries**: DuckDB-powered analytical queries
|
|
77
|
+
- **Time Travel**: Query any historical version
|
|
78
|
+
- **Cross-Table ACID**: Atomic transactions across multiple tables
|
|
79
|
+
- **Git-like Branching**: Zero-copy branches for experimentation
|
|
80
|
+
- **Change Tracking**: Subscribe to data changes
|
|
81
|
+
|
|
82
|
+
## Documentation
|
|
83
|
+
|
|
84
|
+
See [rhizodata.dev](https://rhizodata.dev) for full documentation.
|
|
85
|
+
|
|
86
|
+
## License
|
|
87
|
+
|
|
88
|
+
MIT
|
rhizo-0.1.1/README.md
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# rhizo
|
|
2
|
+
|
|
3
|
+
Query layer for [Rhizo](https://rhizodata.dev) - versioned data with SQL, time travel, and cross-table ACID transactions.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install rhizo
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
### From Source
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
git clone https://github.com/rhizodata/rhizo.git
|
|
15
|
+
cd rhizo/python
|
|
16
|
+
pip install -e .
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Note: Installing from source requires `rhizo-core` (Rust) to be built first. See the main [README](https://github.com/rhizodata/rhizo) for full build instructions.
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
import rhizo
|
|
25
|
+
from rhizo import QueryEngine
|
|
26
|
+
import pandas as pd
|
|
27
|
+
|
|
28
|
+
# Initialize storage
|
|
29
|
+
store = rhizo.PyChunkStore("./data/chunks")
|
|
30
|
+
catalog = rhizo.PyCatalog("./data/catalog")
|
|
31
|
+
engine = QueryEngine(store, catalog)
|
|
32
|
+
|
|
33
|
+
# Write data
|
|
34
|
+
df = pd.DataFrame({"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]})
|
|
35
|
+
engine.write_table("users", df)
|
|
36
|
+
|
|
37
|
+
# SQL queries with DuckDB
|
|
38
|
+
result = engine.query("SELECT * FROM users WHERE id > 1")
|
|
39
|
+
print(result.to_pandas())
|
|
40
|
+
|
|
41
|
+
# Time travel to any version
|
|
42
|
+
result_v1 = engine.query("SELECT * FROM users", versions={"users": 1})
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Features
|
|
46
|
+
|
|
47
|
+
- **SQL Queries**: DuckDB-powered analytical queries
|
|
48
|
+
- **Time Travel**: Query any historical version
|
|
49
|
+
- **Cross-Table ACID**: Atomic transactions across multiple tables
|
|
50
|
+
- **Git-like Branching**: Zero-copy branches for experimentation
|
|
51
|
+
- **Change Tracking**: Subscribe to data changes
|
|
52
|
+
|
|
53
|
+
## Documentation
|
|
54
|
+
|
|
55
|
+
See [rhizodata.dev](https://rhizodata.dev) for full documentation.
|
|
56
|
+
|
|
57
|
+
## License
|
|
58
|
+
|
|
59
|
+
MIT
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "rhizo"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Data, connected. Versioned data with SQL, time travel, and cross-table ACID transactions."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
keywords = ["data", "sql", "duckdb", "arrow", "versioning", "time-travel", "transactions"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Database",
|
|
23
|
+
"Topic :: Software Development :: Libraries",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"rhizo-core>=0.1.0",
|
|
27
|
+
"pyarrow>=14.0.0",
|
|
28
|
+
"duckdb>=0.9.0",
|
|
29
|
+
"pandas>=2.0.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://rhizodata.dev"
|
|
34
|
+
Repository = "https://github.com/rhizodata/rhizo"
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=7.0.0",
|
|
39
|
+
"maturin>=1.0.0",
|
|
40
|
+
"ruff>=0.1.0",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[tool.setuptools.packages.find]
|
|
44
|
+
where = ["."]
|
|
45
|
+
include = ["rhizo*"]
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Rhizo - Data, connected. SQL queries over versioned, content-addressable data.
|
|
3
|
+
|
|
4
|
+
This module provides:
|
|
5
|
+
- TableWriter: Write DataFrames as chunked Parquet files
|
|
6
|
+
- TableReader: Read and assemble tables from chunks
|
|
7
|
+
- QueryEngine: SQL interface with time travel support (DuckDB-based)
|
|
8
|
+
- OLAPEngine: High-performance analytical queries (DataFusion-based)
|
|
9
|
+
- CacheManager: LRU cache for Arrow tables
|
|
10
|
+
- TransactionContext: ACID transactions across multiple tables
|
|
11
|
+
- Subscriber: Stream changelog events with polling or callbacks
|
|
12
|
+
- ChangeEvent: Individual table change within a transaction
|
|
13
|
+
- Filter: Predicate filter builder for pushdown optimization
|
|
14
|
+
|
|
15
|
+
Low-level types (from _rhizo):
|
|
16
|
+
- PyChunkStore: Content-addressable chunk storage
|
|
17
|
+
- PyCatalog: Table version catalog
|
|
18
|
+
- PyBranchManager: Git-like branching
|
|
19
|
+
- PyTransactionManager: Cross-table ACID transactions
|
|
20
|
+
- PyMerkleConfig, merkle_build_tree, merkle_diff_trees, merkle_verify_tree: Merkle tree operations
|
|
21
|
+
- PyParquetEncoder, PyParquetDecoder: High-performance Parquet I/O
|
|
22
|
+
- PyPredicateFilter: Predicate pushdown filters
|
|
23
|
+
- PyOpType, PyAlgebraicValue: Algebraic merge types
|
|
24
|
+
- PyTableAlgebraicSchema, PyAlgebraicSchemaRegistry: Schema-level merge configuration
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from .writer import TableWriter
|
|
28
|
+
from .reader import TableReader, Filter
|
|
29
|
+
from .engine import QueryEngine
|
|
30
|
+
from .transaction import TransactionContext
|
|
31
|
+
from .subscriber import Subscriber, ChangeEvent
|
|
32
|
+
from .cache import CacheManager, CacheKey, CacheStats
|
|
33
|
+
from .olap_engine import OLAPEngine, is_datafusion_available
|
|
34
|
+
|
|
35
|
+
# Re-export low-level types from _rhizo for convenience
|
|
36
|
+
from _rhizo import (
|
|
37
|
+
PyChunkStore,
|
|
38
|
+
PyCatalog,
|
|
39
|
+
PyBranchManager,
|
|
40
|
+
PyTransactionManager,
|
|
41
|
+
PyTableVersion,
|
|
42
|
+
PyBranch,
|
|
43
|
+
PyBranchDiff,
|
|
44
|
+
PyMerkleConfig,
|
|
45
|
+
PyMerkleTree,
|
|
46
|
+
PyMerkleDiff,
|
|
47
|
+
PyDataChunk,
|
|
48
|
+
PyMerkleNode,
|
|
49
|
+
merkle_build_tree,
|
|
50
|
+
merkle_diff_trees,
|
|
51
|
+
merkle_verify_tree,
|
|
52
|
+
PyParquetEncoder,
|
|
53
|
+
PyParquetDecoder,
|
|
54
|
+
PyPredicateFilter,
|
|
55
|
+
PyFilterOp,
|
|
56
|
+
PyScalarValue,
|
|
57
|
+
PyChangelogEntry,
|
|
58
|
+
PyTableChange,
|
|
59
|
+
PyTransactionInfo,
|
|
60
|
+
PyRecoveryReport,
|
|
61
|
+
# Algebraic types
|
|
62
|
+
PyOpType,
|
|
63
|
+
PyAlgebraicValue,
|
|
64
|
+
PyTableAlgebraicSchema,
|
|
65
|
+
PyAlgebraicSchemaRegistry,
|
|
66
|
+
PyMergeAnalysis,
|
|
67
|
+
PyMergeOutcome,
|
|
68
|
+
algebraic_merge,
|
|
69
|
+
# Distributed types (coordination-free transactions)
|
|
70
|
+
PyNodeId,
|
|
71
|
+
PyCausalOrder,
|
|
72
|
+
PyVectorClock,
|
|
73
|
+
# Local commit protocol (coordination-free transactions)
|
|
74
|
+
PyAlgebraicOperation,
|
|
75
|
+
PyAlgebraicTransaction,
|
|
76
|
+
PyVersionedUpdate,
|
|
77
|
+
PyLocalCommitProtocol,
|
|
78
|
+
# Simulation types (multi-node convergence testing)
|
|
79
|
+
PyNetworkCondition,
|
|
80
|
+
PySimulationConfig,
|
|
81
|
+
PySimulationStats,
|
|
82
|
+
PySimulatedNode,
|
|
83
|
+
PySimulatedCluster,
|
|
84
|
+
PySimulationBuilder,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Must match `version` in pyproject.toml; this distribution is released as 0.1.1.
__version__ = "0.1.1"
|
|
88
|
+
__all__ = [
|
|
89
|
+
# High-level API
|
|
90
|
+
"TableWriter",
|
|
91
|
+
"TableReader",
|
|
92
|
+
"QueryEngine",
|
|
93
|
+
"OLAPEngine",
|
|
94
|
+
"CacheManager",
|
|
95
|
+
"CacheKey",
|
|
96
|
+
"CacheStats",
|
|
97
|
+
"is_datafusion_available",
|
|
98
|
+
"TransactionContext",
|
|
99
|
+
"Subscriber",
|
|
100
|
+
"ChangeEvent",
|
|
101
|
+
"Filter",
|
|
102
|
+
# Low-level types
|
|
103
|
+
"PyChunkStore",
|
|
104
|
+
"PyCatalog",
|
|
105
|
+
"PyBranchManager",
|
|
106
|
+
"PyTransactionManager",
|
|
107
|
+
"PyTableVersion",
|
|
108
|
+
"PyBranch",
|
|
109
|
+
"PyBranchDiff",
|
|
110
|
+
"PyMerkleConfig",
|
|
111
|
+
"PyMerkleTree",
|
|
112
|
+
"PyMerkleDiff",
|
|
113
|
+
"PyDataChunk",
|
|
114
|
+
"PyMerkleNode",
|
|
115
|
+
"merkle_build_tree",
|
|
116
|
+
"merkle_diff_trees",
|
|
117
|
+
"merkle_verify_tree",
|
|
118
|
+
"PyParquetEncoder",
|
|
119
|
+
"PyParquetDecoder",
|
|
120
|
+
"PyPredicateFilter",
|
|
121
|
+
"PyFilterOp",
|
|
122
|
+
"PyScalarValue",
|
|
123
|
+
"PyChangelogEntry",
|
|
124
|
+
"PyTableChange",
|
|
125
|
+
"PyTransactionInfo",
|
|
126
|
+
"PyRecoveryReport",
|
|
127
|
+
# Algebraic types
|
|
128
|
+
"PyOpType",
|
|
129
|
+
"PyAlgebraicValue",
|
|
130
|
+
"PyTableAlgebraicSchema",
|
|
131
|
+
"PyAlgebraicSchemaRegistry",
|
|
132
|
+
"PyMergeAnalysis",
|
|
133
|
+
"PyMergeOutcome",
|
|
134
|
+
"algebraic_merge",
|
|
135
|
+
# Distributed types
|
|
136
|
+
"PyNodeId",
|
|
137
|
+
"PyCausalOrder",
|
|
138
|
+
"PyVectorClock",
|
|
139
|
+
# Local commit protocol
|
|
140
|
+
"PyAlgebraicOperation",
|
|
141
|
+
"PyAlgebraicTransaction",
|
|
142
|
+
"PyVersionedUpdate",
|
|
143
|
+
"PyLocalCommitProtocol",
|
|
144
|
+
# Simulation types
|
|
145
|
+
"PyNetworkCondition",
|
|
146
|
+
"PySimulationConfig",
|
|
147
|
+
"PySimulationStats",
|
|
148
|
+
"PySimulatedNode",
|
|
149
|
+
"PySimulatedCluster",
|
|
150
|
+
"PySimulationBuilder",
|
|
151
|
+
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Type stubs for the rhizo package."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Dict, Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
# High-level API
|
|
6
|
+
from .writer import TableWriter as TableWriter
|
|
7
|
+
from .reader import TableReader as TableReader, Filter as Filter
|
|
8
|
+
from .engine import QueryEngine as QueryEngine
|
|
9
|
+
from .transaction import TransactionContext as TransactionContext
|
|
10
|
+
from .subscriber import Subscriber as Subscriber, ChangeEvent as ChangeEvent
|
|
11
|
+
from .cache import CacheManager as CacheManager, CacheKey as CacheKey, CacheStats as CacheStats
|
|
12
|
+
from .olap_engine import OLAPEngine as OLAPEngine, is_datafusion_available as is_datafusion_available
|
|
13
|
+
|
|
14
|
+
# Re-exports from _rhizo
|
|
15
|
+
# Every name re-exported by rhizo/__init__.py must also appear here: when a
# stub file is present, type checkers read it instead of the runtime module,
# so anything missing from this list is reported as an unknown attribute.
# The redundant `X as X` form marks each name as an intentional re-export.
from _rhizo import (
    PyChunkStore as PyChunkStore,
    PyCatalog as PyCatalog,
    PyBranchManager as PyBranchManager,
    PyTransactionManager as PyTransactionManager,
    PyTableVersion as PyTableVersion,
    PyBranch as PyBranch,
    PyBranchDiff as PyBranchDiff,
    PyMerkleConfig as PyMerkleConfig,
    PyMerkleTree as PyMerkleTree,
    PyMerkleDiff as PyMerkleDiff,
    PyDataChunk as PyDataChunk,
    PyMerkleNode as PyMerkleNode,
    merkle_build_tree as merkle_build_tree,
    merkle_diff_trees as merkle_diff_trees,
    merkle_verify_tree as merkle_verify_tree,
    PyParquetEncoder as PyParquetEncoder,
    PyParquetDecoder as PyParquetDecoder,
    PyPredicateFilter as PyPredicateFilter,
    PyFilterOp as PyFilterOp,
    PyScalarValue as PyScalarValue,
    PyChangelogEntry as PyChangelogEntry,
    PyTableChange as PyTableChange,
    PyTransactionInfo as PyTransactionInfo,
    PyRecoveryReport as PyRecoveryReport,
    # Algebraic types
    PyOpType as PyOpType,
    PyAlgebraicValue as PyAlgebraicValue,
    PyTableAlgebraicSchema as PyTableAlgebraicSchema,
    PyAlgebraicSchemaRegistry as PyAlgebraicSchemaRegistry,
    PyMergeAnalysis as PyMergeAnalysis,
    PyMergeOutcome as PyMergeOutcome,
    algebraic_merge as algebraic_merge,
    # Distributed types (coordination-free transactions)
    PyNodeId as PyNodeId,
    PyCausalOrder as PyCausalOrder,
    PyVectorClock as PyVectorClock,
    # Local commit protocol (coordination-free transactions)
    PyAlgebraicOperation as PyAlgebraicOperation,
    PyAlgebraicTransaction as PyAlgebraicTransaction,
    PyVersionedUpdate as PyVersionedUpdate,
    PyLocalCommitProtocol as PyLocalCommitProtocol,
    # Simulation types (multi-node convergence testing)
    PyNetworkCondition as PyNetworkCondition,
    PySimulationConfig as PySimulationConfig,
    PySimulationStats as PySimulationStats,
    PySimulatedNode as PySimulatedNode,
    PySimulatedCluster as PySimulatedCluster,
    PySimulationBuilder as PySimulationBuilder,
)
|
|
49
|
+
|
|
50
|
+
__version__: str
|
|
51
|
+
__all__: List[str]
|