datus-storage-postgresql 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datus_storage_postgresql-0.1.0/PKG-INFO +193 -0
- datus_storage_postgresql-0.1.0/README.md +169 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql/__init__.py +1 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql/rdb/__init__.py +13 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql/rdb/backend.py +463 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql/rdb/testing.py +89 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql/vector/__init__.py +13 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql/vector/backend.py +654 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql/vector/schema_converter.py +107 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql/vector/testing.py +153 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/PKG-INFO +193 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/SOURCES.txt +18 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/dependency_links.txt +1 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/entry_points.txt +11 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/requires.txt +10 -0
- datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/top_level.txt +1 -0
- datus_storage_postgresql-0.1.0/pyproject.toml +59 -0
- datus_storage_postgresql-0.1.0/setup.cfg +4 -0
- datus_storage_postgresql-0.1.0/tests/test_pg_rdb_backend.py +650 -0
- datus_storage_postgresql-0.1.0/tests/test_pgvector_backend.py +468 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datus-storage-postgresql
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: PostgreSQL RDB and pgvector backend adapters for datus-agent
|
|
5
|
+
Author-email: Datus Team <felix.liu@datus.ai>
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/Datus-ai/datus-storage-adapters
|
|
8
|
+
Project-URL: Repository, https://github.com/Datus-ai/datus-storage-adapters
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.12
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: datus-storage-base>=0.1.0
|
|
16
|
+
Requires-Dist: psycopg[binary]>=3.1
|
|
17
|
+
Requires-Dist: psycopg-pool>=3.1
|
|
18
|
+
Requires-Dist: pgvector>=0.3.0
|
|
19
|
+
Requires-Dist: pyarrow
|
|
20
|
+
Requires-Dist: pandas
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
23
|
+
Requires-Dist: testcontainers[postgres]>=4.0; extra == "dev"
|
|
24
|
+
|
|
25
|
+
# datus-storage-postgresql
|
|
26
|
+
|
|
27
|
+
PostgreSQL storage adapter for [datus-agent](https://github.com/Datus-ai/Datus-agent). Provides both RDB and Vector backends powered by a single PostgreSQL instance.
|
|
28
|
+
|
|
29
|
+
## Backends
|
|
30
|
+
|
|
31
|
+
### RDB Backend — `PostgresRdbBackend`
|
|
32
|
+
|
|
33
|
+
Implements `BaseRdbBackend` using psycopg v3 and psycopg-pool with a three-layer architecture:
|
|
34
|
+
|
|
35
|
+
- **`PostgresRdbBackend`** (lifecycle): `initialize()`, `connect(namespace, store_db_name)`, `close()`
|
|
36
|
+
- **`PgRdbDatabase`** (database-level, implements `RdbDatabase`): `ensure_table()`, `transaction()`, `close()`
|
|
37
|
+
- **`PgRdbTable`** (table-level, implements `RdbTable`): `insert()`, `query()`, `update()`, `delete()`, `upsert()`
|
|
38
|
+
|
|
39
|
+
Features:
|
|
40
|
+
- Full CRUD via `PgRdbTable` (no need to pass table name)
|
|
41
|
+
- `upsert()` with PostgreSQL `ON CONFLICT` (dataclass record input)
|
|
42
|
+
- `transaction()` context manager with auto-commit/rollback
|
|
43
|
+
- Namespace-based data isolation via PostgreSQL schemas
|
|
44
|
+
- Connection pooling with configurable min/max size
|
|
45
|
+
- Convenience methods on `PgRdbDatabase`: `get_connection()`, `execute()`, `execute_query()`, `execute_insert()`
|
|
46
|
+
|
|
47
|
+
### Vector Backend — `PgvectorBackend`
|
|
48
|
+
|
|
49
|
+
Implements `BaseVectorBackend` using the [pgvector](https://github.com/pgvector/pgvector) extension with a three-layer architecture:
|
|
50
|
+
|
|
51
|
+
- **`PgvectorBackend`** (lifecycle): `initialize()`, `connect(namespace)`, `build_embedding_config()`, `close()`
|
|
52
|
+
- **`PgVectorDb`** (database-level, implements `VectorDatabase`): `table_exists()`, `table_names()`, `create_table()`, `open_table()`, `drop_table()`
|
|
53
|
+
- **`PgVectorTable`** (table-level, implements `VectorTable`): `add()`, `merge_insert()`, `delete()`, `update()`, `search_vector()`, `search_hybrid()`, `search_all()`, `count_rows()`, index operations
|
|
54
|
+
|
|
55
|
+
Features:
|
|
56
|
+
- `WhereExpr` support (`str`, condition AST nodes, or `None`) via `build_where()`
|
|
57
|
+
- Vector similarity search (cosine / L2 / inner product)
|
|
58
|
+
- Automatic embedding computation on insert
|
|
59
|
+
- HNSW vector index, B-tree scalar index, GIN full-text index
|
|
60
|
+
- PyArrow Schema to PostgreSQL DDL mapping
|
|
61
|
+
|
|
62
|
+
## Configuration
|
|
63
|
+
|
|
64
|
+
Both backends register as `type: postgresql` and accept the same configuration parameters. They can point to the same PostgreSQL instance.
|
|
65
|
+
|
|
66
|
+
```yaml
|
|
67
|
+
storage:
|
|
68
|
+
rdb:
|
|
69
|
+
type: postgresql
|
|
70
|
+
host: localhost
|
|
71
|
+
port: 5432
|
|
72
|
+
user: postgres
|
|
73
|
+
password: postgres
|
|
74
|
+
dbname: datus
|
|
75
|
+
pool_min_size: 1
|
|
76
|
+
pool_max_size: 10
|
|
77
|
+
vector:
|
|
78
|
+
type: postgresql
|
|
79
|
+
host: localhost
|
|
80
|
+
port: 5432
|
|
81
|
+
user: postgres
|
|
82
|
+
password: postgres
|
|
83
|
+
dbname: datus
|
|
84
|
+
pool_min_size: 1
|
|
85
|
+
pool_max_size: 10
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Parameters
|
|
89
|
+
|
|
90
|
+
| Parameter | Required | Default | Description |
|
|
91
|
+
|-----------|----------|---------|-------------|
|
|
92
|
+
| `host` | Yes | — | Database host |
|
|
93
|
+
| `port` | Yes | — | Database port |
|
|
94
|
+
| `user` | Yes | — | Username |
|
|
95
|
+
| `password` | Yes | — | Password |
|
|
96
|
+
| `dbname` | Yes | — | Database name |
|
|
97
|
+
| `pool_min_size` | No | `1` | Minimum connections in pool |
|
|
98
|
+
| `pool_max_size` | No | `10` | Maximum connections in pool |
|
|
99
|
+
|
|
100
|
+
The vector backend automatically enables the pgvector extension (`CREATE EXTENSION IF NOT EXISTS vector`) on connect.
|
|
101
|
+
|
|
102
|
+
## Usage
|
|
103
|
+
|
|
104
|
+
### RDB Backend
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from dataclasses import dataclass
|
|
108
|
+
from datus.storage.rdb.base import TableDefinition, ColumnDef
|
|
109
|
+
|
|
110
|
+
@dataclass
|
|
111
|
+
class User:
|
|
112
|
+
id: int = None
|
|
113
|
+
name: str = None
|
|
114
|
+
email: str = None
|
|
115
|
+
|
|
116
|
+
backend = PostgresRdbBackend()
|
|
117
|
+
backend.initialize(config)
|
|
118
|
+
|
|
119
|
+
# connect() returns an RdbDatabase handle (namespace maps to PG schema)
|
|
120
|
+
db = backend.connect(namespace="my_app", store_db_name="user_store")
|
|
121
|
+
|
|
122
|
+
# ensure_table() returns an RdbTable handle
|
|
123
|
+
users_table = db.ensure_table(TableDefinition(
|
|
124
|
+
table_name="users",
|
|
125
|
+
columns=[
|
|
126
|
+
ColumnDef(name="id", col_type="INTEGER", primary_key=True, autoincrement=True),
|
|
127
|
+
ColumnDef(name="name", col_type="TEXT"),
|
|
128
|
+
ColumnDef(name="email", col_type="TEXT"),
|
|
129
|
+
],
|
|
130
|
+
))
|
|
131
|
+
|
|
132
|
+
# Table-level CRUD (no need to pass table name)
|
|
133
|
+
row_id = users_table.insert(User(name="Alice", email="alice@example.com"))
|
|
134
|
+
users = users_table.query(User, where={"name": "Alice"})
|
|
135
|
+
users_table.update({"email": "new@example.com"}, where={"name": "Alice"})
|
|
136
|
+
users_table.delete(where={"name": "Alice"})
|
|
137
|
+
|
|
138
|
+
# Transaction on database level
|
|
139
|
+
with db.transaction():
|
|
140
|
+
users_table.insert(User(name="Bob", email="bob@example.com"))
|
|
141
|
+
users_table.insert(User(name="Carol", email="carol@example.com"))
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Vector Backend
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from datus.storage.conditions import eq, and_
|
|
148
|
+
|
|
149
|
+
backend = PgvectorBackend()
|
|
150
|
+
backend.initialize(config)
|
|
151
|
+
|
|
152
|
+
# connect() returns a VectorDatabase handle
|
|
153
|
+
db = backend.connect(namespace="my_namespace")
|
|
154
|
+
|
|
155
|
+
# create_table() / open_table() return VectorTable handles
|
|
156
|
+
table = db.create_table("my_table", schema=my_schema, embedding_function=emb_config)
|
|
157
|
+
table = db.open_table("my_table")
|
|
158
|
+
|
|
159
|
+
# Table-level operations (no handle passing)
|
|
160
|
+
table.add(df)
|
|
161
|
+
results = table.search_all(where=eq("category", "active"))
|
|
162
|
+
results = table.search_all(where=and_(eq("status", "active"), eq("type", "A")))
|
|
163
|
+
results = table.search_vector(query_text="hello", vector_column="vector", top_n=10)
|
|
164
|
+
|
|
165
|
+
# Database-level operations
|
|
166
|
+
db.drop_table("my_table", ignore_missing=True)
|
|
167
|
+
assert not db.table_exists("my_table")
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Entry Points
|
|
171
|
+
|
|
172
|
+
```toml
|
|
173
|
+
[project.entry-points."datus.storage.rdb"]
|
|
174
|
+
postgresql = "datus_storage_postgresql.rdb:register"
|
|
175
|
+
|
|
176
|
+
[project.entry-points."datus.storage.vector"]
|
|
177
|
+
postgresql = "datus_storage_postgresql.vector:register"
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Once installed, datus-agent discovers and registers both backends automatically — no manual wiring needed.
|
|
181
|
+
|
|
182
|
+
## Source Layout
|
|
183
|
+
|
|
184
|
+
```
|
|
185
|
+
datus_storage_postgresql/
|
|
186
|
+
├── rdb/
|
|
187
|
+
│ ├── __init__.py # register() → RdbRegistry
|
|
188
|
+
│ └── backend.py # PostgresRdbBackend
|
|
189
|
+
└── vector/
|
|
190
|
+
├── __init__.py # register() → VectorRegistry
|
|
191
|
+
├── backend.py # PgvectorBackend
|
|
192
|
+
└── schema_converter.py # PyArrow Schema → PostgreSQL DDL
|
|
193
|
+
```
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# datus-storage-postgresql
|
|
2
|
+
|
|
3
|
+
PostgreSQL storage adapter for [datus-agent](https://github.com/Datus-ai/Datus-agent). Provides both RDB and Vector backends powered by a single PostgreSQL instance.
|
|
4
|
+
|
|
5
|
+
## Backends
|
|
6
|
+
|
|
7
|
+
### RDB Backend — `PostgresRdbBackend`
|
|
8
|
+
|
|
9
|
+
Implements `BaseRdbBackend` using psycopg v3 and psycopg-pool with a three-layer architecture:
|
|
10
|
+
|
|
11
|
+
- **`PostgresRdbBackend`** (lifecycle): `initialize()`, `connect(namespace, store_db_name)`, `close()`
|
|
12
|
+
- **`PgRdbDatabase`** (database-level, implements `RdbDatabase`): `ensure_table()`, `transaction()`, `close()`
|
|
13
|
+
- **`PgRdbTable`** (table-level, implements `RdbTable`): `insert()`, `query()`, `update()`, `delete()`, `upsert()`
|
|
14
|
+
|
|
15
|
+
Features:
|
|
16
|
+
- Full CRUD via `PgRdbTable` (no need to pass table name)
|
|
17
|
+
- `upsert()` with PostgreSQL `ON CONFLICT` (dataclass record input)
|
|
18
|
+
- `transaction()` context manager with auto-commit/rollback
|
|
19
|
+
- Namespace-based data isolation via PostgreSQL schemas
|
|
20
|
+
- Connection pooling with configurable min/max size
|
|
21
|
+
- Convenience methods on `PgRdbDatabase`: `get_connection()`, `execute()`, `execute_query()`, `execute_insert()`
|
|
22
|
+
|
|
23
|
+
### Vector Backend — `PgvectorBackend`
|
|
24
|
+
|
|
25
|
+
Implements `BaseVectorBackend` using the [pgvector](https://github.com/pgvector/pgvector) extension with a three-layer architecture:
|
|
26
|
+
|
|
27
|
+
- **`PgvectorBackend`** (lifecycle): `initialize()`, `connect(namespace)`, `build_embedding_config()`, `close()`
|
|
28
|
+
- **`PgVectorDb`** (database-level, implements `VectorDatabase`): `table_exists()`, `table_names()`, `create_table()`, `open_table()`, `drop_table()`
|
|
29
|
+
- **`PgVectorTable`** (table-level, implements `VectorTable`): `add()`, `merge_insert()`, `delete()`, `update()`, `search_vector()`, `search_hybrid()`, `search_all()`, `count_rows()`, index operations
|
|
30
|
+
|
|
31
|
+
Features:
|
|
32
|
+
- `WhereExpr` support (`str`, condition AST nodes, or `None`) via `build_where()`
|
|
33
|
+
- Vector similarity search (cosine / L2 / inner product)
|
|
34
|
+
- Automatic embedding computation on insert
|
|
35
|
+
- HNSW vector index, B-tree scalar index, GIN full-text index
|
|
36
|
+
- PyArrow Schema to PostgreSQL DDL mapping
|
|
37
|
+
|
|
38
|
+
## Configuration
|
|
39
|
+
|
|
40
|
+
Both backends register as `type: postgresql` and accept the same configuration parameters. They can point to the same PostgreSQL instance.
|
|
41
|
+
|
|
42
|
+
```yaml
|
|
43
|
+
storage:
|
|
44
|
+
rdb:
|
|
45
|
+
type: postgresql
|
|
46
|
+
host: localhost
|
|
47
|
+
port: 5432
|
|
48
|
+
user: postgres
|
|
49
|
+
password: postgres
|
|
50
|
+
dbname: datus
|
|
51
|
+
pool_min_size: 1
|
|
52
|
+
pool_max_size: 10
|
|
53
|
+
vector:
|
|
54
|
+
type: postgresql
|
|
55
|
+
host: localhost
|
|
56
|
+
port: 5432
|
|
57
|
+
user: postgres
|
|
58
|
+
password: postgres
|
|
59
|
+
dbname: datus
|
|
60
|
+
pool_min_size: 1
|
|
61
|
+
pool_max_size: 10
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Parameters
|
|
65
|
+
|
|
66
|
+
| Parameter | Required | Default | Description |
|
|
67
|
+
|-----------|----------|---------|-------------|
|
|
68
|
+
| `host` | Yes | — | Database host |
|
|
69
|
+
| `port` | Yes | — | Database port |
|
|
70
|
+
| `user` | Yes | — | Username |
|
|
71
|
+
| `password` | Yes | — | Password |
|
|
72
|
+
| `dbname` | Yes | — | Database name |
|
|
73
|
+
| `pool_min_size` | No | `1` | Minimum connections in pool |
|
|
74
|
+
| `pool_max_size` | No | `10` | Maximum connections in pool |
|
|
75
|
+
|
|
76
|
+
The vector backend automatically enables the pgvector extension (`CREATE EXTENSION IF NOT EXISTS vector`) on connect.
|
|
77
|
+
|
|
78
|
+
## Usage
|
|
79
|
+
|
|
80
|
+
### RDB Backend
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from dataclasses import dataclass
|
|
84
|
+
from datus.storage.rdb.base import TableDefinition, ColumnDef
|
|
85
|
+
|
|
86
|
+
@dataclass
|
|
87
|
+
class User:
|
|
88
|
+
id: int = None
|
|
89
|
+
name: str = None
|
|
90
|
+
email: str = None
|
|
91
|
+
|
|
92
|
+
backend = PostgresRdbBackend()
|
|
93
|
+
backend.initialize(config)
|
|
94
|
+
|
|
95
|
+
# connect() returns an RdbDatabase handle (namespace maps to PG schema)
|
|
96
|
+
db = backend.connect(namespace="my_app", store_db_name="user_store")
|
|
97
|
+
|
|
98
|
+
# ensure_table() returns an RdbTable handle
|
|
99
|
+
users_table = db.ensure_table(TableDefinition(
|
|
100
|
+
table_name="users",
|
|
101
|
+
columns=[
|
|
102
|
+
ColumnDef(name="id", col_type="INTEGER", primary_key=True, autoincrement=True),
|
|
103
|
+
ColumnDef(name="name", col_type="TEXT"),
|
|
104
|
+
ColumnDef(name="email", col_type="TEXT"),
|
|
105
|
+
],
|
|
106
|
+
))
|
|
107
|
+
|
|
108
|
+
# Table-level CRUD (no need to pass table name)
|
|
109
|
+
row_id = users_table.insert(User(name="Alice", email="alice@example.com"))
|
|
110
|
+
users = users_table.query(User, where={"name": "Alice"})
|
|
111
|
+
users_table.update({"email": "new@example.com"}, where={"name": "Alice"})
|
|
112
|
+
users_table.delete(where={"name": "Alice"})
|
|
113
|
+
|
|
114
|
+
# Transaction on database level
|
|
115
|
+
with db.transaction():
|
|
116
|
+
users_table.insert(User(name="Bob", email="bob@example.com"))
|
|
117
|
+
users_table.insert(User(name="Carol", email="carol@example.com"))
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Vector Backend
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from datus.storage.conditions import eq, and_
|
|
124
|
+
|
|
125
|
+
backend = PgvectorBackend()
|
|
126
|
+
backend.initialize(config)
|
|
127
|
+
|
|
128
|
+
# connect() returns a VectorDatabase handle
|
|
129
|
+
db = backend.connect(namespace="my_namespace")
|
|
130
|
+
|
|
131
|
+
# create_table() / open_table() return VectorTable handles
|
|
132
|
+
table = db.create_table("my_table", schema=my_schema, embedding_function=emb_config)
|
|
133
|
+
table = db.open_table("my_table")
|
|
134
|
+
|
|
135
|
+
# Table-level operations (no handle passing)
|
|
136
|
+
table.add(df)
|
|
137
|
+
results = table.search_all(where=eq("category", "active"))
|
|
138
|
+
results = table.search_all(where=and_(eq("status", "active"), eq("type", "A")))
|
|
139
|
+
results = table.search_vector(query_text="hello", vector_column="vector", top_n=10)
|
|
140
|
+
|
|
141
|
+
# Database-level operations
|
|
142
|
+
db.drop_table("my_table", ignore_missing=True)
|
|
143
|
+
assert not db.table_exists("my_table")
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Entry Points
|
|
147
|
+
|
|
148
|
+
```toml
|
|
149
|
+
[project.entry-points."datus.storage.rdb"]
|
|
150
|
+
postgresql = "datus_storage_postgresql.rdb:register"
|
|
151
|
+
|
|
152
|
+
[project.entry-points."datus.storage.vector"]
|
|
153
|
+
postgresql = "datus_storage_postgresql.vector:register"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Once installed, datus-agent discovers and registers both backends automatically — no manual wiring needed.
|
|
157
|
+
|
|
158
|
+
## Source Layout
|
|
159
|
+
|
|
160
|
+
```
|
|
161
|
+
datus_storage_postgresql/
|
|
162
|
+
├── rdb/
|
|
163
|
+
│ ├── __init__.py # register() → RdbRegistry
|
|
164
|
+
│ └── backend.py # PostgresRdbBackend
|
|
165
|
+
└── vector/
|
|
166
|
+
├── __init__.py # register() → VectorRegistry
|
|
167
|
+
├── backend.py # PgvectorBackend
|
|
168
|
+
└── schema_converter.py # PyArrow Schema → PostgreSQL DDL
|
|
169
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""PostgreSQL storage adapters for datus-agent."""
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""PostgreSQL RDB backend adapter for datus-agent."""
|
|
2
|
+
|
|
3
|
+
from datus_storage_postgresql.rdb.backend import PostgresRdbBackend
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def register():
|
|
7
|
+
"""Register the PostgreSQL RDB backend with the datus registry."""
|
|
8
|
+
from datus_storage_base.rdb.registry import RdbRegistry
|
|
9
|
+
|
|
10
|
+
RdbRegistry.register("postgresql", PostgresRdbBackend)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
__all__ = ["PostgresRdbBackend", "register"]
|