datus-storage-postgresql 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. datus_storage_postgresql-0.1.0/PKG-INFO +193 -0
  2. datus_storage_postgresql-0.1.0/README.md +169 -0
  3. datus_storage_postgresql-0.1.0/datus_storage_postgresql/__init__.py +1 -0
  4. datus_storage_postgresql-0.1.0/datus_storage_postgresql/rdb/__init__.py +13 -0
  5. datus_storage_postgresql-0.1.0/datus_storage_postgresql/rdb/backend.py +463 -0
  6. datus_storage_postgresql-0.1.0/datus_storage_postgresql/rdb/testing.py +89 -0
  7. datus_storage_postgresql-0.1.0/datus_storage_postgresql/vector/__init__.py +13 -0
  8. datus_storage_postgresql-0.1.0/datus_storage_postgresql/vector/backend.py +654 -0
  9. datus_storage_postgresql-0.1.0/datus_storage_postgresql/vector/schema_converter.py +107 -0
  10. datus_storage_postgresql-0.1.0/datus_storage_postgresql/vector/testing.py +153 -0
  11. datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/PKG-INFO +193 -0
  12. datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/SOURCES.txt +18 -0
  13. datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/dependency_links.txt +1 -0
  14. datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/entry_points.txt +11 -0
  15. datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/requires.txt +10 -0
  16. datus_storage_postgresql-0.1.0/datus_storage_postgresql.egg-info/top_level.txt +1 -0
  17. datus_storage_postgresql-0.1.0/pyproject.toml +59 -0
  18. datus_storage_postgresql-0.1.0/setup.cfg +4 -0
  19. datus_storage_postgresql-0.1.0/tests/test_pg_rdb_backend.py +650 -0
  20. datus_storage_postgresql-0.1.0/tests/test_pgvector_backend.py +468 -0
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: datus-storage-postgresql
3
+ Version: 0.1.0
4
+ Summary: PostgreSQL RDB and pgvector backend adapters for datus-agent
5
+ Author-email: Datus Team <felix.liu@datus.ai>
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/Datus-ai/datus-storage-adapters
8
+ Project-URL: Repository, https://github.com/Datus-ai/datus-storage-adapters
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.12
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: datus-storage-base>=0.1.0
16
+ Requires-Dist: psycopg[binary]>=3.1
17
+ Requires-Dist: psycopg-pool>=3.1
18
+ Requires-Dist: pgvector>=0.3.0
19
+ Requires-Dist: pyarrow
20
+ Requires-Dist: pandas
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=8.0; extra == "dev"
23
+ Requires-Dist: testcontainers[postgres]>=4.0; extra == "dev"
24
+
25
+ # datus-storage-postgresql
26
+
27
+ PostgreSQL storage adapter for [datus-agent](https://github.com/Datus-ai/Datus-agent). Provides both RDB and Vector backends powered by a single PostgreSQL instance.
28
+
29
+ ## Backends
30
+
31
+ ### RDB Backend — `PostgresRdbBackend`
32
+
33
+ Implements `BaseRdbBackend` using psycopg v3 and psycopg-pool with a three-layer architecture:
34
+
35
+ - **`PostgresRdbBackend`** (lifecycle): `initialize()`, `connect(namespace, store_db_name)`, `close()`
36
+ - **`PgRdbDatabase`** (database-level, implements `RdbDatabase`): `ensure_table()`, `transaction()`, `close()`
37
+ - **`PgRdbTable`** (table-level, implements `RdbTable`): `insert()`, `query()`, `update()`, `delete()`, `upsert()`
38
+
39
+ Features:
40
+ - Full CRUD via `PgRdbTable` (no need to pass table name)
41
+ - `upsert()` with PostgreSQL `ON CONFLICT` (dataclass record input)
42
+ - `transaction()` context manager with auto-commit/rollback
43
+ - Namespace-based data isolation via PostgreSQL schemas
44
+ - Connection pooling with configurable min/max size
45
+ - Convenience methods on `PgRdbDatabase`: `get_connection()`, `execute()`, `execute_query()`, `execute_insert()`
46
+
47
+ ### Vector Backend — `PgvectorBackend`
48
+
49
+ Implements `BaseVectorBackend` using the [pgvector](https://github.com/pgvector/pgvector) extension with a three-layer architecture:
50
+
51
+ - **`PgvectorBackend`** (lifecycle): `initialize()`, `connect(namespace)`, `build_embedding_config()`, `close()`
52
+ - **`PgVectorDb`** (database-level, implements `VectorDatabase`): `table_exists()`, `table_names()`, `create_table()`, `open_table()`, `drop_table()`
53
+ - **`PgVectorTable`** (table-level, implements `VectorTable`): `add()`, `merge_insert()`, `delete()`, `update()`, `search_vector()`, `search_hybrid()`, `search_all()`, `count_rows()`, index operations
54
+
55
+ Features:
56
+ - `WhereExpr` support (`str`, condition AST nodes, or `None`) via `build_where()`
57
+ - Vector similarity search (cosine / L2 / inner product)
58
+ - Automatic embedding computation on insert
59
+ - HNSW vector index, B-tree scalar index, GIN full-text index
60
+ - PyArrow Schema to PostgreSQL DDL mapping
61
+
62
+ ## Configuration
63
+
64
+ Both backends register as `type: postgresql` and accept the same configuration parameters. They can point to the same PostgreSQL instance.
65
+
66
+ ```yaml
67
+ storage:
68
+ rdb:
69
+ type: postgresql
70
+ host: localhost
71
+ port: 5432
72
+ user: postgres
73
+ password: postgres
74
+ dbname: datus
75
+ pool_min_size: 1
76
+ pool_max_size: 10
77
+ vector:
78
+ type: postgresql
79
+ host: localhost
80
+ port: 5432
81
+ user: postgres
82
+ password: postgres
83
+ dbname: datus
84
+ pool_min_size: 1
85
+ pool_max_size: 10
86
+ ```
87
+
88
+ ### Parameters
89
+
90
+ | Parameter | Required | Default | Description |
91
+ |-----------|----------|---------|-------------|
92
+ | `host` | Yes | — | Database host |
93
+ | `port` | Yes | — | Database port |
94
+ | `user` | Yes | — | Username |
95
+ | `password` | Yes | — | Password |
96
+ | `dbname` | Yes | — | Database name |
97
+ | `pool_min_size` | No | `1` | Minimum connections in pool |
98
+ | `pool_max_size` | No | `10` | Maximum connections in pool |
99
+
100
+ The vector backend automatically enables the pgvector extension (`CREATE EXTENSION IF NOT EXISTS vector`) on connect.
101
+
102
+ ## Usage
103
+
104
+ ### RDB Backend
105
+
106
+ ```python
107
+ from dataclasses import dataclass
108
+ from datus.storage.rdb.base import TableDefinition, ColumnDef
109
+
110
+ @dataclass
111
+ class User:
112
+ id: int = None
113
+ name: str = None
114
+ email: str = None
115
+
116
+ backend = PostgresRdbBackend()
117
+ backend.initialize(config)
118
+
119
+ # connect() returns an RdbDatabase handle (namespace maps to PG schema)
120
+ db = backend.connect(namespace="my_app", store_db_name="user_store")
121
+
122
+ # ensure_table() returns an RdbTable handle
123
+ users_table = db.ensure_table(TableDefinition(
124
+ table_name="users",
125
+ columns=[
126
+ ColumnDef(name="id", col_type="INTEGER", primary_key=True, autoincrement=True),
127
+ ColumnDef(name="name", col_type="TEXT"),
128
+ ColumnDef(name="email", col_type="TEXT"),
129
+ ],
130
+ ))
131
+
132
+ # Table-level CRUD (no need to pass table name)
133
+ row_id = users_table.insert(User(name="Alice", email="alice@example.com"))
134
+ users = users_table.query(User, where={"name": "Alice"})
135
+ users_table.update({"email": "new@example.com"}, where={"name": "Alice"})
136
+ users_table.delete(where={"name": "Alice"})
137
+
138
+ # Transaction on database level
139
+ with db.transaction():
140
+ users_table.insert(User(name="Bob", email="bob@example.com"))
141
+ users_table.insert(User(name="Carol", email="carol@example.com"))
142
+ ```
143
+
144
+ ### Vector Backend
145
+
146
+ ```python
147
+ from datus.storage.conditions import eq, and_
148
+
149
+ backend = PgvectorBackend()
150
+ backend.initialize(config)
151
+
152
+ # connect() returns a VectorDatabase handle
153
+ db = backend.connect(namespace="my_namespace")
154
+
155
+ # create_table() / open_table() return VectorTable handles
156
+ table = db.create_table("my_table", schema=my_schema, embedding_function=emb_config)
157
+ table = db.open_table("my_table")
158
+
159
+ # Table-level operations (no handle passing)
160
+ table.add(df)
161
+ results = table.search_all(where=eq("category", "active"))
162
+ results = table.search_all(where=and_(eq("status", "active"), eq("type", "A")))
163
+ results = table.search_vector(query_text="hello", vector_column="vector", top_n=10)
164
+
165
+ # Database-level operations
166
+ db.drop_table("my_table", ignore_missing=True)
167
+ assert not db.table_exists("my_table")
168
+ ```
169
+
170
+ ## Entry Points
171
+
172
+ ```toml
173
+ [project.entry-points."datus.storage.rdb"]
174
+ postgresql = "datus_storage_postgresql.rdb:register"
175
+
176
+ [project.entry-points."datus.storage.vector"]
177
+ postgresql = "datus_storage_postgresql.vector:register"
178
+ ```
179
+
180
+ Once installed, datus-agent discovers and registers both backends automatically — no manual wiring needed.
181
+
182
+ ## Source Layout
183
+
184
+ ```
185
+ datus_storage_postgresql/
186
+ ├── rdb/
187
+ │ ├── __init__.py # register() → RdbRegistry
188
+ │ └── backend.py # PostgresRdbBackend
189
+ └── vector/
190
+ ├── __init__.py # register() → VectorRegistry
191
+ ├── backend.py # PgvectorBackend
192
+ └── schema_converter.py # PyArrow Schema → PostgreSQL DDL
193
+ ```
@@ -0,0 +1,169 @@
1
+ # datus-storage-postgresql
2
+
3
+ PostgreSQL storage adapter for [datus-agent](https://github.com/Datus-ai/Datus-agent). Provides both RDB and Vector backends powered by a single PostgreSQL instance.
4
+
5
+ ## Backends
6
+
7
+ ### RDB Backend — `PostgresRdbBackend`
8
+
9
+ Implements `BaseRdbBackend` using psycopg v3 and psycopg-pool with a three-layer architecture:
10
+
11
+ - **`PostgresRdbBackend`** (lifecycle): `initialize()`, `connect(namespace, store_db_name)`, `close()`
12
+ - **`PgRdbDatabase`** (database-level, implements `RdbDatabase`): `ensure_table()`, `transaction()`, `close()`
13
+ - **`PgRdbTable`** (table-level, implements `RdbTable`): `insert()`, `query()`, `update()`, `delete()`, `upsert()`
14
+
15
+ Features:
16
+ - Full CRUD via `PgRdbTable` (no need to pass table name)
17
+ - `upsert()` with PostgreSQL `ON CONFLICT` (dataclass record input)
18
+ - `transaction()` context manager with auto-commit/rollback
19
+ - Namespace-based data isolation via PostgreSQL schemas
20
+ - Connection pooling with configurable min/max size
21
+ - Convenience methods on `PgRdbDatabase`: `get_connection()`, `execute()`, `execute_query()`, `execute_insert()`
22
+
23
+ ### Vector Backend — `PgvectorBackend`
24
+
25
+ Implements `BaseVectorBackend` using the [pgvector](https://github.com/pgvector/pgvector) extension with a three-layer architecture:
26
+
27
+ - **`PgvectorBackend`** (lifecycle): `initialize()`, `connect(namespace)`, `build_embedding_config()`, `close()`
28
+ - **`PgVectorDb`** (database-level, implements `VectorDatabase`): `table_exists()`, `table_names()`, `create_table()`, `open_table()`, `drop_table()`
29
+ - **`PgVectorTable`** (table-level, implements `VectorTable`): `add()`, `merge_insert()`, `delete()`, `update()`, `search_vector()`, `search_hybrid()`, `search_all()`, `count_rows()`, index operations
30
+
31
+ Features:
32
+ - `WhereExpr` support (`str`, condition AST nodes, or `None`) via `build_where()`
33
+ - Vector similarity search (cosine / L2 / inner product)
34
+ - Automatic embedding computation on insert
35
+ - HNSW vector index, B-tree scalar index, GIN full-text index
36
+ - PyArrow Schema to PostgreSQL DDL mapping
37
+
38
+ ## Configuration
39
+
40
+ Both backends register as `type: postgresql` and accept the same configuration parameters. They can point to the same PostgreSQL instance.
41
+
42
+ ```yaml
43
+ storage:
44
+ rdb:
45
+ type: postgresql
46
+ host: localhost
47
+ port: 5432
48
+ user: postgres
49
+ password: postgres
50
+ dbname: datus
51
+ pool_min_size: 1
52
+ pool_max_size: 10
53
+ vector:
54
+ type: postgresql
55
+ host: localhost
56
+ port: 5432
57
+ user: postgres
58
+ password: postgres
59
+ dbname: datus
60
+ pool_min_size: 1
61
+ pool_max_size: 10
62
+ ```
63
+
64
+ ### Parameters
65
+
66
+ | Parameter | Required | Default | Description |
67
+ |-----------|----------|---------|-------------|
68
+ | `host` | Yes | — | Database host |
69
+ | `port` | Yes | — | Database port |
70
+ | `user` | Yes | — | Username |
71
+ | `password` | Yes | — | Password |
72
+ | `dbname` | Yes | — | Database name |
73
+ | `pool_min_size` | No | `1` | Minimum connections in pool |
74
+ | `pool_max_size` | No | `10` | Maximum connections in pool |
75
+
76
+ The vector backend automatically enables the pgvector extension (`CREATE EXTENSION IF NOT EXISTS vector`) on connect.
77
+
78
+ ## Usage
79
+
80
+ ### RDB Backend
81
+
82
+ ```python
83
+ from dataclasses import dataclass
84
+ from datus.storage.rdb.base import TableDefinition, ColumnDef
85
+
86
+ @dataclass
87
+ class User:
88
+ id: int = None
89
+ name: str = None
90
+ email: str = None
91
+
92
+ backend = PostgresRdbBackend()
93
+ backend.initialize(config)
94
+
95
+ # connect() returns an RdbDatabase handle (namespace maps to PG schema)
96
+ db = backend.connect(namespace="my_app", store_db_name="user_store")
97
+
98
+ # ensure_table() returns an RdbTable handle
99
+ users_table = db.ensure_table(TableDefinition(
100
+ table_name="users",
101
+ columns=[
102
+ ColumnDef(name="id", col_type="INTEGER", primary_key=True, autoincrement=True),
103
+ ColumnDef(name="name", col_type="TEXT"),
104
+ ColumnDef(name="email", col_type="TEXT"),
105
+ ],
106
+ ))
107
+
108
+ # Table-level CRUD (no need to pass table name)
109
+ row_id = users_table.insert(User(name="Alice", email="alice@example.com"))
110
+ users = users_table.query(User, where={"name": "Alice"})
111
+ users_table.update({"email": "new@example.com"}, where={"name": "Alice"})
112
+ users_table.delete(where={"name": "Alice"})
113
+
114
+ # Transaction on database level
115
+ with db.transaction():
116
+ users_table.insert(User(name="Bob", email="bob@example.com"))
117
+ users_table.insert(User(name="Carol", email="carol@example.com"))
118
+ ```
119
+
120
+ ### Vector Backend
121
+
122
+ ```python
123
+ from datus.storage.conditions import eq, and_
124
+
125
+ backend = PgvectorBackend()
126
+ backend.initialize(config)
127
+
128
+ # connect() returns a VectorDatabase handle
129
+ db = backend.connect(namespace="my_namespace")
130
+
131
+ # create_table() / open_table() return VectorTable handles
132
+ table = db.create_table("my_table", schema=my_schema, embedding_function=emb_config)
133
+ table = db.open_table("my_table")
134
+
135
+ # Table-level operations (no handle passing)
136
+ table.add(df)
137
+ results = table.search_all(where=eq("category", "active"))
138
+ results = table.search_all(where=and_(eq("status", "active"), eq("type", "A")))
139
+ results = table.search_vector(query_text="hello", vector_column="vector", top_n=10)
140
+
141
+ # Database-level operations
142
+ db.drop_table("my_table", ignore_missing=True)
143
+ assert not db.table_exists("my_table")
144
+ ```
145
+
146
+ ## Entry Points
147
+
148
+ ```toml
149
+ [project.entry-points."datus.storage.rdb"]
150
+ postgresql = "datus_storage_postgresql.rdb:register"
151
+
152
+ [project.entry-points."datus.storage.vector"]
153
+ postgresql = "datus_storage_postgresql.vector:register"
154
+ ```
155
+
156
+ Once installed, datus-agent discovers and registers both backends automatically — no manual wiring needed.
157
+
158
+ ## Source Layout
159
+
160
+ ```
161
+ datus_storage_postgresql/
162
+ ├── rdb/
163
+ │ ├── __init__.py # register() → RdbRegistry
164
+ │ └── backend.py # PostgresRdbBackend
165
+ └── vector/
166
+ ├── __init__.py # register() → VectorRegistry
167
+ ├── backend.py # PgvectorBackend
168
+ └── schema_converter.py # PyArrow Schema → PostgreSQL DDL
169
+ ```
@@ -0,0 +1 @@
1
+ """PostgreSQL storage adapters for datus-agent."""
@@ -0,0 +1,13 @@
1
+ """PostgreSQL RDB backend adapter for datus-agent."""
2
+
3
+ from datus_storage_postgresql.rdb.backend import PostgresRdbBackend
4
+
5
+
6
+ def register():
7
+ """Register PostgresRdbBackend with RdbRegistry under the "postgresql" key."""
8
+ from datus_storage_base.rdb.registry import RdbRegistry
9
+
10
+ RdbRegistry.register("postgresql", PostgresRdbBackend)
11
+
12
+
13
+ __all__ = ["PostgresRdbBackend", "register"]