datus-postgresql 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datus_postgresql-0.1.5/.gitignore +143 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/PKG-INFO +3 -3
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/datus_postgresql/connector.py +181 -57
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/pyproject.toml +3 -3
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/unit/test_connector_unit.py +120 -6
- datus_postgresql-0.1.5/tests/unit/test_migration_mixin.py +89 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/README.md +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/datus_postgresql/__init__.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/datus_postgresql/config.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/datus_postgresql/handlers.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/docker-compose.yml +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/scripts/init_tpch_data.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/__init__.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/conftest.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/integration/__init__.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/integration/conftest.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/integration/test_integration.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/integration/test_tpch.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/unit/__init__.py +0 -0
- {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/unit/test_config.py +0 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
pip-wheel-metadata/
|
|
24
|
+
share/python-wheels/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
.installed.cfg
|
|
27
|
+
*.egg
|
|
28
|
+
MANIFEST
|
|
29
|
+
|
|
30
|
+
# PyInstaller
|
|
31
|
+
*.manifest
|
|
32
|
+
*.spec
|
|
33
|
+
|
|
34
|
+
# Installer logs
|
|
35
|
+
pip-log.txt
|
|
36
|
+
pip-delete-this-directory.txt
|
|
37
|
+
|
|
38
|
+
# Unit test / coverage reports
|
|
39
|
+
htmlcov/
|
|
40
|
+
.tox/
|
|
41
|
+
.nox/
|
|
42
|
+
.coverage
|
|
43
|
+
.coverage.*
|
|
44
|
+
.cache
|
|
45
|
+
nosetests.xml
|
|
46
|
+
coverage.xml
|
|
47
|
+
*.cover
|
|
48
|
+
*.py,cover
|
|
49
|
+
.hypothesis/
|
|
50
|
+
.pytest_cache/
|
|
51
|
+
|
|
52
|
+
# Translations
|
|
53
|
+
*.mo
|
|
54
|
+
*.pot
|
|
55
|
+
|
|
56
|
+
# Django stuff:
|
|
57
|
+
*.log
|
|
58
|
+
local_settings.py
|
|
59
|
+
db.sqlite3
|
|
60
|
+
db.sqlite3-journal
|
|
61
|
+
|
|
62
|
+
# Flask stuff:
|
|
63
|
+
instance/
|
|
64
|
+
.webassets-cache
|
|
65
|
+
|
|
66
|
+
# Scrapy stuff:
|
|
67
|
+
.scrapy
|
|
68
|
+
|
|
69
|
+
# Sphinx documentation
|
|
70
|
+
docs/_build/
|
|
71
|
+
|
|
72
|
+
# PyBuilder
|
|
73
|
+
target/
|
|
74
|
+
|
|
75
|
+
# Jupyter Notebook
|
|
76
|
+
.ipynb_checkpoints
|
|
77
|
+
|
|
78
|
+
# IPython
|
|
79
|
+
profile_default/
|
|
80
|
+
ipython_config.py
|
|
81
|
+
|
|
82
|
+
# pyenv
|
|
83
|
+
.python-version
|
|
84
|
+
|
|
85
|
+
# pipenv
|
|
86
|
+
Pipfile.lock
|
|
87
|
+
|
|
88
|
+
# uv
|
|
89
|
+
uv.lock
|
|
90
|
+
|
|
91
|
+
# PEP 582
|
|
92
|
+
__pypackages__/
|
|
93
|
+
|
|
94
|
+
# Celery stuff
|
|
95
|
+
celerybeat-schedule
|
|
96
|
+
celerybeat.pid
|
|
97
|
+
|
|
98
|
+
# SageMath parsed files
|
|
99
|
+
*.sage.py
|
|
100
|
+
|
|
101
|
+
# Environments
|
|
102
|
+
.env
|
|
103
|
+
.venv
|
|
104
|
+
env/
|
|
105
|
+
venv/
|
|
106
|
+
ENV/
|
|
107
|
+
env.bak/
|
|
108
|
+
venv.bak/
|
|
109
|
+
|
|
110
|
+
# Spyder project settings
|
|
111
|
+
.spyderproject
|
|
112
|
+
.spyproject
|
|
113
|
+
|
|
114
|
+
# Rope project settings
|
|
115
|
+
.ropeproject
|
|
116
|
+
|
|
117
|
+
# mkdocs documentation
|
|
118
|
+
/site
|
|
119
|
+
|
|
120
|
+
# mypy
|
|
121
|
+
.mypy_cache/
|
|
122
|
+
.dmypy.json
|
|
123
|
+
dmypy.json
|
|
124
|
+
|
|
125
|
+
# Pyre type checker
|
|
126
|
+
.pyre/
|
|
127
|
+
|
|
128
|
+
# IDEs
|
|
129
|
+
.vscode/
|
|
130
|
+
.idea/
|
|
131
|
+
*.swp
|
|
132
|
+
*.swo
|
|
133
|
+
*~
|
|
134
|
+
|
|
135
|
+
# OS
|
|
136
|
+
.DS_Store
|
|
137
|
+
Thumbs.db
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
.omc
|
|
141
|
+
|
|
142
|
+
uv.toml
|
|
143
|
+
*/uv.toml
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datus-postgresql
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: PostgreSQL database adapter for Datus
|
|
5
5
|
Project-URL: Homepage, https://github.com/Datus-ai/datus-db-adapters
|
|
6
6
|
Project-URL: Repository, https://github.com/Datus-ai/datus-db-adapters
|
|
@@ -14,8 +14,8 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
16
|
Requires-Python: >=3.12
|
|
17
|
-
Requires-Dist: datus-db-core>=0.1.
|
|
18
|
-
Requires-Dist: datus-sqlalchemy>=0.1.
|
|
17
|
+
Requires-Dist: datus-db-core>=0.1.3
|
|
18
|
+
Requires-Dist: datus-sqlalchemy>=0.1.6
|
|
19
19
|
Requires-Dist: psycopg2-binary>=2.9.11
|
|
20
20
|
Requires-Dist: pydantic>=2.0.0
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
# Licensed under the Apache License, Version 2.0.
|
|
3
3
|
# See http://www.apache.org/licenses/LICENSE-2.0 for details.
|
|
4
4
|
|
|
5
|
+
from collections import OrderedDict
|
|
5
6
|
from typing import Any, Dict, List, Optional, Set, Union, override
|
|
6
7
|
from urllib.parse import quote_plus
|
|
7
8
|
|
|
8
|
-
from pandas import DataFrame
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
from sqlalchemy import create_engine, text
|
|
11
11
|
|
|
@@ -13,6 +13,7 @@ from datus_db_core import (
|
|
|
13
13
|
TABLE_TYPE,
|
|
14
14
|
DatusDbException,
|
|
15
15
|
ErrorCode,
|
|
16
|
+
MigrationTargetMixin,
|
|
16
17
|
get_logger,
|
|
17
18
|
list_to_in_str,
|
|
18
19
|
)
|
|
@@ -52,7 +53,7 @@ def _get_metadata_config(table_type: TABLE_TYPE) -> TableMetadataNames:
|
|
|
52
53
|
return METADATA_DICT[table_type]
|
|
53
54
|
|
|
54
55
|
|
|
55
|
-
class PostgreSQLConnector(SQLAlchemyConnector):
|
|
56
|
+
class PostgreSQLConnector(SQLAlchemyConnector, MigrationTargetMixin):
|
|
56
57
|
"""PostgreSQL database connector."""
|
|
57
58
|
|
|
58
59
|
def __init__(self, config: Union[PostgreSQLConfig, dict]):
|
|
@@ -68,7 +69,6 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
68
69
|
elif not isinstance(config, PostgreSQLConfig):
|
|
69
70
|
raise TypeError(f"config must be PostgreSQLConfig or dict, got {type(config)}")
|
|
70
71
|
|
|
71
|
-
self.config = config
|
|
72
72
|
self.host = config.host
|
|
73
73
|
self.port = config.port
|
|
74
74
|
self.username = config.username
|
|
@@ -90,8 +90,13 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
90
90
|
dialect="postgresql",
|
|
91
91
|
timeout_seconds=config.timeout_seconds,
|
|
92
92
|
)
|
|
93
|
-
|
|
94
|
-
|
|
93
|
+
# Set after super().__init__() so BaseSqlConnector doesn't overwrite
|
|
94
|
+
# with a plain ConnectionConfig (which lacks sslmode, etc.)
|
|
95
|
+
self.config = config
|
|
96
|
+
self._default_database = database
|
|
97
|
+
self._default_schema = config.schema_name or "public"
|
|
98
|
+
self._engines: OrderedDict = OrderedDict() # LRU cache: database_name -> engine
|
|
99
|
+
self._max_engines = 8
|
|
95
100
|
|
|
96
101
|
# ==================== System Resources ====================
|
|
97
102
|
|
|
@@ -113,11 +118,7 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
113
118
|
|
|
114
119
|
# ==================== Utility Methods ====================
|
|
115
120
|
|
|
116
|
-
|
|
117
|
-
def _quote_identifier(identifier: str) -> str:
|
|
118
|
-
"""Safely wrap identifiers with double quotes for PostgreSQL."""
|
|
119
|
-
escaped = identifier.replace('"', '""')
|
|
120
|
-
return f'"{escaped}"'
|
|
121
|
+
# quote_identifier: uses BaseSqlConnector default (ANSI double quotes)
|
|
121
122
|
|
|
122
123
|
def _build_connection_string(self, database_name: str) -> str:
|
|
123
124
|
"""Build a PostgreSQL connection string for a given database."""
|
|
@@ -128,24 +129,6 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
128
129
|
f"@{self.host}:{self.port}/{database_name}?sslmode={self.config.sslmode}"
|
|
129
130
|
)
|
|
130
131
|
|
|
131
|
-
def _execute_on_database(self, sql: str, database_name: str) -> DataFrame:
|
|
132
|
-
"""Execute a query on a specific database using a temporary connection.
|
|
133
|
-
|
|
134
|
-
Thread-safe: creates an isolated connection without mutating self.
|
|
135
|
-
"""
|
|
136
|
-
if database_name == self.database_name:
|
|
137
|
-
return self._execute_pandas(sql)
|
|
138
|
-
|
|
139
|
-
conn_str = self._build_connection_string(database_name)
|
|
140
|
-
engine = create_engine(conn_str)
|
|
141
|
-
try:
|
|
142
|
-
with engine.connect() as conn:
|
|
143
|
-
result = conn.execute(text(sql))
|
|
144
|
-
rows = [row._asdict() for row in result.fetchall()]
|
|
145
|
-
return DataFrame(rows)
|
|
146
|
-
finally:
|
|
147
|
-
engine.dispose()
|
|
148
|
-
|
|
149
132
|
# ==================== Metadata Retrieval ====================
|
|
150
133
|
|
|
151
134
|
def _get_metadata(
|
|
@@ -177,8 +160,9 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
177
160
|
if table_type == "mv":
|
|
178
161
|
# pg_matviews is scoped to the current database connection.
|
|
179
162
|
# Use a temporary connection if a different database is requested (thread-safe).
|
|
163
|
+
safe_schema = schema_name.replace("'", "''") if schema_name else ""
|
|
180
164
|
if schema_name:
|
|
181
|
-
where = f"schemaname = '{
|
|
165
|
+
where = f"schemaname = '{safe_schema}'"
|
|
182
166
|
else:
|
|
183
167
|
where = f"{list_to_in_str('schemaname not in', list(self._sys_schemas()))}"
|
|
184
168
|
|
|
@@ -187,11 +171,13 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
187
171
|
FROM pg_matviews
|
|
188
172
|
WHERE {where}
|
|
189
173
|
"""
|
|
190
|
-
query_result = self.
|
|
174
|
+
query_result = self._execute_pandas(query, database_name=database_name)
|
|
191
175
|
else:
|
|
192
176
|
# Tables and views use information_schema (supports table_catalog filter)
|
|
177
|
+
safe_schema = schema_name.replace("'", "''") if schema_name else ""
|
|
178
|
+
safe_db = database_name.replace("'", "''") if database_name else ""
|
|
193
179
|
if schema_name:
|
|
194
|
-
where = f"table_schema = '{
|
|
180
|
+
where = f"table_schema = '{safe_schema}'"
|
|
195
181
|
else:
|
|
196
182
|
where = f"{list_to_in_str('table_schema not in', list(self._sys_schemas()))}"
|
|
197
183
|
|
|
@@ -203,9 +189,9 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
203
189
|
query = f"""
|
|
204
190
|
SELECT table_schema, table_name
|
|
205
191
|
FROM information_schema.{metadata_config.info_table}
|
|
206
|
-
WHERE table_catalog = '{
|
|
192
|
+
WHERE table_catalog = '{safe_db}' AND {where} {type_filter}
|
|
207
193
|
"""
|
|
208
|
-
query_result = self._execute_pandas(query)
|
|
194
|
+
query_result = self._execute_pandas(query, database_name=database_name)
|
|
209
195
|
|
|
210
196
|
# Format results
|
|
211
197
|
result = []
|
|
@@ -238,10 +224,13 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
238
224
|
"""
|
|
239
225
|
full_name = self.full_name(schema_name=schema_name, table_name=table_name)
|
|
240
226
|
|
|
227
|
+
safe_schema = schema_name.replace("'", "''") if schema_name else ""
|
|
228
|
+
safe_table = table_name.replace("'", "''") if table_name else ""
|
|
229
|
+
|
|
241
230
|
if object_type == "VIEW":
|
|
242
231
|
# Get view definition
|
|
243
232
|
sql = f"""
|
|
244
|
-
SELECT pg_get_viewdef('{
|
|
233
|
+
SELECT pg_get_viewdef('{safe_schema}.{safe_table}'::regclass, true) as definition
|
|
245
234
|
"""
|
|
246
235
|
result = self._execute_pandas(sql)
|
|
247
236
|
if not result.empty and result["definition"][0]:
|
|
@@ -253,7 +242,7 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
253
242
|
sql = f"""
|
|
254
243
|
SELECT definition
|
|
255
244
|
FROM pg_matviews
|
|
256
|
-
WHERE schemaname = '{
|
|
245
|
+
WHERE schemaname = '{safe_schema}' AND matviewname = '{safe_table}'
|
|
257
246
|
"""
|
|
258
247
|
result = self._execute_pandas(sql)
|
|
259
248
|
if not result.empty and result["definition"][0]:
|
|
@@ -269,7 +258,7 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
269
258
|
col_defs = []
|
|
270
259
|
pk_cols = []
|
|
271
260
|
for col in columns:
|
|
272
|
-
col_def = f" {self.
|
|
261
|
+
col_def = f" {self.quote_identifier(col['name'])} {col['type']}"
|
|
273
262
|
if not col.get("nullable", True):
|
|
274
263
|
col_def += " NOT NULL"
|
|
275
264
|
if col.get("default_value"):
|
|
@@ -281,7 +270,7 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
281
270
|
ddl = f"CREATE TABLE {full_name} (\n"
|
|
282
271
|
ddl += ",\n".join(col_defs)
|
|
283
272
|
if pk_cols:
|
|
284
|
-
pk_names = ", ".join(self.
|
|
273
|
+
pk_names = ", ".join(self.quote_identifier(c) for c in pk_cols)
|
|
285
274
|
ddl += f",\n PRIMARY KEY ({pk_names})"
|
|
286
275
|
ddl += "\n);"
|
|
287
276
|
return ddl
|
|
@@ -397,6 +386,10 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
397
386
|
database_name = database_name or self.database_name
|
|
398
387
|
schema_name = schema_name or self.schema_name
|
|
399
388
|
|
|
389
|
+
safe_db = database_name.replace("'", "''") if database_name else ""
|
|
390
|
+
safe_schema = schema_name.replace("'", "''") if schema_name else ""
|
|
391
|
+
safe_table = table_name.replace("'", "''") if table_name else ""
|
|
392
|
+
|
|
400
393
|
# Use INFORMATION_SCHEMA to get schema with comments
|
|
401
394
|
sql = f"""
|
|
402
395
|
SELECT
|
|
@@ -414,16 +407,16 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
414
407
|
ON tc.constraint_name = kcu.constraint_name
|
|
415
408
|
AND tc.table_schema = kcu.table_schema
|
|
416
409
|
WHERE tc.constraint_type = 'PRIMARY KEY'
|
|
417
|
-
AND tc.table_schema = '{
|
|
418
|
-
AND tc.table_name = '{
|
|
410
|
+
AND tc.table_schema = '{safe_schema}'
|
|
411
|
+
AND tc.table_name = '{safe_table}'
|
|
419
412
|
) pk ON c.column_name = pk.column_name
|
|
420
413
|
LEFT JOIN pg_catalog.pg_statio_all_tables st
|
|
421
414
|
ON st.schemaname = c.table_schema AND st.relname = c.table_name
|
|
422
415
|
LEFT JOIN pg_catalog.pg_description pgd
|
|
423
416
|
ON pgd.objoid = st.relid AND pgd.objsubid = c.ordinal_position
|
|
424
|
-
WHERE c.table_catalog = '{
|
|
425
|
-
AND c.table_schema = '{
|
|
426
|
-
AND c.table_name = '{
|
|
417
|
+
WHERE c.table_catalog = '{safe_db}'
|
|
418
|
+
AND c.table_schema = '{safe_schema}'
|
|
419
|
+
AND c.table_name = '{safe_table}'
|
|
427
420
|
ORDER BY c.ordinal_position
|
|
428
421
|
"""
|
|
429
422
|
query_result = self._execute_pandas(sql)
|
|
@@ -462,7 +455,8 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
462
455
|
def get_schemas(self, catalog_name: str = "", database_name: str = "", include_sys: bool = False) -> List[str]:
|
|
463
456
|
"""Get list of schemas in the current database."""
|
|
464
457
|
database_name = database_name or self.database_name
|
|
465
|
-
|
|
458
|
+
safe_db = database_name.replace("'", "''") if database_name else ""
|
|
459
|
+
sql = f"SELECT schema_name FROM information_schema.schemata WHERE catalog_name = '{safe_db}'"
|
|
466
460
|
result = self._execute_pandas(sql)
|
|
467
461
|
schemas = result["schema_name"].tolist()
|
|
468
462
|
|
|
@@ -479,21 +473,88 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
479
473
|
"""Get schema name for SQLAlchemy Inspector."""
|
|
480
474
|
return schema_name or self.schema_name
|
|
481
475
|
|
|
476
|
+
def _get_engine(self, database_name: str = ""):
|
|
477
|
+
"""Get or create engine for the given database. Thread-safe.
|
|
478
|
+
|
|
479
|
+
PostgreSQL requires different connection strings per database,
|
|
480
|
+
so each database gets its own engine with connection pool.
|
|
481
|
+
Uses LRU eviction (max 8 engines) to avoid holding too many connections.
|
|
482
|
+
"""
|
|
483
|
+
db = database_name or self.database_name
|
|
484
|
+
with self._engine_lock:
|
|
485
|
+
if db in self._engines:
|
|
486
|
+
self._engines.move_to_end(db)
|
|
487
|
+
return self._engines[db]
|
|
488
|
+
conn_str = self._build_connection_string(db)
|
|
489
|
+
engine = create_engine(
|
|
490
|
+
conn_str,
|
|
491
|
+
pool_size=5,
|
|
492
|
+
max_overflow=10,
|
|
493
|
+
pool_timeout=self.timeout_seconds,
|
|
494
|
+
pool_recycle=3600,
|
|
495
|
+
pool_pre_ping=True,
|
|
496
|
+
)
|
|
497
|
+
self._engines[db] = engine
|
|
498
|
+
while len(self._engines) > self._max_engines:
|
|
499
|
+
_, evicted = self._engines.popitem(last=False)
|
|
500
|
+
try:
|
|
501
|
+
evicted.dispose()
|
|
502
|
+
except Exception as e:
|
|
503
|
+
logger.warning(f"Error disposing evicted engine: {e}")
|
|
504
|
+
return engine
|
|
505
|
+
|
|
506
|
+
@override
|
|
507
|
+
def _conn(self, catalog_name: str = "", database_name: str = "", schema_name: str = ""):
|
|
508
|
+
"""Checkout connection from the correct per-database engine. Thread-safe.
|
|
509
|
+
|
|
510
|
+
Overrides base _conn() to avoid writing to shared self.engine.
|
|
511
|
+
Each thread gets a connection from the engine matching its database_name.
|
|
512
|
+
"""
|
|
513
|
+
from contextlib import contextmanager
|
|
514
|
+
|
|
515
|
+
@contextmanager
|
|
516
|
+
def _pg_conn():
|
|
517
|
+
effective_database = database_name or self.database_name
|
|
518
|
+
effective_schema = schema_name or self.schema_name
|
|
519
|
+
effective_catalog = catalog_name or self.catalog_name
|
|
520
|
+
engine = self._get_engine(effective_database)
|
|
521
|
+
conn = engine.connect()
|
|
522
|
+
try:
|
|
523
|
+
self.do_switch_context(conn, effective_catalog, effective_database, effective_schema)
|
|
524
|
+
yield conn
|
|
525
|
+
except Exception:
|
|
526
|
+
try:
|
|
527
|
+
conn.rollback()
|
|
528
|
+
except Exception:
|
|
529
|
+
pass
|
|
530
|
+
raise
|
|
531
|
+
finally:
|
|
532
|
+
conn.close()
|
|
533
|
+
|
|
534
|
+
return _pg_conn()
|
|
535
|
+
|
|
536
|
+
@override
|
|
537
|
+
def close(self):
|
|
538
|
+
"""Dispose all engines (per-database pool + parent engine)."""
|
|
539
|
+
for engine in self._engines.values():
|
|
540
|
+
try:
|
|
541
|
+
engine.dispose()
|
|
542
|
+
except Exception as e:
|
|
543
|
+
logger.warning(f"Error disposing engine: {e}")
|
|
544
|
+
self._engines.clear()
|
|
545
|
+
# Dispose parent engine that may have been created via connect()/_ensure_engine()
|
|
546
|
+
super().close()
|
|
547
|
+
|
|
482
548
|
@override
|
|
483
|
-
def do_switch_context(self, catalog_name: str = "", database_name: str = "", schema_name: str = ""):
|
|
484
|
-
"""
|
|
549
|
+
def do_switch_context(self, conn, catalog_name: str = "", database_name: str = "", schema_name: str = ""):
|
|
550
|
+
"""Apply schema context to a connection.
|
|
485
551
|
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
use explicit schema qualification via full_name().
|
|
552
|
+
Database switching is handled by _conn() which picks the right engine
|
|
553
|
+
based on the effective database_name.
|
|
489
554
|
"""
|
|
490
|
-
if database_name and database_name != self.database_name:
|
|
491
|
-
self.connection_string = self._build_connection_string(database_name)
|
|
492
|
-
self.close()
|
|
493
|
-
self.connect()
|
|
494
|
-
self.database_name = database_name
|
|
495
555
|
if schema_name:
|
|
496
|
-
|
|
556
|
+
conn.execute(text(f"SET search_path TO {self.quote_identifier(schema_name)}"))
|
|
557
|
+
conn.commit()
|
|
497
558
|
|
|
498
559
|
# ==================== Sample Data ====================
|
|
499
560
|
|
|
@@ -591,10 +652,10 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
591
652
|
database_name = database_name or self.database_name
|
|
592
653
|
schema_name = schema_name or self.schema_name
|
|
593
654
|
if database_name and schema_name:
|
|
594
|
-
return f"{self.
|
|
655
|
+
return f"{self.quote_identifier(database_name)}.{self.quote_identifier(schema_name)}.{self.quote_identifier(table_name)}"
|
|
595
656
|
if schema_name:
|
|
596
|
-
return f"{self.
|
|
597
|
-
return self.
|
|
657
|
+
return f"{self.quote_identifier(schema_name)}.{self.quote_identifier(table_name)}"
|
|
658
|
+
return self.quote_identifier(table_name)
|
|
598
659
|
|
|
599
660
|
@override
|
|
600
661
|
def _reset_filter_tables(
|
|
@@ -607,3 +668,66 @@ class PostgreSQLConnector(SQLAlchemyConnector):
|
|
|
607
668
|
"""Reset filter tables with full names."""
|
|
608
669
|
schema_name = schema_name or self.schema_name
|
|
609
670
|
return super()._reset_filter_tables(tables, "", database_name, schema_name)
|
|
671
|
+
|
|
672
|
+
# ==================== MigrationTargetMixin ====================
|
|
673
|
+
|
|
674
|
+
def describe_migration_capabilities(self) -> Dict[str, Any]:
|
|
675
|
+
return {
|
|
676
|
+
"supported": True,
|
|
677
|
+
"dialect_family": "postgres-like",
|
|
678
|
+
"requires": [], # OLTP — no distribution/partition required
|
|
679
|
+
"forbids": [
|
|
680
|
+
"DUPLICATE KEY (StarRocks-only)",
|
|
681
|
+
"DISTRIBUTED BY HASH ... BUCKETS (StarRocks-only)",
|
|
682
|
+
"ENGINE = (MySQL/ClickHouse syntax)",
|
|
683
|
+
],
|
|
684
|
+
"type_hints": {
|
|
685
|
+
"HUGEINT": "NUMERIC(38,0) (Postgres has no HUGEINT/LARGEINT)",
|
|
686
|
+
"LARGEINT": "NUMERIC(38,0)",
|
|
687
|
+
"unbounded VARCHAR": "TEXT (prefer TEXT over unbounded VARCHAR)",
|
|
688
|
+
"TIMESTAMP WITH TIME ZONE": "TIMESTAMPTZ",
|
|
689
|
+
"JSON": "JSONB (prefer for indexing)",
|
|
690
|
+
"BOOLEAN": "BOOLEAN (no TINYINT cast needed)",
|
|
691
|
+
},
|
|
692
|
+
"example_ddl": (
|
|
693
|
+
"CREATE TABLE public.t (\n"
|
|
694
|
+
" id BIGSERIAL PRIMARY KEY,\n"
|
|
695
|
+
" name VARCHAR(255),\n"
|
|
696
|
+
" created_at TIMESTAMPTZ DEFAULT now()\n"
|
|
697
|
+
")"
|
|
698
|
+
),
|
|
699
|
+
}
|
|
700
|
+
|
|
701
|
+
def suggest_table_layout(self, columns: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
702
|
+
# Postgres is OLTP — no distribution keys or bucketing required
|
|
703
|
+
return {}
|
|
704
|
+
|
|
705
|
+
def validate_ddl(self, ddl: str) -> List[str]:
|
|
706
|
+
errors: List[str] = []
|
|
707
|
+
upper = ddl.upper()
|
|
708
|
+
|
|
709
|
+
if "DUPLICATE KEY" in upper:
|
|
710
|
+
errors.append("DUPLICATE KEY is StarRocks-only syntax; Postgres does not support it")
|
|
711
|
+
if "BUCKETS" in upper and "DISTRIBUTED BY" in upper:
|
|
712
|
+
errors.append("DISTRIBUTED BY ... BUCKETS is StarRocks syntax; Postgres does not support it")
|
|
713
|
+
if "ENGINE =" in upper or "ENGINE=" in upper:
|
|
714
|
+
errors.append("ENGINE clause is MySQL/ClickHouse syntax; not supported in Postgres")
|
|
715
|
+
if "ORDER BY" in upper and "CREATE TABLE" in upper:
|
|
716
|
+
# Rough heuristic: top-level ORDER BY inside CREATE TABLE is ClickHouse's
|
|
717
|
+
# MergeTree syntax. Postgres allows ORDER BY inside CTAS SELECT, so this
|
|
718
|
+
# check is intentionally loose (only flags when accompanied by ENGINE).
|
|
719
|
+
if "ENGINE" in upper:
|
|
720
|
+
errors.append("ORDER BY inside CREATE TABLE is ClickHouse syntax; use CREATE INDEX in Postgres")
|
|
721
|
+
|
|
722
|
+
return errors
|
|
723
|
+
|
|
724
|
+
def map_source_type(self, source_dialect: str, source_type: str) -> Optional[str]:
|
|
725
|
+
import re as _re
|
|
726
|
+
|
|
727
|
+
base = _re.sub(r"\(.*\)", "", source_type.strip().upper()).strip()
|
|
728
|
+
overrides = {
|
|
729
|
+
"HUGEINT": "NUMERIC(38,0)",
|
|
730
|
+
"LARGEINT": "NUMERIC(38,0)",
|
|
731
|
+
"DATETIME": "TIMESTAMP",
|
|
732
|
+
}
|
|
733
|
+
return overrides.get(base)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "datus-postgresql"
|
|
3
|
-
version = "0.1.4"
|
|
3
|
+
version = "0.1.5"
|
|
4
4
|
description = "PostgreSQL database adapter for Datus"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -18,8 +18,8 @@ classifiers = [
|
|
|
18
18
|
]
|
|
19
19
|
|
|
20
20
|
dependencies = [
|
|
21
|
-
"datus-db-core>=0.1.
|
|
22
|
-
"datus-sqlalchemy>=0.1.
|
|
21
|
+
"datus-db-core>=0.1.3",
|
|
22
|
+
"datus-sqlalchemy>=0.1.6",
|
|
23
23
|
"psycopg2-binary>=2.9.11",
|
|
24
24
|
"pydantic>=2.0.0",
|
|
25
25
|
]
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# Licensed under the Apache License, Version 2.0.
|
|
3
3
|
# See http://www.apache.org/licenses/LICENSE-2.0 for details.
|
|
4
4
|
|
|
5
|
-
from unittest.mock import patch
|
|
5
|
+
from unittest.mock import MagicMock, patch
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
8
8
|
|
|
@@ -193,28 +193,28 @@ def test_sys_schemas():
|
|
|
193
193
|
@pytest.mark.acceptance
|
|
194
194
|
def test_quote_identifier_basic():
|
|
195
195
|
"""Test _quote_identifier with basic identifier."""
|
|
196
|
-
assert PostgreSQLConnector.
|
|
196
|
+
assert PostgreSQLConnector.quote_identifier(MagicMock(), "table_name") == '"table_name"'
|
|
197
197
|
|
|
198
198
|
|
|
199
199
|
@pytest.mark.acceptance
|
|
200
200
|
def test_quote_identifier_with_double_quotes():
|
|
201
201
|
"""Test _quote_identifier escapes double quotes."""
|
|
202
|
-
assert PostgreSQLConnector.
|
|
202
|
+
assert PostgreSQLConnector.quote_identifier(MagicMock(), 'table"name') == '"table""name"'
|
|
203
203
|
|
|
204
204
|
|
|
205
205
|
def test_quote_identifier_with_multiple_double_quotes():
|
|
206
206
|
"""Test _quote_identifier escapes multiple double quotes."""
|
|
207
|
-
assert PostgreSQLConnector.
|
|
207
|
+
assert PostgreSQLConnector.quote_identifier(MagicMock(), 'ta"ble"name') == '"ta""ble""name"'
|
|
208
208
|
|
|
209
209
|
|
|
210
210
|
def test_quote_identifier_empty_string():
|
|
211
211
|
"""Test _quote_identifier with empty string."""
|
|
212
|
-
assert PostgreSQLConnector.
|
|
212
|
+
assert PostgreSQLConnector.quote_identifier(MagicMock(), "") == '""'
|
|
213
213
|
|
|
214
214
|
|
|
215
215
|
def test_quote_identifier_special_characters():
|
|
216
216
|
"""Test _quote_identifier with special characters."""
|
|
217
|
-
assert PostgreSQLConnector.
|
|
217
|
+
assert PostgreSQLConnector.quote_identifier(MagicMock(), "table-name_123") == '"table-name_123"'
|
|
218
218
|
|
|
219
219
|
|
|
220
220
|
@pytest.mark.acceptance
|
|
@@ -396,3 +396,117 @@ def test_connector_schema_name_default():
|
|
|
396
396
|
connector = PostgreSQLConnector(config)
|
|
397
397
|
|
|
398
398
|
assert connector.schema_name == "public"
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
# ==================== _get_engine LRU Cache Tests ====================
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def _make_connector():
|
|
405
|
+
"""Helper: create a PostgreSQLConnector with mocked parent __init__."""
|
|
406
|
+
import threading
|
|
407
|
+
|
|
408
|
+
config = PostgreSQLConfig(username="user", password="pass", database="default_db")
|
|
409
|
+
with patch("datus_sqlalchemy.SQLAlchemyConnector.__init__", return_value=None):
|
|
410
|
+
connector = PostgreSQLConnector(config)
|
|
411
|
+
# Parent __init__ is mocked, so set attributes that _get_engine needs
|
|
412
|
+
connector._engine_lock = threading.Lock()
|
|
413
|
+
connector.engine = None
|
|
414
|
+
connector._owns_engine = False
|
|
415
|
+
connector.timeout_seconds = 30
|
|
416
|
+
return connector
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def test_get_engine_returns_same_engine_for_same_db():
|
|
420
|
+
"""Requesting the same database twice returns the cached engine."""
|
|
421
|
+
connector = _make_connector()
|
|
422
|
+
with patch("datus_postgresql.connector.create_engine", return_value=MagicMock()) as mock_ce:
|
|
423
|
+
e1 = connector._get_engine("db1")
|
|
424
|
+
e2 = connector._get_engine("db1")
|
|
425
|
+
|
|
426
|
+
assert e1 is e2
|
|
427
|
+
mock_ce.assert_called_once()
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def test_get_engine_creates_different_engines_per_db():
|
|
431
|
+
"""Different databases get different engines."""
|
|
432
|
+
connector = _make_connector()
|
|
433
|
+
engines = [MagicMock(), MagicMock()]
|
|
434
|
+
with patch("datus_postgresql.connector.create_engine", side_effect=engines):
|
|
435
|
+
e1 = connector._get_engine("db1")
|
|
436
|
+
e2 = connector._get_engine("db2")
|
|
437
|
+
|
|
438
|
+
assert e1 is not e2
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def test_get_engine_evicts_lru_when_over_max():
|
|
442
|
+
"""When cache exceeds max_engines, the least-recently-used engine is disposed."""
|
|
443
|
+
connector = _make_connector()
|
|
444
|
+
connector._max_engines = 3
|
|
445
|
+
|
|
446
|
+
created_engines = []
|
|
447
|
+
|
|
448
|
+
def make_engine(*args, **kwargs):
|
|
449
|
+
e = MagicMock()
|
|
450
|
+
created_engines.append(e)
|
|
451
|
+
return e
|
|
452
|
+
|
|
453
|
+
with patch("datus_postgresql.connector.create_engine", side_effect=make_engine):
|
|
454
|
+
connector._get_engine("db1")
|
|
455
|
+
connector._get_engine("db2")
|
|
456
|
+
connector._get_engine("db3")
|
|
457
|
+
# All 3 fit within max_engines=3
|
|
458
|
+
assert len(connector._engines) == 3
|
|
459
|
+
created_engines[0].dispose.assert_not_called()
|
|
460
|
+
|
|
461
|
+
# Adding a 4th should evict db1 (LRU)
|
|
462
|
+
connector._get_engine("db4")
|
|
463
|
+
assert len(connector._engines) == 3
|
|
464
|
+
assert "db1" not in connector._engines
|
|
465
|
+
created_engines[0].dispose.assert_called_once()
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def test_get_engine_lru_access_refreshes_order():
|
|
469
|
+
"""Accessing an existing engine moves it to most-recently-used, protecting it from eviction."""
|
|
470
|
+
connector = _make_connector()
|
|
471
|
+
connector._max_engines = 3
|
|
472
|
+
|
|
473
|
+
created_engines = {}
|
|
474
|
+
|
|
475
|
+
def make_engine(*args, **kwargs):
|
|
476
|
+
e = MagicMock()
|
|
477
|
+
created_engines[len(created_engines)] = e
|
|
478
|
+
return e
|
|
479
|
+
|
|
480
|
+
with patch("datus_postgresql.connector.create_engine", side_effect=make_engine):
|
|
481
|
+
connector._get_engine("db1") # engines[0]
|
|
482
|
+
connector._get_engine("db2") # engines[1]
|
|
483
|
+
connector._get_engine("db3") # engines[2]
|
|
484
|
+
|
|
485
|
+
# Access db1 again — moves it to MRU
|
|
486
|
+
connector._get_engine("db1")
|
|
487
|
+
|
|
488
|
+
# Add db4 — should evict db2 (now LRU), NOT db1
|
|
489
|
+
connector._get_engine("db4")
|
|
490
|
+
|
|
491
|
+
assert "db1" in connector._engines
|
|
492
|
+
assert "db2" not in connector._engines
|
|
493
|
+
assert "db3" in connector._engines
|
|
494
|
+
assert "db4" in connector._engines
|
|
495
|
+
created_engines[1].dispose.assert_called_once() # db2 evicted
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def test_close_disposes_all_cached_engines():
|
|
499
|
+
"""close() disposes all cached engines and clears the cache."""
|
|
500
|
+
connector = _make_connector()
|
|
501
|
+
|
|
502
|
+
mock_engines = [MagicMock(), MagicMock()]
|
|
503
|
+
with patch("datus_postgresql.connector.create_engine", side_effect=mock_engines):
|
|
504
|
+
connector._get_engine("db1")
|
|
505
|
+
connector._get_engine("db2")
|
|
506
|
+
|
|
507
|
+
with patch("datus_sqlalchemy.SQLAlchemyConnector.close"):
|
|
508
|
+
connector.close()
|
|
509
|
+
|
|
510
|
+
for e in mock_engines:
|
|
511
|
+
e.dispose.assert_called_once()
|
|
512
|
+
assert len(connector._engines) == 0
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Copyright 2025-present DatusAI, Inc.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0.
|
|
3
|
+
# See http://www.apache.org/licenses/LICENSE-2.0 for details.
|
|
4
|
+
|
|
5
|
+
"""Tests for PostgreSQL MigrationTargetMixin implementation."""
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from datus_db_core import MigrationTargetMixin
|
|
10
|
+
from datus_postgresql import PostgreSQLConnector
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pytest.fixture
def connector():
    """Provide a PostgreSQLConnector allocated with __new__, bypassing __init__."""
    connector_cls = PostgreSQLConnector
    return connector_cls.__new__(connector_cls)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestMixinInheritance:
    """Checks the connector's relationship to MigrationTargetMixin."""

    def test_postgresql_is_migration_target(self, connector):
        """The connector instance must be a MigrationTargetMixin."""
        is_migration_target = isinstance(connector, MigrationTargetMixin)
        assert is_migration_target
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestDescribeMigrationCapabilities:
    """Checks individual keys of the describe_migration_capabilities() result."""

    def test_supported_true(self, connector):
        """The "supported" entry is exactly True."""
        capabilities = connector.describe_migration_capabilities()
        assert capabilities["supported"] is True

    def test_dialect_family_postgres_like(self, connector):
        """The "dialect_family" entry is "postgres-like"."""
        capabilities = connector.describe_migration_capabilities()
        assert capabilities["dialect_family"] == "postgres-like"

    def test_no_hard_requirements(self, connector):
        """Postgres is OLTP, so no distribution/partition is required."""
        capabilities = connector.describe_migration_capabilities()
        assert capabilities["requires"] == []

    def test_type_hints_mention_text_over_varchar(self, connector):
        """The combined type-hint values mention TEXT (case-insensitive)."""
        capabilities = connector.describe_migration_capabilities()
        hint_values = capabilities["type_hints"].values()
        combined_hints = " ".join(hint_values).upper()
        assert "TEXT" in combined_hints

    def test_example_ddl_is_simple(self, connector):
        """The example DDL is a plain CREATE TABLE statement."""
        capabilities = connector.describe_migration_capabilities()
        example = capabilities["example_ddl"].upper()
        assert "CREATE TABLE" in example
        # Neither DUPLICATE KEY nor BUCKETS may appear in the example.
        for forbidden_clause in ("DUPLICATE KEY", "BUCKETS"):
            assert forbidden_clause not in example
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class TestValidateDdl:
    """Checks validate_ddl(), whose result is compared to a list of error strings."""

    def test_accepts_standard_postgres_ddl(self, connector):
        """Plain Postgres DDL yields an empty error list."""
        statement = "CREATE TABLE public.t (id BIGSERIAL PRIMARY KEY, name VARCHAR(255))"
        errors = connector.validate_ddl(statement)
        assert errors == []

    def test_rejects_duplicate_key_starrocks_syntax(self, connector):
        """DDL with a DUPLICATE KEY clause produces a matching error."""
        statement = """CREATE TABLE public.t (id BIGINT)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 10"""
        errors = connector.validate_ddl(statement)
        # At least one message must name the clause or mention StarRocks.
        assert any(
            marker in message.upper()
            for message in errors
            for marker in ("DUPLICATE KEY", "STARROCKS")
        )

    def test_rejects_distributed_by_hash_buckets(self, connector):
        """DDL with DISTRIBUTED BY HASH ... BUCKETS produces a matching error."""
        statement = "CREATE TABLE public.t (id BIGINT) DISTRIBUTED BY HASH(id) BUCKETS 10"
        errors = connector.validate_ddl(statement)
        # At least one message must name BUCKETS or mention StarRocks.
        assert any(
            marker in message.upper()
            for message in errors
            for marker in ("BUCKETS", "STARROCKS")
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class TestSuggestTableLayout:
    """Checks the layout suggestion returned for a list of column dicts."""

    def test_returns_empty_dict(self, connector):
        """Postgres doesn't need distribution keys (OLTP), so the layout is empty."""
        id_column = {"name": "id", "type": "BIGINT", "nullable": False}
        name_column = {"name": "name", "type": "VARCHAR", "nullable": True}
        suggestion = connector.suggest_table_layout([id_column, name_column])
        assert suggestion == {}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class TestMapSourceType:
    """Checks map_source_type() for a few (source dialect, type) pairs."""

    def test_hugeint_to_numeric(self, connector):
        """DuckDB HUGEINT maps to NUMERIC(38,0)."""
        mapped = connector.map_source_type("duckdb", "HUGEINT")
        assert mapped == "NUMERIC(38,0)"

    def test_largeint_to_numeric(self, connector):
        """StarRocks LARGEINT has no direct Postgres equivalent."""
        mapped = connector.map_source_type("starrocks", "LARGEINT")
        assert mapped == "NUMERIC(38,0)"

    def test_unknown_returns_none(self, connector):
        """A type with no mapping entry (here DuckDB VARCHAR) yields None."""
        mapped = connector.map_source_type("duckdb", "VARCHAR")
        assert mapped is None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|