datus-postgresql 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. datus_postgresql-0.1.5/.gitignore +143 -0
  2. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/PKG-INFO +3 -3
  3. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/datus_postgresql/connector.py +181 -57
  4. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/pyproject.toml +3 -3
  5. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/unit/test_connector_unit.py +120 -6
  6. datus_postgresql-0.1.5/tests/unit/test_migration_mixin.py +89 -0
  7. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/README.md +0 -0
  8. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/datus_postgresql/__init__.py +0 -0
  9. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/datus_postgresql/config.py +0 -0
  10. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/datus_postgresql/handlers.py +0 -0
  11. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/docker-compose.yml +0 -0
  12. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/scripts/init_tpch_data.py +0 -0
  13. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/__init__.py +0 -0
  14. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/conftest.py +0 -0
  15. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/integration/__init__.py +0 -0
  16. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/integration/conftest.py +0 -0
  17. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/integration/test_integration.py +0 -0
  18. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/integration/test_tpch.py +0 -0
  19. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/unit/__init__.py +0 -0
  20. {datus_postgresql-0.1.4 → datus_postgresql-0.1.5}/tests/unit/test_config.py +0 -0
@@ -0,0 +1,143 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ *.py,cover
49
+ .hypothesis/
50
+ .pytest_cache/
51
+
52
+ # Translations
53
+ *.mo
54
+ *.pot
55
+
56
+ # Django stuff:
57
+ *.log
58
+ local_settings.py
59
+ db.sqlite3
60
+ db.sqlite3-journal
61
+
62
+ # Flask stuff:
63
+ instance/
64
+ .webassets-cache
65
+
66
+ # Scrapy stuff:
67
+ .scrapy
68
+
69
+ # Sphinx documentation
70
+ docs/_build/
71
+
72
+ # PyBuilder
73
+ target/
74
+
75
+ # Jupyter Notebook
76
+ .ipynb_checkpoints
77
+
78
+ # IPython
79
+ profile_default/
80
+ ipython_config.py
81
+
82
+ # pyenv
83
+ .python-version
84
+
85
+ # pipenv
86
+ Pipfile.lock
87
+
88
+ # uv
89
+ uv.lock
90
+
91
+ # PEP 582
92
+ __pypackages__/
93
+
94
+ # Celery stuff
95
+ celerybeat-schedule
96
+ celerybeat.pid
97
+
98
+ # SageMath parsed files
99
+ *.sage.py
100
+
101
+ # Environments
102
+ .env
103
+ .venv
104
+ env/
105
+ venv/
106
+ ENV/
107
+ env.bak/
108
+ venv.bak/
109
+
110
+ # Spyder project settings
111
+ .spyderproject
112
+ .spyproject
113
+
114
+ # Rope project settings
115
+ .ropeproject
116
+
117
+ # mkdocs documentation
118
+ /site
119
+
120
+ # mypy
121
+ .mypy_cache/
122
+ .dmypy.json
123
+ dmypy.json
124
+
125
+ # Pyre type checker
126
+ .pyre/
127
+
128
+ # IDEs
129
+ .vscode/
130
+ .idea/
131
+ *.swp
132
+ *.swo
133
+ *~
134
+
135
+ # OS
136
+ .DS_Store
137
+ Thumbs.db
138
+
139
+
140
+ .omc
141
+
142
+ uv.toml
143
+ */uv.toml
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datus-postgresql
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: PostgreSQL database adapter for Datus
5
5
  Project-URL: Homepage, https://github.com/Datus-ai/datus-db-adapters
6
6
  Project-URL: Repository, https://github.com/Datus-ai/datus-db-adapters
@@ -14,8 +14,8 @@ Classifier: License :: OSI Approved :: Apache Software License
14
14
  Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Programming Language :: Python :: 3.12
16
16
  Requires-Python: >=3.12
17
- Requires-Dist: datus-db-core>=0.1.0
18
- Requires-Dist: datus-sqlalchemy>=0.1.2
17
+ Requires-Dist: datus-db-core>=0.1.3
18
+ Requires-Dist: datus-sqlalchemy>=0.1.6
19
19
  Requires-Dist: psycopg2-binary>=2.9.11
20
20
  Requires-Dist: pydantic>=2.0.0
21
21
  Description-Content-Type: text/markdown
@@ -2,10 +2,10 @@
2
2
  # Licensed under the Apache License, Version 2.0.
3
3
  # See http://www.apache.org/licenses/LICENSE-2.0 for details.
4
4
 
5
+ from collections import OrderedDict
5
6
  from typing import Any, Dict, List, Optional, Set, Union, override
6
7
  from urllib.parse import quote_plus
7
8
 
8
- from pandas import DataFrame
9
9
  from pydantic import BaseModel, Field
10
10
  from sqlalchemy import create_engine, text
11
11
 
@@ -13,6 +13,7 @@ from datus_db_core import (
13
13
  TABLE_TYPE,
14
14
  DatusDbException,
15
15
  ErrorCode,
16
+ MigrationTargetMixin,
16
17
  get_logger,
17
18
  list_to_in_str,
18
19
  )
@@ -52,7 +53,7 @@ def _get_metadata_config(table_type: TABLE_TYPE) -> TableMetadataNames:
52
53
  return METADATA_DICT[table_type]
53
54
 
54
55
 
55
- class PostgreSQLConnector(SQLAlchemyConnector):
56
+ class PostgreSQLConnector(SQLAlchemyConnector, MigrationTargetMixin):
56
57
  """PostgreSQL database connector."""
57
58
 
58
59
  def __init__(self, config: Union[PostgreSQLConfig, dict]):
@@ -68,7 +69,6 @@ class PostgreSQLConnector(SQLAlchemyConnector):
68
69
  elif not isinstance(config, PostgreSQLConfig):
69
70
  raise TypeError(f"config must be PostgreSQLConfig or dict, got {type(config)}")
70
71
 
71
- self.config = config
72
72
  self.host = config.host
73
73
  self.port = config.port
74
74
  self.username = config.username
@@ -90,8 +90,13 @@ class PostgreSQLConnector(SQLAlchemyConnector):
90
90
  dialect="postgresql",
91
91
  timeout_seconds=config.timeout_seconds,
92
92
  )
93
- self.database_name = database
94
- self.schema_name = config.schema_name or "public"
93
+ # Set after super().__init__() so BaseSqlConnector doesn't overwrite
94
+ # with a plain ConnectionConfig (which lacks sslmode, etc.)
95
+ self.config = config
96
+ self._default_database = database
97
+ self._default_schema = config.schema_name or "public"
98
+ self._engines: OrderedDict = OrderedDict() # LRU cache: database_name -> engine
99
+ self._max_engines = 8
95
100
 
96
101
  # ==================== System Resources ====================
97
102
 
@@ -113,11 +118,7 @@ class PostgreSQLConnector(SQLAlchemyConnector):
113
118
 
114
119
  # ==================== Utility Methods ====================
115
120
 
116
- @staticmethod
117
- def _quote_identifier(identifier: str) -> str:
118
- """Safely wrap identifiers with double quotes for PostgreSQL."""
119
- escaped = identifier.replace('"', '""')
120
- return f'"{escaped}"'
121
+ # quote_identifier: uses BaseSqlConnector default (ANSI double quotes)
121
122
 
122
123
  def _build_connection_string(self, database_name: str) -> str:
123
124
  """Build a PostgreSQL connection string for a given database."""
@@ -128,24 +129,6 @@ class PostgreSQLConnector(SQLAlchemyConnector):
128
129
  f"@{self.host}:{self.port}/{database_name}?sslmode={self.config.sslmode}"
129
130
  )
130
131
 
131
- def _execute_on_database(self, sql: str, database_name: str) -> DataFrame:
132
- """Execute a query on a specific database using a temporary connection.
133
-
134
- Thread-safe: creates an isolated connection without mutating self.
135
- """
136
- if database_name == self.database_name:
137
- return self._execute_pandas(sql)
138
-
139
- conn_str = self._build_connection_string(database_name)
140
- engine = create_engine(conn_str)
141
- try:
142
- with engine.connect() as conn:
143
- result = conn.execute(text(sql))
144
- rows = [row._asdict() for row in result.fetchall()]
145
- return DataFrame(rows)
146
- finally:
147
- engine.dispose()
148
-
149
132
  # ==================== Metadata Retrieval ====================
150
133
 
151
134
  def _get_metadata(
@@ -177,8 +160,9 @@ class PostgreSQLConnector(SQLAlchemyConnector):
177
160
  if table_type == "mv":
178
161
  # pg_matviews is scoped to the current database connection.
179
162
  # Use a temporary connection if a different database is requested (thread-safe).
163
+ safe_schema = schema_name.replace("'", "''") if schema_name else ""
180
164
  if schema_name:
181
- where = f"schemaname = '{schema_name}'"
165
+ where = f"schemaname = '{safe_schema}'"
182
166
  else:
183
167
  where = f"{list_to_in_str('schemaname not in', list(self._sys_schemas()))}"
184
168
 
@@ -187,11 +171,13 @@ class PostgreSQLConnector(SQLAlchemyConnector):
187
171
  FROM pg_matviews
188
172
  WHERE {where}
189
173
  """
190
- query_result = self._execute_on_database(query, database_name)
174
+ query_result = self._execute_pandas(query, database_name=database_name)
191
175
  else:
192
176
  # Tables and views use information_schema (supports table_catalog filter)
177
+ safe_schema = schema_name.replace("'", "''") if schema_name else ""
178
+ safe_db = database_name.replace("'", "''") if database_name else ""
193
179
  if schema_name:
194
- where = f"table_schema = '{schema_name}'"
180
+ where = f"table_schema = '{safe_schema}'"
195
181
  else:
196
182
  where = f"{list_to_in_str('table_schema not in', list(self._sys_schemas()))}"
197
183
 
@@ -203,9 +189,9 @@ class PostgreSQLConnector(SQLAlchemyConnector):
203
189
  query = f"""
204
190
  SELECT table_schema, table_name
205
191
  FROM information_schema.{metadata_config.info_table}
206
- WHERE table_catalog = '{database_name}' AND {where} {type_filter}
192
+ WHERE table_catalog = '{safe_db}' AND {where} {type_filter}
207
193
  """
208
- query_result = self._execute_pandas(query)
194
+ query_result = self._execute_pandas(query, database_name=database_name)
209
195
 
210
196
  # Format results
211
197
  result = []
@@ -238,10 +224,13 @@ class PostgreSQLConnector(SQLAlchemyConnector):
238
224
  """
239
225
  full_name = self.full_name(schema_name=schema_name, table_name=table_name)
240
226
 
227
+ safe_schema = schema_name.replace("'", "''") if schema_name else ""
228
+ safe_table = table_name.replace("'", "''") if table_name else ""
229
+
241
230
  if object_type == "VIEW":
242
231
  # Get view definition
243
232
  sql = f"""
244
- SELECT pg_get_viewdef('{schema_name}.{table_name}'::regclass, true) as definition
233
+ SELECT pg_get_viewdef('{safe_schema}.{safe_table}'::regclass, true) as definition
245
234
  """
246
235
  result = self._execute_pandas(sql)
247
236
  if not result.empty and result["definition"][0]:
@@ -253,7 +242,7 @@ class PostgreSQLConnector(SQLAlchemyConnector):
253
242
  sql = f"""
254
243
  SELECT definition
255
244
  FROM pg_matviews
256
- WHERE schemaname = '{schema_name}' AND matviewname = '{table_name}'
245
+ WHERE schemaname = '{safe_schema}' AND matviewname = '{safe_table}'
257
246
  """
258
247
  result = self._execute_pandas(sql)
259
248
  if not result.empty and result["definition"][0]:
@@ -269,7 +258,7 @@ class PostgreSQLConnector(SQLAlchemyConnector):
269
258
  col_defs = []
270
259
  pk_cols = []
271
260
  for col in columns:
272
- col_def = f" {self._quote_identifier(col['name'])} {col['type']}"
261
+ col_def = f" {self.quote_identifier(col['name'])} {col['type']}"
273
262
  if not col.get("nullable", True):
274
263
  col_def += " NOT NULL"
275
264
  if col.get("default_value"):
@@ -281,7 +270,7 @@ class PostgreSQLConnector(SQLAlchemyConnector):
281
270
  ddl = f"CREATE TABLE {full_name} (\n"
282
271
  ddl += ",\n".join(col_defs)
283
272
  if pk_cols:
284
- pk_names = ", ".join(self._quote_identifier(c) for c in pk_cols)
273
+ pk_names = ", ".join(self.quote_identifier(c) for c in pk_cols)
285
274
  ddl += f",\n PRIMARY KEY ({pk_names})"
286
275
  ddl += "\n);"
287
276
  return ddl
@@ -397,6 +386,10 @@ class PostgreSQLConnector(SQLAlchemyConnector):
397
386
  database_name = database_name or self.database_name
398
387
  schema_name = schema_name or self.schema_name
399
388
 
389
+ safe_db = database_name.replace("'", "''") if database_name else ""
390
+ safe_schema = schema_name.replace("'", "''") if schema_name else ""
391
+ safe_table = table_name.replace("'", "''") if table_name else ""
392
+
400
393
  # Use INFORMATION_SCHEMA to get schema with comments
401
394
  sql = f"""
402
395
  SELECT
@@ -414,16 +407,16 @@ class PostgreSQLConnector(SQLAlchemyConnector):
414
407
  ON tc.constraint_name = kcu.constraint_name
415
408
  AND tc.table_schema = kcu.table_schema
416
409
  WHERE tc.constraint_type = 'PRIMARY KEY'
417
- AND tc.table_schema = '{schema_name}'
418
- AND tc.table_name = '{table_name}'
410
+ AND tc.table_schema = '{safe_schema}'
411
+ AND tc.table_name = '{safe_table}'
419
412
  ) pk ON c.column_name = pk.column_name
420
413
  LEFT JOIN pg_catalog.pg_statio_all_tables st
421
414
  ON st.schemaname = c.table_schema AND st.relname = c.table_name
422
415
  LEFT JOIN pg_catalog.pg_description pgd
423
416
  ON pgd.objoid = st.relid AND pgd.objsubid = c.ordinal_position
424
- WHERE c.table_catalog = '{database_name}'
425
- AND c.table_schema = '{schema_name}'
426
- AND c.table_name = '{table_name}'
417
+ WHERE c.table_catalog = '{safe_db}'
418
+ AND c.table_schema = '{safe_schema}'
419
+ AND c.table_name = '{safe_table}'
427
420
  ORDER BY c.ordinal_position
428
421
  """
429
422
  query_result = self._execute_pandas(sql)
@@ -462,7 +455,8 @@ class PostgreSQLConnector(SQLAlchemyConnector):
462
455
  def get_schemas(self, catalog_name: str = "", database_name: str = "", include_sys: bool = False) -> List[str]:
463
456
  """Get list of schemas in the current database."""
464
457
  database_name = database_name or self.database_name
465
- sql = f"SELECT schema_name FROM information_schema.schemata WHERE catalog_name = '{database_name}'"
458
+ safe_db = database_name.replace("'", "''") if database_name else ""
459
+ sql = f"SELECT schema_name FROM information_schema.schemata WHERE catalog_name = '{safe_db}'"
466
460
  result = self._execute_pandas(sql)
467
461
  schemas = result["schema_name"].tolist()
468
462
 
@@ -479,21 +473,88 @@ class PostgreSQLConnector(SQLAlchemyConnector):
479
473
  """Get schema name for SQLAlchemy Inspector."""
480
474
  return schema_name or self.schema_name
481
475
 
476
+ def _get_engine(self, database_name: str = ""):
477
+ """Get or create engine for the given database. Thread-safe.
478
+
479
+ PostgreSQL requires different connection strings per database,
480
+ so each database gets its own engine with connection pool.
481
+ Uses LRU eviction (max 8 engines) to avoid holding too many connections.
482
+ """
483
+ db = database_name or self.database_name
484
+ with self._engine_lock:
485
+ if db in self._engines:
486
+ self._engines.move_to_end(db)
487
+ return self._engines[db]
488
+ conn_str = self._build_connection_string(db)
489
+ engine = create_engine(
490
+ conn_str,
491
+ pool_size=5,
492
+ max_overflow=10,
493
+ pool_timeout=self.timeout_seconds,
494
+ pool_recycle=3600,
495
+ pool_pre_ping=True,
496
+ )
497
+ self._engines[db] = engine
498
+ while len(self._engines) > self._max_engines:
499
+ _, evicted = self._engines.popitem(last=False)
500
+ try:
501
+ evicted.dispose()
502
+ except Exception as e:
503
+ logger.warning(f"Error disposing evicted engine: {e}")
504
+ return engine
505
+
506
+ @override
507
+ def _conn(self, catalog_name: str = "", database_name: str = "", schema_name: str = ""):
508
+ """Checkout connection from the correct per-database engine. Thread-safe.
509
+
510
+ Overrides base _conn() to avoid writing to shared self.engine.
511
+ Each thread gets a connection from the engine matching its database_name.
512
+ """
513
+ from contextlib import contextmanager
514
+
515
+ @contextmanager
516
+ def _pg_conn():
517
+ effective_database = database_name or self.database_name
518
+ effective_schema = schema_name or self.schema_name
519
+ effective_catalog = catalog_name or self.catalog_name
520
+ engine = self._get_engine(effective_database)
521
+ conn = engine.connect()
522
+ try:
523
+ self.do_switch_context(conn, effective_catalog, effective_database, effective_schema)
524
+ yield conn
525
+ except Exception:
526
+ try:
527
+ conn.rollback()
528
+ except Exception:
529
+ pass
530
+ raise
531
+ finally:
532
+ conn.close()
533
+
534
+ return _pg_conn()
535
+
536
+ @override
537
+ def close(self):
538
+ """Dispose all engines (per-database pool + parent engine)."""
539
+ for engine in self._engines.values():
540
+ try:
541
+ engine.dispose()
542
+ except Exception as e:
543
+ logger.warning(f"Error disposing engine: {e}")
544
+ self._engines.clear()
545
+ # Dispose parent engine that may have been created via connect()/_ensure_engine()
546
+ super().close()
547
+
482
548
  @override
483
- def do_switch_context(self, catalog_name: str = "", database_name: str = "", schema_name: str = ""):
484
- """Switch database/schema context.
549
+ def do_switch_context(self, conn, catalog_name: str = "", database_name: str = "", schema_name: str = ""):
550
+ """Apply schema context to a connection.
485
551
 
486
- PostgreSQL requires reconnection to switch databases.
487
- Schema switching only updates self.schema_name since all queries
488
- use explicit schema qualification via full_name().
552
+ Database switching is handled by _conn() which picks the right engine
553
+ based on the effective database_name.
489
554
  """
490
- if database_name and database_name != self.database_name:
491
- self.connection_string = self._build_connection_string(database_name)
492
- self.close()
493
- self.connect()
494
- self.database_name = database_name
495
555
  if schema_name:
496
- self.schema_name = schema_name
556
+ conn.execute(text(f"SET search_path TO {self.quote_identifier(schema_name)}"))
557
+ conn.commit()
497
558
 
498
559
  # ==================== Sample Data ====================
499
560
 
@@ -591,10 +652,10 @@ class PostgreSQLConnector(SQLAlchemyConnector):
591
652
  database_name = database_name or self.database_name
592
653
  schema_name = schema_name or self.schema_name
593
654
  if database_name and schema_name:
594
- return f"{self._quote_identifier(database_name)}.{self._quote_identifier(schema_name)}.{self._quote_identifier(table_name)}"
655
+ return f"{self.quote_identifier(database_name)}.{self.quote_identifier(schema_name)}.{self.quote_identifier(table_name)}"
595
656
  if schema_name:
596
- return f"{self._quote_identifier(schema_name)}.{self._quote_identifier(table_name)}"
597
- return self._quote_identifier(table_name)
657
+ return f"{self.quote_identifier(schema_name)}.{self.quote_identifier(table_name)}"
658
+ return self.quote_identifier(table_name)
598
659
 
599
660
  @override
600
661
  def _reset_filter_tables(
@@ -607,3 +668,66 @@ class PostgreSQLConnector(SQLAlchemyConnector):
607
668
  """Reset filter tables with full names."""
608
669
  schema_name = schema_name or self.schema_name
609
670
  return super()._reset_filter_tables(tables, "", database_name, schema_name)
671
+
672
+ # ==================== MigrationTargetMixin ====================
673
+
674
+ def describe_migration_capabilities(self) -> Dict[str, Any]:
675
+ return {
676
+ "supported": True,
677
+ "dialect_family": "postgres-like",
678
+ "requires": [], # OLTP — no distribution/partition required
679
+ "forbids": [
680
+ "DUPLICATE KEY (StarRocks-only)",
681
+ "DISTRIBUTED BY HASH ... BUCKETS (StarRocks-only)",
682
+ "ENGINE = (MySQL/ClickHouse syntax)",
683
+ ],
684
+ "type_hints": {
685
+ "HUGEINT": "NUMERIC(38,0) (Postgres has no HUGEINT/LARGEINT)",
686
+ "LARGEINT": "NUMERIC(38,0)",
687
+ "unbounded VARCHAR": "TEXT (prefer TEXT over unbounded VARCHAR)",
688
+ "TIMESTAMP WITH TIME ZONE": "TIMESTAMPTZ",
689
+ "JSON": "JSONB (prefer for indexing)",
690
+ "BOOLEAN": "BOOLEAN (no TINYINT cast needed)",
691
+ },
692
+ "example_ddl": (
693
+ "CREATE TABLE public.t (\n"
694
+ " id BIGSERIAL PRIMARY KEY,\n"
695
+ " name VARCHAR(255),\n"
696
+ " created_at TIMESTAMPTZ DEFAULT now()\n"
697
+ ")"
698
+ ),
699
+ }
700
+
701
+ def suggest_table_layout(self, columns: List[Dict[str, Any]]) -> Dict[str, Any]:
702
+ # Postgres is OLTP — no distribution keys or bucketing required
703
+ return {}
704
+
705
+ def validate_ddl(self, ddl: str) -> List[str]:
706
+ errors: List[str] = []
707
+ upper = ddl.upper()
708
+
709
+ if "DUPLICATE KEY" in upper:
710
+ errors.append("DUPLICATE KEY is StarRocks-only syntax; Postgres does not support it")
711
+ if "BUCKETS" in upper and "DISTRIBUTED BY" in upper:
712
+ errors.append("DISTRIBUTED BY ... BUCKETS is StarRocks syntax; Postgres does not support it")
713
+ if "ENGINE =" in upper or "ENGINE=" in upper:
714
+ errors.append("ENGINE clause is MySQL/ClickHouse syntax; not supported in Postgres")
715
+ if "ORDER BY" in upper and "CREATE TABLE" in upper:
716
+ # Rough heuristic: top-level ORDER BY inside CREATE TABLE is ClickHouse's
717
+ # MergeTree syntax. Postgres allows ORDER BY inside CTAS SELECT, so this
718
+ # check is intentionally loose (only flags when accompanied by ENGINE).
719
+ if "ENGINE" in upper:
720
+ errors.append("ORDER BY inside CREATE TABLE is ClickHouse syntax; use CREATE INDEX in Postgres")
721
+
722
+ return errors
723
+
724
+ def map_source_type(self, source_dialect: str, source_type: str) -> Optional[str]:
725
+ import re as _re
726
+
727
+ base = _re.sub(r"\(.*\)", "", source_type.strip().upper()).strip()
728
+ overrides = {
729
+ "HUGEINT": "NUMERIC(38,0)",
730
+ "LARGEINT": "NUMERIC(38,0)",
731
+ "DATETIME": "TIMESTAMP",
732
+ }
733
+ return overrides.get(base)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datus-postgresql"
3
- version = "0.1.4"
3
+ version = "0.1.5"
4
4
  description = "PostgreSQL database adapter for Datus"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -18,8 +18,8 @@ classifiers = [
18
18
  ]
19
19
 
20
20
  dependencies = [
21
- "datus-db-core>=0.1.0",
22
- "datus-sqlalchemy>=0.1.2",
21
+ "datus-db-core>=0.1.3",
22
+ "datus-sqlalchemy>=0.1.6",
23
23
  "psycopg2-binary>=2.9.11",
24
24
  "pydantic>=2.0.0",
25
25
  ]
@@ -2,7 +2,7 @@
2
2
  # Licensed under the Apache License, Version 2.0.
3
3
  # See http://www.apache.org/licenses/LICENSE-2.0 for details.
4
4
 
5
- from unittest.mock import patch
5
+ from unittest.mock import MagicMock, patch
6
6
 
7
7
  import pytest
8
8
 
@@ -193,28 +193,28 @@ def test_sys_schemas():
193
193
  @pytest.mark.acceptance
194
194
  def test_quote_identifier_basic():
195
195
  """Test _quote_identifier with basic identifier."""
196
- assert PostgreSQLConnector._quote_identifier("table_name") == '"table_name"'
196
+ assert PostgreSQLConnector.quote_identifier(MagicMock(), "table_name") == '"table_name"'
197
197
 
198
198
 
199
199
  @pytest.mark.acceptance
200
200
  def test_quote_identifier_with_double_quotes():
201
201
  """Test _quote_identifier escapes double quotes."""
202
- assert PostgreSQLConnector._quote_identifier('table"name') == '"table""name"'
202
+ assert PostgreSQLConnector.quote_identifier(MagicMock(), 'table"name') == '"table""name"'
203
203
 
204
204
 
205
205
  def test_quote_identifier_with_multiple_double_quotes():
206
206
  """Test _quote_identifier escapes multiple double quotes."""
207
- assert PostgreSQLConnector._quote_identifier('ta"ble"name') == '"ta""ble""name"'
207
+ assert PostgreSQLConnector.quote_identifier(MagicMock(), 'ta"ble"name') == '"ta""ble""name"'
208
208
 
209
209
 
210
210
  def test_quote_identifier_empty_string():
211
211
  """Test _quote_identifier with empty string."""
212
- assert PostgreSQLConnector._quote_identifier("") == '""'
212
+ assert PostgreSQLConnector.quote_identifier(MagicMock(), "") == '""'
213
213
 
214
214
 
215
215
  def test_quote_identifier_special_characters():
216
216
  """Test _quote_identifier with special characters."""
217
- assert PostgreSQLConnector._quote_identifier("table-name_123") == '"table-name_123"'
217
+ assert PostgreSQLConnector.quote_identifier(MagicMock(), "table-name_123") == '"table-name_123"'
218
218
 
219
219
 
220
220
  @pytest.mark.acceptance
@@ -396,3 +396,117 @@ def test_connector_schema_name_default():
396
396
  connector = PostgreSQLConnector(config)
397
397
 
398
398
  assert connector.schema_name == "public"
399
+
400
+
401
+ # ==================== _get_engine LRU Cache Tests ====================
402
+
403
+
404
+ def _make_connector():
405
+ """Helper: create a PostgreSQLConnector with mocked parent __init__."""
406
+ import threading
407
+
408
+ config = PostgreSQLConfig(username="user", password="pass", database="default_db")
409
+ with patch("datus_sqlalchemy.SQLAlchemyConnector.__init__", return_value=None):
410
+ connector = PostgreSQLConnector(config)
411
+ # Parent __init__ is mocked, so set attributes that _get_engine needs
412
+ connector._engine_lock = threading.Lock()
413
+ connector.engine = None
414
+ connector._owns_engine = False
415
+ connector.timeout_seconds = 30
416
+ return connector
417
+
418
+
419
+ def test_get_engine_returns_same_engine_for_same_db():
420
+ """Requesting the same database twice returns the cached engine."""
421
+ connector = _make_connector()
422
+ with patch("datus_postgresql.connector.create_engine", return_value=MagicMock()) as mock_ce:
423
+ e1 = connector._get_engine("db1")
424
+ e2 = connector._get_engine("db1")
425
+
426
+ assert e1 is e2
427
+ mock_ce.assert_called_once()
428
+
429
+
430
+ def test_get_engine_creates_different_engines_per_db():
431
+ """Different databases get different engines."""
432
+ connector = _make_connector()
433
+ engines = [MagicMock(), MagicMock()]
434
+ with patch("datus_postgresql.connector.create_engine", side_effect=engines):
435
+ e1 = connector._get_engine("db1")
436
+ e2 = connector._get_engine("db2")
437
+
438
+ assert e1 is not e2
439
+
440
+
441
+ def test_get_engine_evicts_lru_when_over_max():
442
+ """When cache exceeds max_engines, the least-recently-used engine is disposed."""
443
+ connector = _make_connector()
444
+ connector._max_engines = 3
445
+
446
+ created_engines = []
447
+
448
+ def make_engine(*args, **kwargs):
449
+ e = MagicMock()
450
+ created_engines.append(e)
451
+ return e
452
+
453
+ with patch("datus_postgresql.connector.create_engine", side_effect=make_engine):
454
+ connector._get_engine("db1")
455
+ connector._get_engine("db2")
456
+ connector._get_engine("db3")
457
+ # All 3 fit within max_engines=3
458
+ assert len(connector._engines) == 3
459
+ created_engines[0].dispose.assert_not_called()
460
+
461
+ # Adding a 4th should evict db1 (LRU)
462
+ connector._get_engine("db4")
463
+ assert len(connector._engines) == 3
464
+ assert "db1" not in connector._engines
465
+ created_engines[0].dispose.assert_called_once()
466
+
467
+
468
+ def test_get_engine_lru_access_refreshes_order():
469
+ """Accessing an existing engine moves it to most-recently-used, protecting it from eviction."""
470
+ connector = _make_connector()
471
+ connector._max_engines = 3
472
+
473
+ created_engines = {}
474
+
475
+ def make_engine(*args, **kwargs):
476
+ e = MagicMock()
477
+ created_engines[len(created_engines)] = e
478
+ return e
479
+
480
+ with patch("datus_postgresql.connector.create_engine", side_effect=make_engine):
481
+ connector._get_engine("db1") # engines[0]
482
+ connector._get_engine("db2") # engines[1]
483
+ connector._get_engine("db3") # engines[2]
484
+
485
+ # Access db1 again — moves it to MRU
486
+ connector._get_engine("db1")
487
+
488
+ # Add db4 — should evict db2 (now LRU), NOT db1
489
+ connector._get_engine("db4")
490
+
491
+ assert "db1" in connector._engines
492
+ assert "db2" not in connector._engines
493
+ assert "db3" in connector._engines
494
+ assert "db4" in connector._engines
495
+ created_engines[1].dispose.assert_called_once() # db2 evicted
496
+
497
+
498
+ def test_close_disposes_all_cached_engines():
499
+ """close() disposes all cached engines and clears the cache."""
500
+ connector = _make_connector()
501
+
502
+ mock_engines = [MagicMock(), MagicMock()]
503
+ with patch("datus_postgresql.connector.create_engine", side_effect=mock_engines):
504
+ connector._get_engine("db1")
505
+ connector._get_engine("db2")
506
+
507
+ with patch("datus_sqlalchemy.SQLAlchemyConnector.close"):
508
+ connector.close()
509
+
510
+ for e in mock_engines:
511
+ e.dispose.assert_called_once()
512
+ assert len(connector._engines) == 0
@@ -0,0 +1,89 @@
1
+ # Copyright 2025-present DatusAI, Inc.
2
+ # Licensed under the Apache License, Version 2.0.
3
+ # See http://www.apache.org/licenses/LICENSE-2.0 for details.
4
+
5
+ """Tests for PostgreSQL MigrationTargetMixin implementation."""
6
+
7
+ import pytest
8
+
9
+ from datus_db_core import MigrationTargetMixin
10
+ from datus_postgresql import PostgreSQLConnector
11
+
12
+
13
+ @pytest.fixture
14
+ def connector():
15
+ return PostgreSQLConnector.__new__(PostgreSQLConnector)
16
+
17
+
18
+ class TestMixinInheritance:
19
+ def test_postgresql_is_migration_target(self, connector):
20
+ assert isinstance(connector, MigrationTargetMixin)
21
+
22
+
23
+ class TestDescribeMigrationCapabilities:
24
+ def test_supported_true(self, connector):
25
+ result = connector.describe_migration_capabilities()
26
+ assert result["supported"] is True
27
+
28
+ def test_dialect_family_postgres_like(self, connector):
29
+ result = connector.describe_migration_capabilities()
30
+ assert result["dialect_family"] == "postgres-like"
31
+
32
+ def test_no_hard_requirements(self, connector):
33
+ """Postgres is OLTP — no distribution/partition required."""
34
+ result = connector.describe_migration_capabilities()
35
+ assert result["requires"] == []
36
+
37
+ def test_type_hints_mention_text_over_varchar(self, connector):
38
+ result = connector.describe_migration_capabilities()
39
+ hints_str = " ".join(result["type_hints"].values()).upper()
40
+ assert "TEXT" in hints_str
41
+
42
+ def test_example_ddl_is_simple(self, connector):
43
+ result = connector.describe_migration_capabilities()
44
+ ddl = result["example_ddl"].upper()
45
+ assert "CREATE TABLE" in ddl
46
+ # Should NOT contain DUPLICATE KEY or BUCKETS
47
+ assert "DUPLICATE KEY" not in ddl
48
+ assert "BUCKETS" not in ddl
49
+
50
+
51
+ class TestValidateDdl:
52
+ def test_accepts_standard_postgres_ddl(self, connector):
53
+ ddl = "CREATE TABLE public.t (id BIGSERIAL PRIMARY KEY, name VARCHAR(255))"
54
+ assert connector.validate_ddl(ddl) == []
55
+
56
+ def test_rejects_duplicate_key_starrocks_syntax(self, connector):
57
+ ddl = """CREATE TABLE public.t (id BIGINT)
58
+ DUPLICATE KEY(id)
59
+ DISTRIBUTED BY HASH(id) BUCKETS 10"""
60
+ errors = connector.validate_ddl(ddl)
61
+ assert any("DUPLICATE KEY" in e.upper() or "STARROCKS" in e.upper() for e in errors)
62
+
63
+ def test_rejects_distributed_by_hash_buckets(self, connector):
64
+ ddl = "CREATE TABLE public.t (id BIGINT) DISTRIBUTED BY HASH(id) BUCKETS 10"
65
+ errors = connector.validate_ddl(ddl)
66
+ assert any("BUCKETS" in e.upper() or "STARROCKS" in e.upper() for e in errors)
67
+
68
+
69
+ class TestSuggestTableLayout:
70
+ def test_returns_empty_dict(self, connector):
71
+ """Postgres doesn't need distribution keys — OLTP."""
72
+ columns = [
73
+ {"name": "id", "type": "BIGINT", "nullable": False},
74
+ {"name": "name", "type": "VARCHAR", "nullable": True},
75
+ ]
76
+ layout = connector.suggest_table_layout(columns)
77
+ assert layout == {}
78
+
79
+
80
+ class TestMapSourceType:
81
+ def test_hugeint_to_numeric(self, connector):
82
+ assert connector.map_source_type("duckdb", "HUGEINT") == "NUMERIC(38,0)"
83
+
84
+ def test_largeint_to_numeric(self, connector):
85
+ """StarRocks LARGEINT has no direct Postgres equivalent."""
86
+ assert connector.map_source_type("starrocks", "LARGEINT") == "NUMERIC(38,0)"
87
+
88
+ def test_unknown_returns_none(self, connector):
89
+ assert connector.map_source_type("duckdb", "VARCHAR") is None