pgsync 5.0.0__tar.gz → 6.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {pgsync-5.0.0 → pgsync-6.0.0}/PKG-INFO +9 -8
  2. {pgsync-5.0.0 → pgsync-6.0.0}/README.md +26 -0
  3. {pgsync-5.0.0 → pgsync-6.0.0}/bin/bootstrap +26 -5
  4. {pgsync-5.0.0 → pgsync-6.0.0}/bin/parallel_sync +33 -7
  5. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/__init__.py +1 -1
  6. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/base.py +61 -18
  7. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/constants.py +2 -0
  8. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/helper.py +9 -2
  9. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/node.py +5 -3
  10. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/plugin.py +2 -2
  11. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/redisqueue.py +8 -3
  12. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/settings.py +18 -0
  13. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/singleton.py +1 -1
  14. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/sync.py +84 -28
  15. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/trigger.py +24 -4
  16. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/urls.py +14 -2
  17. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/utils.py +66 -12
  18. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/view.py +65 -11
  19. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/PKG-INFO +9 -8
  20. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/requires.txt +7 -7
  21. {pgsync-5.0.0 → pgsync-6.0.0}/setup.py +1 -0
  22. {pgsync-5.0.0 → pgsync-6.0.0}/tests/conftest.py +46 -14
  23. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_constants.py +1 -0
  24. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync.py +4 -0
  25. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync_nested_children.py +35 -3
  26. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync_root.py +19 -2
  27. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync_single_child_fk_on_child.py +24 -2
  28. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync_single_child_fk_on_parent.py +24 -2
  29. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_trigger.py +24 -4
  30. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_unique_behaviour.py +1 -2
  31. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_urls.py +2 -2
  32. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_utils.py +13 -1
  33. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_view.py +3 -2
  34. {pgsync-5.0.0 → pgsync-6.0.0}/AUTHORS.rst +0 -0
  35. {pgsync-5.0.0 → pgsync-6.0.0}/CONTRIBUTING.rst +0 -0
  36. {pgsync-5.0.0 → pgsync-6.0.0}/HISTORY.rst +0 -0
  37. {pgsync-5.0.0 → pgsync-6.0.0}/LICENSE +0 -0
  38. {pgsync-5.0.0 → pgsync-6.0.0}/MANIFEST.in +0 -0
  39. {pgsync-5.0.0 → pgsync-6.0.0}/README.rst +0 -0
  40. {pgsync-5.0.0 → pgsync-6.0.0}/bin/pgsync +0 -0
  41. {pgsync-5.0.0 → pgsync-6.0.0}/docs/Makefile +0 -0
  42. {pgsync-5.0.0 → pgsync-6.0.0}/docs/authors.rst +0 -0
  43. {pgsync-5.0.0 → pgsync-6.0.0}/docs/changelog.rst +0 -0
  44. {pgsync-5.0.0 → pgsync-6.0.0}/docs/conf.py +0 -0
  45. {pgsync-5.0.0 → pgsync-6.0.0}/docs/contributing.rst +0 -0
  46. {pgsync-5.0.0 → pgsync-6.0.0}/docs/history.rst +0 -0
  47. {pgsync-5.0.0 → pgsync-6.0.0}/docs/index.rst +0 -0
  48. {pgsync-5.0.0 → pgsync-6.0.0}/docs/installation.rst +0 -0
  49. {pgsync-5.0.0 → pgsync-6.0.0}/docs/logo.png +0 -0
  50. {pgsync-5.0.0 → pgsync-6.0.0}/docs/make.bat +0 -0
  51. {pgsync-5.0.0 → pgsync-6.0.0}/docs/readme.rst +0 -0
  52. {pgsync-5.0.0 → pgsync-6.0.0}/docs/usage.rst +0 -0
  53. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/exc.py +0 -0
  54. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/querybuilder.py +0 -0
  55. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/search_client.py +0 -0
  56. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/transform.py +0 -0
  57. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/SOURCES.txt +0 -0
  58. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/dependency_links.txt +0 -0
  59. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/not-zip-safe +0 -0
  60. {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/top_level.txt +0 -0
  61. {pgsync-5.0.0 → pgsync-6.0.0}/pyproject.toml +0 -0
  62. {pgsync-5.0.0 → pgsync-6.0.0}/setup.cfg +0 -0
  63. {pgsync-5.0.0 → pgsync-6.0.0}/tests/__init__.py +0 -0
  64. {pgsync-5.0.0 → pgsync-6.0.0}/tests/fixtures/schema.json +0 -0
  65. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_base.py +0 -0
  66. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_env_vars.py +0 -0
  67. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_helper.py +0 -0
  68. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_log_handlers.py +0 -0
  69. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_node.py +0 -0
  70. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_query_builder.py +0 -0
  71. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_redisqueue.py +0 -0
  72. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_search_client.py +0 -0
  73. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_settings.py +0 -0
  74. {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_transform.py +0 -0
  75. {pgsync-5.0.0 → pgsync-6.0.0}/tests/testing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pgsync
3
- Version: 5.0.0
3
+ Version: 6.0.0
4
4
  Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
5
5
  Home-page: https://github.com/toluaina/pgsync
6
6
  Author: Tolu Aina
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
24
  Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
25
26
  Classifier: Programming Language :: Python :: Implementation :: CPython
26
27
  Classifier: Programming Language :: Python :: Implementation :: PyPy
27
28
  Classifier: License :: OSI Approved :: MIT License
@@ -32,17 +33,17 @@ License-File: LICENSE
32
33
  License-File: AUTHORS.rst
33
34
  Requires-Dist: async-timeout==5.0.1
34
35
  Requires-Dist: backports-datetime-fromisoformat==2.0.3
35
- Requires-Dist: boto3==1.40.50
36
- Requires-Dist: botocore==1.40.50
36
+ Requires-Dist: boto3==1.40.64
37
+ Requires-Dist: botocore==1.40.64
37
38
  Requires-Dist: certifi==2025.10.5
38
- Requires-Dist: charset-normalizer==3.4.3
39
+ Requires-Dist: charset-normalizer==3.4.4
39
40
  Requires-Dist: click==8.1.8
40
41
  Requires-Dist: elastic-transport==9.1.0
41
42
  Requires-Dist: elasticsearch==7.17.12
42
43
  Requires-Dist: elasticsearch-dsl==7.4.1
43
- Requires-Dist: environs==14.3.0
44
+ Requires-Dist: environs==14.4.0
44
45
  Requires-Dist: events==0.5
45
- Requires-Dist: idna==3.10
46
+ Requires-Dist: idna==3.11
46
47
  Requires-Dist: jmespath==1.0.1
47
48
  Requires-Dist: marshmallow==4.0.1
48
49
  Requires-Dist: mysql-replication==1.0.9
@@ -52,8 +53,8 @@ Requires-Dist: packaging==25.0
52
53
  Requires-Dist: psycopg2-binary==2.9.11
53
54
  Requires-Dist: pymysql==1.1.2
54
55
  Requires-Dist: python-dateutil==2.9.0.post0
55
- Requires-Dist: python-dotenv==1.1.1
56
- Requires-Dist: redis==6.4.0
56
+ Requires-Dist: python-dotenv==1.2.1
57
+ Requires-Dist: redis==7.0.1
57
58
  Requires-Dist: requests==2.32.5
58
59
  Requires-Dist: requests-aws4auth==1.3.1
59
60
  Requires-Dist: s3transfer==0.14.0
@@ -5,6 +5,7 @@
5
5
  [![Python versions](https://img.shields.io/pypi/pyversions/pgsync)](https://pypi.org/project/pgsync)
6
6
  [![Downloads](https://img.shields.io/pypi/dm/pgsync)](https://pypi.org/project/pgsync)
7
7
  [![codecov](https://codecov.io/gh/toluaina/pgsync/branch/main/graph/badge.svg?token=cvQzYkz6CV)](https://codecov.io/gh/toluaina/pgsync)
8
+ [![Sponsored by DigitalOcean](https://img.shields.io/badge/Sponsored%20by-DigitalOcean-0080FF?logo=digitalocean&logoColor=white)](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync)
8
9
 
9
10
 
10
11
  ## PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
@@ -43,6 +44,23 @@ Other benefits of PGSync include:
43
44
  - Scale on-demand (multiple consumers)
44
45
  - Easily join multiple nested tables
45
46
 
47
+ ## Sponsors
48
+
49
+ [PGSync](https://pgsync.com) is made possible with support from [DigitalOcean](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync).
50
+
51
+ <p>
52
+ <a href="https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync" rel="sponsored noopener noreferrer">
53
+ <img
54
+ src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg"
55
+ alt="DigitalOcean"
56
+ width="210"
57
+ loading="lazy"
58
+ decoding="async"
59
+ >
60
+ </a>
61
+ </p>
62
+
63
+
46
64
  #### Why?
47
65
 
48
66
  At a high level, you have data in a PostgreSQL/MySQL/MariaDB database and you want to mirror it in Elasticsearch/OpenSearch.
@@ -91,6 +109,14 @@ There are several ways of installing and trying PGSync
91
109
  - [Running in Docker](#running-in-docker) is the easiest way to get up and running.
92
110
  - [Manual configuration](#manual-configuration)
93
111
 
112
+ #### Book Demo Example (requires a DigitalOcean account)
113
+
114
+ [![Deploy to DO](https://www.deploytodo.com/do-btn-blue.svg)](https://cloud.digitalocean.com/apps/new?repo=https://github.com/toluaina/pgsync/tree/main)
115
+
116
+ Fill in the following during setup:
117
+ - `ELASTICSEARCH_URL` e.g. https://user:pass@os-host:443
118
+ - `REDIS_URL` e.g. rediss://default:pass@host:port/0
119
+
94
120
 
95
121
  ##### Running in Docker (Using Github Repository)
96
122
 
@@ -26,7 +26,16 @@ logger = logging.getLogger(__name__)
26
26
  default=settings.SCHEMA,
27
27
  show_default=True,
28
28
  cls=MutuallyExclusiveOption,
29
- mutually_exclusive=["s3_schema_url"],
29
+ mutually_exclusive=["s3_schema_url", "schema_url"],
30
+ )
31
+ @click.option(
32
+ "--schema_url",
33
+ help="URL for schema config",
34
+ type=click.STRING,
35
+ default=settings.SCHEMA_URL,
36
+ show_default=True,
37
+ cls=MutuallyExclusiveOption,
38
+ mutually_exclusive=["config", "s3_schema_url"],
30
39
  )
31
40
  @click.option(
32
41
  "--s3_schema_url",
@@ -35,7 +44,7 @@ logger = logging.getLogger(__name__)
35
44
  default=settings.S3_SCHEMA_URL,
36
45
  show_default=True,
37
46
  cls=MutuallyExclusiveOption,
38
- mutually_exclusive=["config"],
47
+ mutually_exclusive=["config", "schema_url"],
39
48
  )
40
49
  @click.option("--host", "-h", help="PG_HOST override")
41
50
  @click.option("--password", is_flag=True, help="Prompt for database password")
@@ -67,6 +76,7 @@ logger = logging.getLogger(__name__)
67
76
  def main(
68
77
  teardown: bool,
69
78
  config: str,
79
+ schema_url: str,
70
80
  s3_schema_url: str,
71
81
  user: str,
72
82
  password: bool,
@@ -89,13 +99,24 @@ def main(
89
99
  )
90
100
  kwargs = {key: value for key, value in kwargs.items() if value is not None}
91
101
 
92
- validate_config(config=config, s3_schema_url=s3_schema_url)
102
+ validate_config(
103
+ config=config,
104
+ schema_url=schema_url,
105
+ s3_schema_url=s3_schema_url,
106
+ )
93
107
 
94
- show_settings(config=config, s3_schema_url=s3_schema_url)
108
+ show_settings(
109
+ config=config,
110
+ schema_url=schema_url,
111
+ s3_schema_url=s3_schema_url,
112
+ **kwargs,
113
+ )
95
114
 
96
115
  validate: bool = False if teardown else True
97
116
 
98
- for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
117
+ for doc in config_loader(
118
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
119
+ ):
99
120
  sync: Sync = Sync(
100
121
  doc,
101
122
  verbose=verbose,
@@ -53,7 +53,13 @@ from threading import Thread
53
53
  import click
54
54
  import sqlalchemy as sa
55
55
 
56
- from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH, S3_SCHEMA_URL, SCHEMA
56
+ from pgsync.settings import (
57
+ BLOCK_SIZE,
58
+ CHECKPOINT_PATH,
59
+ S3_SCHEMA_URL,
60
+ SCHEMA,
61
+ SCHEMA_URL,
62
+ )
57
63
  from pgsync.sync import Sync
58
64
  from pgsync.utils import (
59
65
  config_loader,
@@ -386,7 +392,16 @@ def run_task(
386
392
  default=SCHEMA,
387
393
  show_default=True,
388
394
  cls=MutuallyExclusiveOption,
389
- mutually_exclusive=["s3_schema_url"],
395
+ mutually_exclusive=["s3_schema_url", "schema_url"],
396
+ )
397
+ @click.option(
398
+ "--schema_url",
399
+ help="URL for schema config",
400
+ type=click.STRING,
401
+ default=SCHEMA_URL,
402
+ show_default=True,
403
+ cls=MutuallyExclusiveOption,
404
+ mutually_exclusive=["config", "s3_schema_url"],
390
405
  )
391
406
  @click.option(
392
407
  "--s3_schema_url",
@@ -395,7 +410,7 @@ def run_task(
395
410
  default=S3_SCHEMA_URL,
396
411
  show_default=True,
397
412
  cls=MutuallyExclusiveOption,
398
- mutually_exclusive=["config"],
413
+ mutually_exclusive=["config", "schema_url"],
399
414
  )
400
415
  @click.option(
401
416
  "--verbose",
@@ -428,7 +443,12 @@ def run_task(
428
443
  default="multiprocess_async",
429
444
  )
430
445
  def main(
431
- config: str, s3_schema_url: str, nprocs: int, mode: str, verbose: bool
446
+ config: str,
447
+ schema_url: str,
448
+ s3_schema_url: str,
449
+ nprocs: int,
450
+ mode: str,
451
+ verbose: bool,
432
452
  ) -> None:
433
453
  """
434
454
  TODO:
@@ -436,11 +456,17 @@ def main(
436
456
  - Handle KeyboardInterrupt Exception
437
457
  """
438
458
 
439
- validate_config(config=config, s3_schema_url=s3_schema_url)
459
+ validate_config(
460
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
461
+ )
440
462
 
441
- show_settings(config=config, s3_schema_url=s3_schema_url)
463
+ show_settings(
464
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
465
+ )
442
466
 
443
- for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
467
+ for doc in config_loader(
468
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
469
+ ):
444
470
  tasks: t.Generator = fetch_tasks(doc)
445
471
  if mode == "synchronous":
446
472
  synchronous(tasks, doc, verbose=verbose)
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = "Tolu Aina"
4
4
  __email__ = "tolu@pgsync.com"
5
- __version__ = "5.0.0"
5
+ __version__ = "6.0.0"
@@ -31,6 +31,8 @@ from .exc import (
31
31
  )
32
32
  from .settings import (
33
33
  IS_MYSQL_COMPAT,
34
+ MYSQL_DATABASE,
35
+ PG_DATABASE,
34
36
  PG_HOST_RO,
35
37
  PG_PASSWORD_RO,
36
38
  PG_PORT_RO,
@@ -39,6 +41,12 @@ from .settings import (
39
41
  PG_URL_RO,
40
42
  PG_USER_RO,
41
43
  QUERY_CHUNK_SIZE,
44
+ SQLALCHEMY_MAX_OVERFLOW,
45
+ SQLALCHEMY_POOL_PRE_PING,
46
+ SQLALCHEMY_POOL_RECYCLE,
47
+ SQLALCHEMY_POOL_SIZE,
48
+ SQLALCHEMY_POOL_TIMEOUT,
49
+ SQLALCHEMY_USE_NULLPOOL,
42
50
  STREAM_RESULTS,
43
51
  )
44
52
  from .trigger import CREATE_TRIGGER_TEMPLATE
@@ -230,6 +238,7 @@ class Base(object):
230
238
  self.__columns: dict = {}
231
239
  self.verbose: bool = verbose
232
240
  self._conn = None
241
+ self._session = None
233
242
 
234
243
  def connect(self) -> None:
235
244
  """Connect to database."""
@@ -338,8 +347,19 @@ class Base(object):
338
347
 
339
348
  @property
340
349
  def session(self) -> sessionmaker:
341
- Session = sessionmaker(bind=self.engine.connect(), autoflush=True)
342
- return Session()
350
+ if self._session is None:
351
+ Session = sessionmaker(bind=self.engine, autoflush=True)
352
+ self._session = Session()
353
+ return self._session
354
+
355
+ def close_session(self) -> None:
356
+ """Close the cached session and reset it."""
357
+ if self._session is not None:
358
+ try:
359
+ self._session.close()
360
+ except Exception:
361
+ pass
362
+ self._session = None
343
363
 
344
364
  @property
345
365
  def engine(self) -> sa.engine.Engine:
@@ -822,6 +842,7 @@ class Base(object):
822
842
  schema: str,
823
843
  tables: t.Set,
824
844
  user_defined_fkey_tables: dict,
845
+ node_columns: dict,
825
846
  ) -> None:
826
847
  create_view(
827
848
  self.engine,
@@ -832,6 +853,7 @@ class Base(object):
832
853
  tables,
833
854
  user_defined_fkey_tables,
834
855
  self._materialized_views(schema),
856
+ node_columns,
835
857
  )
836
858
 
837
859
  def drop_view(self, schema: str) -> None:
@@ -946,9 +968,9 @@ class Base(object):
946
968
  """Check if the trigger function exists."""
947
969
  return self.exists(
948
970
  sa.text(
949
- f"SELECT 1 FROM pg_proc WHERE proname = :name "
950
- f"AND pronamespace = (SELECT oid FROM pg_namespace "
951
- f"WHERE nspname = :schema)"
971
+ "SELECT 1 FROM pg_proc WHERE proname = :name "
972
+ "AND pronamespace = (SELECT oid FROM pg_namespace "
973
+ "WHERE nspname = :schema)"
952
974
  ).bindparams(name=TRIGGER_FUNC, schema=schema),
953
975
  )
954
976
 
@@ -989,7 +1011,7 @@ class Base(object):
989
1011
  self.disable_trigger(schema, table)
990
1012
  logger.debug(f"Disabled trigger on table: {schema}.{table}")
991
1013
 
992
- def enable_trigger(self, schema: str, table, str) -> None:
1014
+ def enable_trigger(self, schema: str, table: str) -> None:
993
1015
  """Enable a pgsync defined trigger."""
994
1016
  for name in ("notify", "truncate"):
995
1017
  self.execute(
@@ -1107,20 +1129,21 @@ class Base(object):
1107
1129
  # including trailing space below is deliberate
1108
1130
  suffix: str = f"{row[span[1]:]} "
1109
1131
 
1110
- if "old-key" and "new-tuple" in suffix:
1132
+ if "old-key" in suffix and "new-tuple" in suffix:
1111
1133
  # this can only be an UPDATE operation
1112
1134
  if payload.tg_op != UPDATE:
1113
1135
  msg = f"Unknown {payload.tg_op} operation for row: {row}"
1114
1136
  raise LogicalSlotParseError(msg)
1115
1137
 
1116
- i: int = suffix.index("old-key:")
1138
+ i: int = suffix.find("old-key:")
1117
1139
  if i > -1:
1118
- j: int = suffix.index("new-tuple:")
1119
- s: str = suffix[i + len("old-key:") : j]
1120
- for key, value in _parse_logical_slot(s):
1121
- payload.old[key] = value
1140
+ j: int = suffix.find("new-tuple:")
1141
+ if j > -1:
1142
+ s: str = suffix[i + len("old-key:") : j]
1143
+ for key, value in _parse_logical_slot(s):
1144
+ payload.old[key] = value
1122
1145
 
1123
- i = suffix.index("new-tuple:")
1146
+ i = suffix.find("new-tuple:")
1124
1147
  if i > -1:
1125
1148
  s = suffix[i + len("new-tuple:") :]
1126
1149
  for key, value in _parse_logical_slot(s):
@@ -1336,7 +1359,28 @@ def _pg_engine(
1336
1359
  password=password,
1337
1360
  port=port,
1338
1361
  )
1339
- return sa.create_engine(url, echo=echo, connect_args=connect_args)
1362
+
1363
+ # Use NullPool for testing to avoid connection exhaustion
1364
+ if SQLALCHEMY_USE_NULLPOOL:
1365
+ from sqlalchemy.pool import NullPool
1366
+
1367
+ return sa.create_engine(
1368
+ url,
1369
+ echo=echo,
1370
+ connect_args=connect_args,
1371
+ poolclass=NullPool,
1372
+ )
1373
+
1374
+ return sa.create_engine(
1375
+ url,
1376
+ echo=echo,
1377
+ connect_args=connect_args,
1378
+ pool_size=SQLALCHEMY_POOL_SIZE,
1379
+ max_overflow=SQLALCHEMY_MAX_OVERFLOW,
1380
+ pool_pre_ping=SQLALCHEMY_POOL_PRE_PING,
1381
+ pool_recycle=SQLALCHEMY_POOL_RECYCLE,
1382
+ pool_timeout=SQLALCHEMY_POOL_TIMEOUT,
1383
+ )
1340
1384
 
1341
1385
 
1342
1386
  def pg_execute(
@@ -1367,7 +1411,7 @@ def create_database(database: str, echo: bool = False) -> None:
1367
1411
  """Create a database."""
1368
1412
  logger.debug(f"Creating database: {database}")
1369
1413
  with pg_engine(
1370
- "information_schema" if IS_MYSQL_COMPAT else "postgres",
1414
+ MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE,
1371
1415
  echo=echo,
1372
1416
  ) as engine:
1373
1417
  pg_execute(
@@ -1382,8 +1426,7 @@ def drop_database(database: str, echo: bool = False) -> None:
1382
1426
  """Drop a database."""
1383
1427
  logger.debug(f"Dropping database: {database}")
1384
1428
  with pg_engine(
1385
- "information_schema" if IS_MYSQL_COMPAT else "postgres",
1386
- echo=echo,
1429
+ MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE, echo=echo
1387
1430
  ) as engine:
1388
1431
  pg_execute(
1389
1432
  engine,
@@ -1396,7 +1439,7 @@ def drop_database(database: str, echo: bool = False) -> None:
1396
1439
  def database_exists(database: str, echo: bool = False) -> bool:
1397
1440
  """Check if database is present."""
1398
1441
  with pg_engine(
1399
- "information_schema" if IS_MYSQL_COMPAT else "postgres",
1442
+ MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE,
1400
1443
  echo=echo,
1401
1444
  ) as engine:
1402
1445
  with engine.connect() as conn:
@@ -135,6 +135,7 @@ ELASTICSEARCH_MAPPING_PARAMETERS = [
135
135
  "boost",
136
136
  "coerce",
137
137
  "copy_to",
138
+ "dimension",
138
139
  "doc_values",
139
140
  "dynamic",
140
141
  "eager_global_ordinals",
@@ -198,6 +199,7 @@ MATERIALIZED_VIEW_COLUMNS = [
198
199
  "indices",
199
200
  "primary_keys",
200
201
  "table_name",
202
+ "columns",
201
203
  ]
202
204
 
203
205
  # Primary key delimiter
@@ -20,6 +20,7 @@ def teardown(
20
20
  drop_index: bool = True,
21
21
  delete_checkpoint: bool = True,
22
22
  config: t.Optional[str] = None,
23
+ schema_url: t.Optional[str] = None,
23
24
  s3_schema_url: t.Optional[str] = None,
24
25
  validate: bool = False,
25
26
  ) -> None:
@@ -33,11 +34,17 @@ def teardown(
33
34
  drop_index (bool, optional): Whether to drop the index. Defaults to True.
34
35
  delete_checkpoint (bool, optional): Whether to delete the checkpoint. Defaults to True.
35
36
  config (Optional[str], optional): The configuration file path. Defaults to None.
37
+ schema_url (Optional[str], optional): The schema URL. Defaults to None.
38
+ s3_schema_url (Optional[str], optional): The S3 schema URL. Defaults to None.
36
39
  validate (bool, optional): Whether to validate the configuration. Defaults to False.
37
40
  """
38
- validate_config(config=config, s3_schema_url=s3_schema_url)
41
+ validate_config(
42
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
43
+ )
39
44
 
40
- for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
45
+ for doc in config_loader(
46
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
47
+ ):
41
48
  if not database_exists(doc["database"]):
42
49
  logger.warning(f'Database {doc["database"]} does not exist')
43
50
  continue
@@ -294,7 +294,7 @@ class Node(object):
294
294
  self.columns.append(self.model.c[column_name])
295
295
 
296
296
  @property
297
- def primary_keys(self):
297
+ def primary_keys(self) -> t.List[sa.sql.ColumnElement]:
298
298
  return [
299
299
  self.model.c[str(sa.text(primary_key))]
300
300
  for primary_key in self.model.primary_keys
@@ -311,7 +311,7 @@ class Node(object):
311
311
 
312
312
  def add_child(self, node: Node) -> None:
313
313
  """All nodes except the root node must have a relationship defined."""
314
- node.parent: Node = self
314
+ node.parent = self
315
315
  if not node.is_root and (
316
316
  not node.relationship.type or not node.relationship.variant
317
317
  ):
@@ -388,7 +388,9 @@ class Tree(threading.local):
388
388
  raise TableNotInNodeError(f"Table not specified in node: {nodes}")
389
389
 
390
390
  if not set(nodes.keys()).issubset(set(NODE_ATTRIBUTES)):
391
- attrs = set(nodes.keys()).difference(set(NODE_ATTRIBUTES))
391
+ attrs: t.Set[str] = set(nodes.keys()).difference(
392
+ set(NODE_ATTRIBUTES)
393
+ )
392
394
  raise NodeAttributeError(f"Unknown node attribute(s): {attrs}")
393
395
 
394
396
  node: Node = Node(
@@ -16,7 +16,7 @@ class Plugin(ABC):
16
16
  """Plugin base class."""
17
17
 
18
18
  @abstractmethod
19
- def transform(self, doc: dict, **kwargs) -> dict:
19
+ def transform(self, doc: dict, **kwargs: t.Any) -> dict:
20
20
  """This must be implemented by all derived classes."""
21
21
  pass
22
22
 
@@ -90,7 +90,7 @@ class Plugins(object):
90
90
  ]:
91
91
  self.walk(f"{package}.{pkg}")
92
92
 
93
- def transform(self, docs: list) -> t.Generator:
93
+ def transform(self, docs: t.Iterable[dict]) -> t.Generator:
94
94
  """Applies all plugins to each doc."""
95
95
  for doc in docs:
96
96
  for plugin in self.plugins:
@@ -69,10 +69,15 @@ class RedisQueue(object):
69
69
  return []
70
70
  payloads = [json.loads(i) for i in items]
71
71
  visible_map: dict = pg_visible_in_snapshot()(
72
- [payload["xmin"] for payload in payloads]
72
+ [payload.get("xmin") for payload in payloads if "xmin" in payload]
73
73
  )
74
74
  visible: t.List[dict] = []
75
75
  for item, payload in zip(items, payloads):
76
+ if "xmin" not in payload:
77
+ logger.warning(
78
+ f"Skipping payload without 'xmin' key: {payload}"
79
+ )
80
+ continue
76
81
  if visible_map.get(payload["xmin"]):
77
82
  # Claim atomically
78
83
  removed = self.__db.lrem(self.key, 1, item)
@@ -80,7 +85,7 @@ class RedisQueue(object):
80
85
  visible.append(payload)
81
86
  return visible
82
87
 
83
- def push(self, items: t.List) -> None:
88
+ def push(self, items: t.Iterable[dict]) -> None:
84
89
  """Push multiple items onto the queue."""
85
90
  self.__db.rpush(self.key, *map(json.dumps, items))
86
91
 
@@ -97,5 +102,5 @@ class RedisQueue(object):
97
102
 
98
103
  def get_meta(self, default: t.Any = None) -> t.Any:
99
104
  """Retrieve the stored value (or *default* if nothing is set)."""
100
- raw = self.__db.get(self._meta_key)
105
+ raw: t.Optional[str] = self.__db.get(self._meta_key)
101
106
  return json.loads(raw) if raw is not None else default
@@ -45,12 +45,27 @@ REPLICATION_SLOT_CLEANUP_INTERVAL = env.float(
45
45
  # path to the application schema config
46
46
  SCHEMA = env.str("SCHEMA", default=None)
47
47
  S3_SCHEMA_URL = env.str("S3_SCHEMA_URL", default=None)
48
+ SCHEMA_URL = env.str("SCHEMA_URL", default=None)
48
49
  USE_ASYNC = env.bool("USE_ASYNC", default=False)
49
50
  STREAM_RESULTS = env.bool("STREAM_RESULTS", default=True)
50
51
  # db polling interval
51
52
  POLL_INTERVAL = env.float("POLL_INTERVAL", default=0.1)
52
53
  FORMAT_WITH_COMMAS = env.bool("FORMAT_WITH_COMMAS", default=True)
53
54
 
55
+ # SQLAlchemy Settings:
56
+ # Use NullPool (no connection pooling) - useful for testing or when you want to close connections immediately
57
+ SQLALCHEMY_USE_NULLPOOL = env.bool("SQLALCHEMY_USE_NULLPOOL", default=False)
58
+ # This is the number of connections that will be persistently maintained in the pool.
59
+ SQLALCHEMY_POOL_SIZE = env.int("SQLALCHEMY_POOL_SIZE", default=5)
60
+ # This is the number of connections that can be opened beyond the pool_size when all connections in the pool are in use.
61
+ SQLALCHEMY_MAX_OVERFLOW = env.int("SQLALCHEMY_MAX_OVERFLOW", default=10)
62
+ # When set to True, a "ping" will be performed on connections before they are checked out of the pool to ensure they are still live.
63
+ SQLALCHEMY_POOL_PRE_PING = env.bool("SQLALCHEMY_POOL_PRE_PING", default=False)
64
+ # Number of seconds after which a pooled connection is recycled. The default of -1 means connections are not recycled based on a timeout. If set to a positive integer, connections will be recycled after that many seconds. For example, 3600 recycles connections after one hour.
65
+ SQLALCHEMY_POOL_RECYCLE = env.int("SQLALCHEMY_POOL_RECYCLE", default=-1)
66
+ # This is the number of seconds to wait for a connection to become available from the pool before raising a TimeoutError.
67
+ SQLALCHEMY_POOL_TIMEOUT = env.int("SQLALCHEMY_POOL_TIMEOUT", default=30)
68
+
54
69
  # Elasticsearch/OpenSearch:
55
70
  ELASTICSEARCH_API_KEY = env.str("ELASTICSEARCH_API_KEY", default=None)
56
71
  ELASTICSEARCH_API_KEY_ID = env.str("ELASTICSEARCH_API_KEY_ID", default=None)
@@ -187,6 +202,9 @@ if PG_URL:
187
202
  else:
188
203
  # If PG_URL is not set, we need to use the other PG_* variables
189
204
  PG_USER = env.str("PG_USER")
205
+ # The default database name, e.g. postgres or defaultdb
206
+ PG_DATABASE = env.str("PG_DATABASE", default="postgres")
207
+ MYSQL_DATABASE = env.str("MYSQL_DATABASE", default="information_schema")
190
208
 
191
209
  # Read-only Postgres:
192
210
  # This is used for read-only consumers that do not require replication slots or triggers.
@@ -14,7 +14,7 @@ class Singleton(type):
14
14
 
15
15
  _instances: dict = {}
16
16
 
17
- def __call__(cls, *args, **kwargs):
17
+ def __call__(cls, *args: t.Any, **kwargs: t.Any) -> t.Any:
18
18
  """
19
19
  If an instance of the class has already been created with the same arguments,
20
20
  return that instance. Otherwise, create a new instance and return it.