pgsync 5.0.0__tar.gz → 6.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgsync-5.0.0 → pgsync-6.0.0}/PKG-INFO +9 -8
- {pgsync-5.0.0 → pgsync-6.0.0}/README.md +26 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/bin/bootstrap +26 -5
- {pgsync-5.0.0 → pgsync-6.0.0}/bin/parallel_sync +33 -7
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/__init__.py +1 -1
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/base.py +61 -18
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/constants.py +2 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/helper.py +9 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/node.py +5 -3
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/plugin.py +2 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/redisqueue.py +8 -3
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/settings.py +18 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/singleton.py +1 -1
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/sync.py +84 -28
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/trigger.py +24 -4
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/urls.py +14 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/utils.py +66 -12
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/view.py +65 -11
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/PKG-INFO +9 -8
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/requires.txt +7 -7
- {pgsync-5.0.0 → pgsync-6.0.0}/setup.py +1 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/conftest.py +46 -14
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_constants.py +1 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync.py +4 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync_nested_children.py +35 -3
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync_root.py +19 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync_single_child_fk_on_child.py +24 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_sync_single_child_fk_on_parent.py +24 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_trigger.py +24 -4
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_unique_behaviour.py +1 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_urls.py +2 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_utils.py +13 -1
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_view.py +3 -2
- {pgsync-5.0.0 → pgsync-6.0.0}/AUTHORS.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/CONTRIBUTING.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/HISTORY.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/LICENSE +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/MANIFEST.in +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/README.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/bin/pgsync +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/Makefile +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/authors.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/changelog.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/conf.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/contributing.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/history.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/index.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/installation.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/logo.png +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/make.bat +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/readme.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/docs/usage.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/exc.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/querybuilder.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/search_client.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync/transform.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/SOURCES.txt +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/pyproject.toml +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/setup.cfg +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/__init__.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/fixtures/schema.json +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_base.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_env_vars.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_helper.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_log_handlers.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_node.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_query_builder.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_redisqueue.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_search_client.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_settings.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/test_transform.py +0 -0
- {pgsync-5.0.0 → pgsync-6.0.0}/tests/testing_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version:
|
|
3
|
+
Version: 6.0.0
|
|
4
4
|
Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.11
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.12
|
|
24
24
|
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
25
26
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
26
27
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
27
28
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -32,17 +33,17 @@ License-File: LICENSE
|
|
|
32
33
|
License-File: AUTHORS.rst
|
|
33
34
|
Requires-Dist: async-timeout==5.0.1
|
|
34
35
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
35
|
-
Requires-Dist: boto3==1.40.
|
|
36
|
-
Requires-Dist: botocore==1.40.
|
|
36
|
+
Requires-Dist: boto3==1.40.64
|
|
37
|
+
Requires-Dist: botocore==1.40.64
|
|
37
38
|
Requires-Dist: certifi==2025.10.5
|
|
38
|
-
Requires-Dist: charset-normalizer==3.4.
|
|
39
|
+
Requires-Dist: charset-normalizer==3.4.4
|
|
39
40
|
Requires-Dist: click==8.1.8
|
|
40
41
|
Requires-Dist: elastic-transport==9.1.0
|
|
41
42
|
Requires-Dist: elasticsearch==7.17.12
|
|
42
43
|
Requires-Dist: elasticsearch-dsl==7.4.1
|
|
43
|
-
Requires-Dist: environs==14.
|
|
44
|
+
Requires-Dist: environs==14.4.0
|
|
44
45
|
Requires-Dist: events==0.5
|
|
45
|
-
Requires-Dist: idna==3.
|
|
46
|
+
Requires-Dist: idna==3.11
|
|
46
47
|
Requires-Dist: jmespath==1.0.1
|
|
47
48
|
Requires-Dist: marshmallow==4.0.1
|
|
48
49
|
Requires-Dist: mysql-replication==1.0.9
|
|
@@ -52,8 +53,8 @@ Requires-Dist: packaging==25.0
|
|
|
52
53
|
Requires-Dist: psycopg2-binary==2.9.11
|
|
53
54
|
Requires-Dist: pymysql==1.1.2
|
|
54
55
|
Requires-Dist: python-dateutil==2.9.0.post0
|
|
55
|
-
Requires-Dist: python-dotenv==1.
|
|
56
|
-
Requires-Dist: redis==
|
|
56
|
+
Requires-Dist: python-dotenv==1.2.1
|
|
57
|
+
Requires-Dist: redis==7.0.1
|
|
57
58
|
Requires-Dist: requests==2.32.5
|
|
58
59
|
Requires-Dist: requests-aws4auth==1.3.1
|
|
59
60
|
Requires-Dist: s3transfer==0.14.0
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
[](https://pypi.org/project/pgsync)
|
|
6
6
|
[](https://pypi.org/project/pgsync)
|
|
7
7
|
[](https://codecov.io/gh/toluaina/pgsync)
|
|
8
|
+
[](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync)
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
## PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
@@ -43,6 +44,23 @@ Other benefits of PGSync include:
|
|
|
43
44
|
- Scale on-demand (multiple consumers)
|
|
44
45
|
- Easily join multiple nested tables
|
|
45
46
|
|
|
47
|
+
## Sponsors
|
|
48
|
+
|
|
49
|
+
[PGSync](https://pgsync.com) is made possible with support from [DigitalOcean](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync).
|
|
50
|
+
|
|
51
|
+
<p>
|
|
52
|
+
<a href="https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync" rel="sponsored noopener noreferrer">
|
|
53
|
+
<img
|
|
54
|
+
src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg"
|
|
55
|
+
alt="DigitalOcean"
|
|
56
|
+
width="210"
|
|
57
|
+
loading="lazy"
|
|
58
|
+
decoding="async"
|
|
59
|
+
>
|
|
60
|
+
</a>
|
|
61
|
+
</p>
|
|
62
|
+
|
|
63
|
+
|
|
46
64
|
#### Why?
|
|
47
65
|
|
|
48
66
|
At a high level, you have data in a PostgreSQL/MySQL/MariaDB database and you want to mirror it in Elasticsearch/OpenSearch.
|
|
@@ -91,6 +109,14 @@ There are several ways of installing and trying PGSync
|
|
|
91
109
|
- [Running in Docker](#running-in-docker) is the easiest way to get up and running.
|
|
92
110
|
- [Manual configuration](#manual-configuration)
|
|
93
111
|
|
|
112
|
+
#### Book Demo Example (requires a DigitalOcean account)
|
|
113
|
+
|
|
114
|
+
[](https://cloud.digitalocean.com/apps/new?repo=https://github.com/toluaina/pgsync/tree/main)
|
|
115
|
+
|
|
116
|
+
Fill in the following during the setup
|
|
117
|
+
- `ELASTICSEARCH_URL` e.g. https://user:pass@os-host:443
|
|
118
|
+
- `REDIS_URL` e.g. rediss://default:pass@host:port/0
|
|
119
|
+
|
|
94
120
|
|
|
95
121
|
##### Running in Docker (Using Github Repository)
|
|
96
122
|
|
|
@@ -26,7 +26,16 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
default=settings.SCHEMA,
|
|
27
27
|
show_default=True,
|
|
28
28
|
cls=MutuallyExclusiveOption,
|
|
29
|
-
mutually_exclusive=["s3_schema_url"],
|
|
29
|
+
mutually_exclusive=["s3_schema_url", "schema_url"],
|
|
30
|
+
)
|
|
31
|
+
@click.option(
|
|
32
|
+
"--schema_url",
|
|
33
|
+
help="URL for schema config",
|
|
34
|
+
type=click.STRING,
|
|
35
|
+
default=settings.SCHEMA_URL,
|
|
36
|
+
show_default=True,
|
|
37
|
+
cls=MutuallyExclusiveOption,
|
|
38
|
+
mutually_exclusive=["config", "s3_schema_url"],
|
|
30
39
|
)
|
|
31
40
|
@click.option(
|
|
32
41
|
"--s3_schema_url",
|
|
@@ -35,7 +44,7 @@ logger = logging.getLogger(__name__)
|
|
|
35
44
|
default=settings.S3_SCHEMA_URL,
|
|
36
45
|
show_default=True,
|
|
37
46
|
cls=MutuallyExclusiveOption,
|
|
38
|
-
mutually_exclusive=["config"],
|
|
47
|
+
mutually_exclusive=["config", "schema_url"],
|
|
39
48
|
)
|
|
40
49
|
@click.option("--host", "-h", help="PG_HOST override")
|
|
41
50
|
@click.option("--password", is_flag=True, help="Prompt for database password")
|
|
@@ -67,6 +76,7 @@ logger = logging.getLogger(__name__)
|
|
|
67
76
|
def main(
|
|
68
77
|
teardown: bool,
|
|
69
78
|
config: str,
|
|
79
|
+
schema_url: str,
|
|
70
80
|
s3_schema_url: str,
|
|
71
81
|
user: str,
|
|
72
82
|
password: bool,
|
|
@@ -89,13 +99,24 @@ def main(
|
|
|
89
99
|
)
|
|
90
100
|
kwargs = {key: value for key, value in kwargs.items() if value is not None}
|
|
91
101
|
|
|
92
|
-
validate_config(
|
|
102
|
+
validate_config(
|
|
103
|
+
config=config,
|
|
104
|
+
schema_url=schema_url,
|
|
105
|
+
s3_schema_url=s3_schema_url,
|
|
106
|
+
)
|
|
93
107
|
|
|
94
|
-
show_settings(
|
|
108
|
+
show_settings(
|
|
109
|
+
config=config,
|
|
110
|
+
schema_url=schema_url,
|
|
111
|
+
s3_schema_url=s3_schema_url,
|
|
112
|
+
**kwargs,
|
|
113
|
+
)
|
|
95
114
|
|
|
96
115
|
validate: bool = False if teardown else True
|
|
97
116
|
|
|
98
|
-
for doc in config_loader(
|
|
117
|
+
for doc in config_loader(
|
|
118
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
119
|
+
):
|
|
99
120
|
sync: Sync = Sync(
|
|
100
121
|
doc,
|
|
101
122
|
verbose=verbose,
|
|
@@ -53,7 +53,13 @@ from threading import Thread
|
|
|
53
53
|
import click
|
|
54
54
|
import sqlalchemy as sa
|
|
55
55
|
|
|
56
|
-
from pgsync.settings import
|
|
56
|
+
from pgsync.settings import (
|
|
57
|
+
BLOCK_SIZE,
|
|
58
|
+
CHECKPOINT_PATH,
|
|
59
|
+
S3_SCHEMA_URL,
|
|
60
|
+
SCHEMA,
|
|
61
|
+
SCHEMA_URL,
|
|
62
|
+
)
|
|
57
63
|
from pgsync.sync import Sync
|
|
58
64
|
from pgsync.utils import (
|
|
59
65
|
config_loader,
|
|
@@ -386,7 +392,16 @@ def run_task(
|
|
|
386
392
|
default=SCHEMA,
|
|
387
393
|
show_default=True,
|
|
388
394
|
cls=MutuallyExclusiveOption,
|
|
389
|
-
mutually_exclusive=["s3_schema_url"],
|
|
395
|
+
mutually_exclusive=["s3_schema_url", "schema_url"],
|
|
396
|
+
)
|
|
397
|
+
@click.option(
|
|
398
|
+
"--schema_url",
|
|
399
|
+
help="URL for schema config",
|
|
400
|
+
type=click.STRING,
|
|
401
|
+
default=SCHEMA_URL,
|
|
402
|
+
show_default=True,
|
|
403
|
+
cls=MutuallyExclusiveOption,
|
|
404
|
+
mutually_exclusive=["config", "s3_schema_url"],
|
|
390
405
|
)
|
|
391
406
|
@click.option(
|
|
392
407
|
"--s3_schema_url",
|
|
@@ -395,7 +410,7 @@ def run_task(
|
|
|
395
410
|
default=S3_SCHEMA_URL,
|
|
396
411
|
show_default=True,
|
|
397
412
|
cls=MutuallyExclusiveOption,
|
|
398
|
-
mutually_exclusive=["config"],
|
|
413
|
+
mutually_exclusive=["config", "schema_url"],
|
|
399
414
|
)
|
|
400
415
|
@click.option(
|
|
401
416
|
"--verbose",
|
|
@@ -428,7 +443,12 @@ def run_task(
|
|
|
428
443
|
default="multiprocess_async",
|
|
429
444
|
)
|
|
430
445
|
def main(
|
|
431
|
-
config: str,
|
|
446
|
+
config: str,
|
|
447
|
+
schema_url: str,
|
|
448
|
+
s3_schema_url: str,
|
|
449
|
+
nprocs: int,
|
|
450
|
+
mode: str,
|
|
451
|
+
verbose: bool,
|
|
432
452
|
) -> None:
|
|
433
453
|
"""
|
|
434
454
|
TODO:
|
|
@@ -436,11 +456,17 @@ def main(
|
|
|
436
456
|
- Handle KeyboardInterrupt Exception
|
|
437
457
|
"""
|
|
438
458
|
|
|
439
|
-
validate_config(
|
|
459
|
+
validate_config(
|
|
460
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
461
|
+
)
|
|
440
462
|
|
|
441
|
-
show_settings(
|
|
463
|
+
show_settings(
|
|
464
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
465
|
+
)
|
|
442
466
|
|
|
443
|
-
for doc in config_loader(
|
|
467
|
+
for doc in config_loader(
|
|
468
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
469
|
+
):
|
|
444
470
|
tasks: t.Generator = fetch_tasks(doc)
|
|
445
471
|
if mode == "synchronous":
|
|
446
472
|
synchronous(tasks, doc, verbose=verbose)
|
|
@@ -31,6 +31,8 @@ from .exc import (
|
|
|
31
31
|
)
|
|
32
32
|
from .settings import (
|
|
33
33
|
IS_MYSQL_COMPAT,
|
|
34
|
+
MYSQL_DATABASE,
|
|
35
|
+
PG_DATABASE,
|
|
34
36
|
PG_HOST_RO,
|
|
35
37
|
PG_PASSWORD_RO,
|
|
36
38
|
PG_PORT_RO,
|
|
@@ -39,6 +41,12 @@ from .settings import (
|
|
|
39
41
|
PG_URL_RO,
|
|
40
42
|
PG_USER_RO,
|
|
41
43
|
QUERY_CHUNK_SIZE,
|
|
44
|
+
SQLALCHEMY_MAX_OVERFLOW,
|
|
45
|
+
SQLALCHEMY_POOL_PRE_PING,
|
|
46
|
+
SQLALCHEMY_POOL_RECYCLE,
|
|
47
|
+
SQLALCHEMY_POOL_SIZE,
|
|
48
|
+
SQLALCHEMY_POOL_TIMEOUT,
|
|
49
|
+
SQLALCHEMY_USE_NULLPOOL,
|
|
42
50
|
STREAM_RESULTS,
|
|
43
51
|
)
|
|
44
52
|
from .trigger import CREATE_TRIGGER_TEMPLATE
|
|
@@ -230,6 +238,7 @@ class Base(object):
|
|
|
230
238
|
self.__columns: dict = {}
|
|
231
239
|
self.verbose: bool = verbose
|
|
232
240
|
self._conn = None
|
|
241
|
+
self._session = None
|
|
233
242
|
|
|
234
243
|
def connect(self) -> None:
|
|
235
244
|
"""Connect to database."""
|
|
@@ -338,8 +347,19 @@ class Base(object):
|
|
|
338
347
|
|
|
339
348
|
@property
|
|
340
349
|
def session(self) -> sessionmaker:
|
|
341
|
-
|
|
342
|
-
|
|
350
|
+
if self._session is None:
|
|
351
|
+
Session = sessionmaker(bind=self.engine, autoflush=True)
|
|
352
|
+
self._session = Session()
|
|
353
|
+
return self._session
|
|
354
|
+
|
|
355
|
+
def close_session(self) -> None:
|
|
356
|
+
"""Close the cached session and reset it."""
|
|
357
|
+
if self._session is not None:
|
|
358
|
+
try:
|
|
359
|
+
self._session.close()
|
|
360
|
+
except Exception:
|
|
361
|
+
pass
|
|
362
|
+
self._session = None
|
|
343
363
|
|
|
344
364
|
@property
|
|
345
365
|
def engine(self) -> sa.engine.Engine:
|
|
@@ -822,6 +842,7 @@ class Base(object):
|
|
|
822
842
|
schema: str,
|
|
823
843
|
tables: t.Set,
|
|
824
844
|
user_defined_fkey_tables: dict,
|
|
845
|
+
node_columns: dict,
|
|
825
846
|
) -> None:
|
|
826
847
|
create_view(
|
|
827
848
|
self.engine,
|
|
@@ -832,6 +853,7 @@ class Base(object):
|
|
|
832
853
|
tables,
|
|
833
854
|
user_defined_fkey_tables,
|
|
834
855
|
self._materialized_views(schema),
|
|
856
|
+
node_columns,
|
|
835
857
|
)
|
|
836
858
|
|
|
837
859
|
def drop_view(self, schema: str) -> None:
|
|
@@ -946,9 +968,9 @@ class Base(object):
|
|
|
946
968
|
"""Check if the trigger function exists."""
|
|
947
969
|
return self.exists(
|
|
948
970
|
sa.text(
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
971
|
+
"SELECT 1 FROM pg_proc WHERE proname = :name "
|
|
972
|
+
"AND pronamespace = (SELECT oid FROM pg_namespace "
|
|
973
|
+
"WHERE nspname = :schema)"
|
|
952
974
|
).bindparams(name=TRIGGER_FUNC, schema=schema),
|
|
953
975
|
)
|
|
954
976
|
|
|
@@ -989,7 +1011,7 @@ class Base(object):
|
|
|
989
1011
|
self.disable_trigger(schema, table)
|
|
990
1012
|
logger.debug(f"Disabled trigger on table: {schema}.{table}")
|
|
991
1013
|
|
|
992
|
-
def enable_trigger(self, schema: str, table
|
|
1014
|
+
def enable_trigger(self, schema: str, table: str) -> None:
|
|
993
1015
|
"""Enable a pgsync defined trigger."""
|
|
994
1016
|
for name in ("notify", "truncate"):
|
|
995
1017
|
self.execute(
|
|
@@ -1107,20 +1129,21 @@ class Base(object):
|
|
|
1107
1129
|
# including trailing space below is deliberate
|
|
1108
1130
|
suffix: str = f"{row[span[1]:]} "
|
|
1109
1131
|
|
|
1110
|
-
if "old-key" and "new-tuple" in suffix:
|
|
1132
|
+
if "old-key" in suffix and "new-tuple" in suffix:
|
|
1111
1133
|
# this can only be an UPDATE operation
|
|
1112
1134
|
if payload.tg_op != UPDATE:
|
|
1113
1135
|
msg = f"Unknown {payload.tg_op} operation for row: {row}"
|
|
1114
1136
|
raise LogicalSlotParseError(msg)
|
|
1115
1137
|
|
|
1116
|
-
i: int = suffix.
|
|
1138
|
+
i: int = suffix.find("old-key:")
|
|
1117
1139
|
if i > -1:
|
|
1118
|
-
j: int = suffix.
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1140
|
+
j: int = suffix.find("new-tuple:")
|
|
1141
|
+
if j > -1:
|
|
1142
|
+
s: str = suffix[i + len("old-key:") : j]
|
|
1143
|
+
for key, value in _parse_logical_slot(s):
|
|
1144
|
+
payload.old[key] = value
|
|
1122
1145
|
|
|
1123
|
-
i = suffix.
|
|
1146
|
+
i = suffix.find("new-tuple:")
|
|
1124
1147
|
if i > -1:
|
|
1125
1148
|
s = suffix[i + len("new-tuple:") :]
|
|
1126
1149
|
for key, value in _parse_logical_slot(s):
|
|
@@ -1336,7 +1359,28 @@ def _pg_engine(
|
|
|
1336
1359
|
password=password,
|
|
1337
1360
|
port=port,
|
|
1338
1361
|
)
|
|
1339
|
-
|
|
1362
|
+
|
|
1363
|
+
# Use NullPool for testing to avoid connection exhaustion
|
|
1364
|
+
if SQLALCHEMY_USE_NULLPOOL:
|
|
1365
|
+
from sqlalchemy.pool import NullPool
|
|
1366
|
+
|
|
1367
|
+
return sa.create_engine(
|
|
1368
|
+
url,
|
|
1369
|
+
echo=echo,
|
|
1370
|
+
connect_args=connect_args,
|
|
1371
|
+
poolclass=NullPool,
|
|
1372
|
+
)
|
|
1373
|
+
|
|
1374
|
+
return sa.create_engine(
|
|
1375
|
+
url,
|
|
1376
|
+
echo=echo,
|
|
1377
|
+
connect_args=connect_args,
|
|
1378
|
+
pool_size=SQLALCHEMY_POOL_SIZE,
|
|
1379
|
+
max_overflow=SQLALCHEMY_MAX_OVERFLOW,
|
|
1380
|
+
pool_pre_ping=SQLALCHEMY_POOL_PRE_PING,
|
|
1381
|
+
pool_recycle=SQLALCHEMY_POOL_RECYCLE,
|
|
1382
|
+
pool_timeout=SQLALCHEMY_POOL_TIMEOUT,
|
|
1383
|
+
)
|
|
1340
1384
|
|
|
1341
1385
|
|
|
1342
1386
|
def pg_execute(
|
|
@@ -1367,7 +1411,7 @@ def create_database(database: str, echo: bool = False) -> None:
|
|
|
1367
1411
|
"""Create a database."""
|
|
1368
1412
|
logger.debug(f"Creating database: {database}")
|
|
1369
1413
|
with pg_engine(
|
|
1370
|
-
|
|
1414
|
+
MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE,
|
|
1371
1415
|
echo=echo,
|
|
1372
1416
|
) as engine:
|
|
1373
1417
|
pg_execute(
|
|
@@ -1382,8 +1426,7 @@ def drop_database(database: str, echo: bool = False) -> None:
|
|
|
1382
1426
|
"""Drop a database."""
|
|
1383
1427
|
logger.debug(f"Dropping database: {database}")
|
|
1384
1428
|
with pg_engine(
|
|
1385
|
-
|
|
1386
|
-
echo=echo,
|
|
1429
|
+
MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE, echo=echo
|
|
1387
1430
|
) as engine:
|
|
1388
1431
|
pg_execute(
|
|
1389
1432
|
engine,
|
|
@@ -1396,7 +1439,7 @@ def drop_database(database: str, echo: bool = False) -> None:
|
|
|
1396
1439
|
def database_exists(database: str, echo: bool = False) -> bool:
|
|
1397
1440
|
"""Check if database is present."""
|
|
1398
1441
|
with pg_engine(
|
|
1399
|
-
|
|
1442
|
+
MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE,
|
|
1400
1443
|
echo=echo,
|
|
1401
1444
|
) as engine:
|
|
1402
1445
|
with engine.connect() as conn:
|
|
@@ -135,6 +135,7 @@ ELASTICSEARCH_MAPPING_PARAMETERS = [
|
|
|
135
135
|
"boost",
|
|
136
136
|
"coerce",
|
|
137
137
|
"copy_to",
|
|
138
|
+
"dimension",
|
|
138
139
|
"doc_values",
|
|
139
140
|
"dynamic",
|
|
140
141
|
"eager_global_ordinals",
|
|
@@ -198,6 +199,7 @@ MATERIALIZED_VIEW_COLUMNS = [
|
|
|
198
199
|
"indices",
|
|
199
200
|
"primary_keys",
|
|
200
201
|
"table_name",
|
|
202
|
+
"columns",
|
|
201
203
|
]
|
|
202
204
|
|
|
203
205
|
# Primary key delimiter
|
|
@@ -20,6 +20,7 @@ def teardown(
|
|
|
20
20
|
drop_index: bool = True,
|
|
21
21
|
delete_checkpoint: bool = True,
|
|
22
22
|
config: t.Optional[str] = None,
|
|
23
|
+
schema_url: t.Optional[str] = None,
|
|
23
24
|
s3_schema_url: t.Optional[str] = None,
|
|
24
25
|
validate: bool = False,
|
|
25
26
|
) -> None:
|
|
@@ -33,11 +34,17 @@ def teardown(
|
|
|
33
34
|
drop_index (bool, optional): Whether to drop the index. Defaults to True.
|
|
34
35
|
delete_checkpoint (bool, optional): Whether to delete the checkpoint. Defaults to True.
|
|
35
36
|
config (Optional[str], optional): The configuration file path. Defaults to None.
|
|
37
|
+
schema_url (Optional[str], optional): The schema URL. Defaults to None.
|
|
38
|
+
s3_schema_url (Optional[str], optional): The S3 schema URL. Defaults to None.
|
|
36
39
|
validate (bool, optional): Whether to validate the configuration. Defaults to False.
|
|
37
40
|
"""
|
|
38
|
-
validate_config(
|
|
41
|
+
validate_config(
|
|
42
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
43
|
+
)
|
|
39
44
|
|
|
40
|
-
for doc in config_loader(
|
|
45
|
+
for doc in config_loader(
|
|
46
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
47
|
+
):
|
|
41
48
|
if not database_exists(doc["database"]):
|
|
42
49
|
logger.warning(f'Database {doc["database"]} does not exist')
|
|
43
50
|
continue
|
|
@@ -294,7 +294,7 @@ class Node(object):
|
|
|
294
294
|
self.columns.append(self.model.c[column_name])
|
|
295
295
|
|
|
296
296
|
@property
|
|
297
|
-
def primary_keys(self):
|
|
297
|
+
def primary_keys(self) -> t.List[sa.sql.ColumnElement]:
|
|
298
298
|
return [
|
|
299
299
|
self.model.c[str(sa.text(primary_key))]
|
|
300
300
|
for primary_key in self.model.primary_keys
|
|
@@ -311,7 +311,7 @@ class Node(object):
|
|
|
311
311
|
|
|
312
312
|
def add_child(self, node: Node) -> None:
|
|
313
313
|
"""All nodes except the root node must have a relationship defined."""
|
|
314
|
-
node.parent
|
|
314
|
+
node.parent = self
|
|
315
315
|
if not node.is_root and (
|
|
316
316
|
not node.relationship.type or not node.relationship.variant
|
|
317
317
|
):
|
|
@@ -388,7 +388,9 @@ class Tree(threading.local):
|
|
|
388
388
|
raise TableNotInNodeError(f"Table not specified in node: {nodes}")
|
|
389
389
|
|
|
390
390
|
if not set(nodes.keys()).issubset(set(NODE_ATTRIBUTES)):
|
|
391
|
-
attrs = set(nodes.keys()).difference(
|
|
391
|
+
attrs: t.Set[str] = set(nodes.keys()).difference(
|
|
392
|
+
set(NODE_ATTRIBUTES)
|
|
393
|
+
)
|
|
392
394
|
raise NodeAttributeError(f"Unknown node attribute(s): {attrs}")
|
|
393
395
|
|
|
394
396
|
node: Node = Node(
|
|
@@ -16,7 +16,7 @@ class Plugin(ABC):
|
|
|
16
16
|
"""Plugin base class."""
|
|
17
17
|
|
|
18
18
|
@abstractmethod
|
|
19
|
-
def transform(self, doc: dict, **kwargs) -> dict:
|
|
19
|
+
def transform(self, doc: dict, **kwargs: t.Any) -> dict:
|
|
20
20
|
"""This must be implemented by all derived classes."""
|
|
21
21
|
pass
|
|
22
22
|
|
|
@@ -90,7 +90,7 @@ class Plugins(object):
|
|
|
90
90
|
]:
|
|
91
91
|
self.walk(f"{package}.{pkg}")
|
|
92
92
|
|
|
93
|
-
def transform(self, docs:
|
|
93
|
+
def transform(self, docs: t.Iterable[dict]) -> t.Generator:
|
|
94
94
|
"""Applies all plugins to each doc."""
|
|
95
95
|
for doc in docs:
|
|
96
96
|
for plugin in self.plugins:
|
|
@@ -69,10 +69,15 @@ class RedisQueue(object):
|
|
|
69
69
|
return []
|
|
70
70
|
payloads = [json.loads(i) for i in items]
|
|
71
71
|
visible_map: dict = pg_visible_in_snapshot()(
|
|
72
|
-
[payload
|
|
72
|
+
[payload.get("xmin") for payload in payloads if "xmin" in payload]
|
|
73
73
|
)
|
|
74
74
|
visible: t.List[dict] = []
|
|
75
75
|
for item, payload in zip(items, payloads):
|
|
76
|
+
if "xmin" not in payload:
|
|
77
|
+
logger.warning(
|
|
78
|
+
f"Skipping payload without 'xmin' key: {payload}"
|
|
79
|
+
)
|
|
80
|
+
continue
|
|
76
81
|
if visible_map.get(payload["xmin"]):
|
|
77
82
|
# Claim atomically
|
|
78
83
|
removed = self.__db.lrem(self.key, 1, item)
|
|
@@ -80,7 +85,7 @@ class RedisQueue(object):
|
|
|
80
85
|
visible.append(payload)
|
|
81
86
|
return visible
|
|
82
87
|
|
|
83
|
-
def push(self, items: t.
|
|
88
|
+
def push(self, items: t.Iterable[dict]) -> None:
|
|
84
89
|
"""Push multiple items onto the queue."""
|
|
85
90
|
self.__db.rpush(self.key, *map(json.dumps, items))
|
|
86
91
|
|
|
@@ -97,5 +102,5 @@ class RedisQueue(object):
|
|
|
97
102
|
|
|
98
103
|
def get_meta(self, default: t.Any = None) -> t.Any:
|
|
99
104
|
"""Retrieve the stored value (or *default* if nothing is set)."""
|
|
100
|
-
raw = self.__db.get(self._meta_key)
|
|
105
|
+
raw: t.Optional[str] = self.__db.get(self._meta_key)
|
|
101
106
|
return json.loads(raw) if raw is not None else default
|
|
@@ -45,12 +45,27 @@ REPLICATION_SLOT_CLEANUP_INTERVAL = env.float(
|
|
|
45
45
|
# path to the application schema config
|
|
46
46
|
SCHEMA = env.str("SCHEMA", default=None)
|
|
47
47
|
S3_SCHEMA_URL = env.str("S3_SCHEMA_URL", default=None)
|
|
48
|
+
SCHEMA_URL = env.str("SCHEMA_URL", default=None)
|
|
48
49
|
USE_ASYNC = env.bool("USE_ASYNC", default=False)
|
|
49
50
|
STREAM_RESULTS = env.bool("STREAM_RESULTS", default=True)
|
|
50
51
|
# db polling interval
|
|
51
52
|
POLL_INTERVAL = env.float("POLL_INTERVAL", default=0.1)
|
|
52
53
|
FORMAT_WITH_COMMAS = env.bool("FORMAT_WITH_COMMAS", default=True)
|
|
53
54
|
|
|
55
|
+
# SQLAlchemy Settings:
|
|
56
|
+
# Use NullPool (no connection pooling) - useful for testing or when you want to close connections immediately
|
|
57
|
+
SQLALCHEMY_USE_NULLPOOL = env.bool("SQLALCHEMY_USE_NULLPOOL", default=False)
|
|
58
|
+
# This is the number of connections that will be persistently maintained in the pool.
|
|
59
|
+
SQLALCHEMY_POOL_SIZE = env.int("SQLALCHEMY_POOL_SIZE", default=5)
|
|
60
|
+
# This is the number of connections that can be opened beyond the pool_size when all connections in the pool are in use.
|
|
61
|
+
SQLALCHEMY_MAX_OVERFLOW = env.int("SQLALCHEMY_MAX_OVERFLOW", default=10)
|
|
62
|
+
# When set to True, a "ping" will be performed on connections before they are checked out of the pool to ensure they are still live.
|
|
63
|
+
SQLALCHEMY_POOL_PRE_PING = env.bool("SQLALCHEMY_POOL_PRE_PING", default=False)
|
|
64
|
+
# The default of -1 means connections are not recycled based on a timeout. If set to a positive integer, connections will be recycled after that many seconds. For example, 3600 recycles connections after one hour.
|
|
65
|
+
SQLALCHEMY_POOL_RECYCLE = env.int("SQLALCHEMY_POOL_RECYCLE", default=-1)
|
|
66
|
+
# This is the number of seconds to wait for a connection to become available from the pool before raising a TimeoutError.
|
|
67
|
+
SQLALCHEMY_POOL_TIMEOUT = env.int("SQLALCHEMY_POOL_TIMEOUT", default=30)
|
|
68
|
+
|
|
54
69
|
# Elasticsearch/OpenSearch:
|
|
55
70
|
ELASTICSEARCH_API_KEY = env.str("ELASTICSEARCH_API_KEY", default=None)
|
|
56
71
|
ELASTICSEARCH_API_KEY_ID = env.str("ELASTICSEARCH_API_KEY_ID", default=None)
|
|
@@ -187,6 +202,9 @@ if PG_URL:
|
|
|
187
202
|
else:
|
|
188
203
|
# If PG_URL is not set, we need to use the other PG_* variables
|
|
189
204
|
PG_USER = env.str("PG_USER")
|
|
205
|
+
# The default database name, e.g. postgres or defaultdb
|
|
206
|
+
PG_DATABASE = env.str("PG_DATABASE", default="postgres")
|
|
207
|
+
MYSQL_DATABASE = env.str("MYSQL_DATABASE", default="information_schema")
|
|
190
208
|
|
|
191
209
|
# Read-only Postgres:
|
|
192
210
|
# This is used for read-only consumers that do not require replication slots or triggers.
|
|
@@ -14,7 +14,7 @@ class Singleton(type):
|
|
|
14
14
|
|
|
15
15
|
_instances: dict = {}
|
|
16
16
|
|
|
17
|
-
def __call__(cls, *args, **kwargs):
|
|
17
|
+
def __call__(cls, *args: t.Any, **kwargs: t.Any) -> t.Any:
|
|
18
18
|
"""
|
|
19
19
|
If an instance of the class has already been created with the same arguments,
|
|
20
20
|
return that instance. Otherwise, create a new instance and return it.
|