pgsync 4.1.0__tar.gz → 4.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgsync-4.1.0 → pgsync-4.2.0}/PKG-INFO +11 -11
- {pgsync-4.1.0 → pgsync-4.2.0}/README.md +42 -4
- {pgsync-4.1.0 → pgsync-4.2.0}/README.rst +1 -1
- {pgsync-4.1.0 → pgsync-4.2.0}/bin/bootstrap +24 -4
- {pgsync-4.1.0 → pgsync-4.2.0}/bin/parallel_sync +28 -7
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/__init__.py +1 -1
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/base.py +70 -8
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/helper.py +4 -3
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/redisqueue.py +28 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/settings.py +19 -1
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/sync.py +85 -11
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/utils.py +76 -35
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/PKG-INFO +11 -11
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/requires.txt +9 -9
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_helper.py +19 -10
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_redisqueue.py +63 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_urls.py +16 -10
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_utils.py +36 -11
- {pgsync-4.1.0 → pgsync-4.2.0}/AUTHORS.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/CONTRIBUTING.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/HISTORY.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/LICENSE +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/MANIFEST.in +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/bin/pgsync +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/Makefile +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/authors.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/changelog.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/conf.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/contributing.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/history.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/index.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/installation.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/logo.png +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/make.bat +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/readme.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/docs/usage.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/constants.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/exc.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/node.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/plugin.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/querybuilder.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/search_client.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/singleton.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/transform.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/trigger.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/urls.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/view.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/SOURCES.txt +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/pyproject.toml +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/setup.cfg +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/setup.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/__init__.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/conftest.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/fixtures/schema.json +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_base.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_constants.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_env_vars.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_log_handlers.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_node.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_query_builder.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_search_client.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_settings.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync_nested_children.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync_root.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync_single_child_fk_on_child.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync_single_child_fk_on_parent.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_transform.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_trigger.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_unique_behaviour.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_view.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.0}/tests/testing_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version: 4.1.0
|
|
3
|
+
Version: 4.2.0
|
|
4
4
|
Summary: Postgres to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
@@ -32,15 +32,15 @@ License-File: LICENSE
|
|
|
32
32
|
License-File: AUTHORS.rst
|
|
33
33
|
Requires-Dist: async-timeout==5.0.1
|
|
34
34
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
35
|
-
Requires-Dist: boto3==1.
|
|
36
|
-
Requires-Dist: botocore==1.
|
|
37
|
-
Requires-Dist: certifi==2025.
|
|
35
|
+
Requires-Dist: boto3==1.40.1
|
|
36
|
+
Requires-Dist: botocore==1.40.1
|
|
37
|
+
Requires-Dist: certifi==2025.8.3
|
|
38
38
|
Requires-Dist: charset-normalizer==3.4.2
|
|
39
39
|
Requires-Dist: click==8.1.8
|
|
40
40
|
Requires-Dist: elastic-transport==8.17.1
|
|
41
|
-
Requires-Dist: elasticsearch==8.
|
|
42
|
-
Requires-Dist: elasticsearch-dsl==8.
|
|
43
|
-
Requires-Dist: environs==14.
|
|
41
|
+
Requires-Dist: elasticsearch==8.19.0
|
|
42
|
+
Requires-Dist: elasticsearch-dsl==8.15.4
|
|
43
|
+
Requires-Dist: environs==14.3.0
|
|
44
44
|
Requires-Dist: events==0.5
|
|
45
45
|
Requires-Dist: greenlet==3.2.3
|
|
46
46
|
Requires-Dist: idna==3.10
|
|
@@ -54,11 +54,11 @@ Requires-Dist: python-dotenv==1.1.1
|
|
|
54
54
|
Requires-Dist: redis==6.2.0
|
|
55
55
|
Requires-Dist: requests==2.32.4
|
|
56
56
|
Requires-Dist: requests-aws4auth==1.3.1
|
|
57
|
-
Requires-Dist: s3transfer==0.13.
|
|
57
|
+
Requires-Dist: s3transfer==0.13.1
|
|
58
58
|
Requires-Dist: six==1.17.0
|
|
59
|
-
Requires-Dist: sqlalchemy==2.0.
|
|
59
|
+
Requires-Dist: sqlalchemy==2.0.42
|
|
60
60
|
Requires-Dist: sqlparse==0.5.3
|
|
61
|
-
Requires-Dist: typing-extensions==4.14.
|
|
61
|
+
Requires-Dist: typing-extensions==4.14.1
|
|
62
62
|
Requires-Dist: urllib3==1.26.20
|
|
63
63
|
Dynamic: author
|
|
64
64
|
Dynamic: author-email
|
|
@@ -90,7 +90,7 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
90
90
|
- [Postgres](https://www.postgresql.org) 9.6+
|
|
91
91
|
- [Redis](https://redis.io) 3.1.0+
|
|
92
92
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
93
|
-
- [
|
|
93
|
+
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
94
94
|
|
|
95
95
|
### Postgres setup
|
|
96
96
|
|
|
@@ -40,7 +40,7 @@ of engineering and development.
|
|
|
40
40
|
Other benefits of PGSync include:
|
|
41
41
|
- Real-time analytics
|
|
42
42
|
- Reliable primary datastore/source of truth
|
|
43
|
-
- Scale on-demand
|
|
43
|
+
- Scale on-demand (multiple consumers)
|
|
44
44
|
- Easily join multiple nested tables
|
|
45
45
|
|
|
46
46
|
#### Why?
|
|
@@ -66,7 +66,7 @@ the search capabilities of [Elasticsearch](https://www.elastic.co/products/elast
|
|
|
66
66
|
|
|
67
67
|
#### How it works
|
|
68
68
|
|
|
69
|
-
PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [
|
|
69
|
+
PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQLAlchemy](https://www.sqlalchemy.org).
|
|
70
70
|
|
|
71
71
|
PGSync leverages the [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) feature of [Postgres](https://www.postgresql.org) (introduced in PostgreSQL 9.4) to capture a continuous stream of change events.
|
|
72
72
|
This feature needs to be enabled in your [Postgres](https://www.postgresql.org) configuration file by setting in the postgresql.conf file:
|
|
@@ -93,9 +93,14 @@ There are several ways of installing and trying PGSync
|
|
|
93
93
|
- [Manual configuration](#manual-configuration)
|
|
94
94
|
|
|
95
95
|
|
|
96
|
-
##### Running in Docker
|
|
96
|
+
##### Running in Docker (Using Github Repository)
|
|
97
97
|
|
|
98
98
|
To start up all services with Docker.
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
$ git clone https://github.com/toluaina/pgsync
|
|
102
|
+
```
|
|
103
|
+
|
|
99
104
|
Run:
|
|
100
105
|
```
|
|
101
106
|
$ docker-compose up
|
|
@@ -106,6 +111,39 @@ Show the content in Elasticsearch/OpenSearch
|
|
|
106
111
|
$ curl -X GET http://[Elasticsearch/OpenSearch host]:9201/reservations/_search?pretty=true
|
|
107
112
|
```
|
|
108
113
|
|
|
114
|
+
|
|
115
|
+
##### Running with Docker (Using Image Repository)
|
|
116
|
+
|
|
117
|
+
To start all services with Docker, follow these steps:
|
|
118
|
+
|
|
119
|
+
1. Pull the Docker image:
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
$ docker pull toluaina1/pgsync:latest
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
2. Run the container:
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
$ docker run --rm -it \
|
|
129
|
+
-e REDIS_CHECKPOINT=true \
|
|
130
|
+
-e REDIS_HOST=<redis_host_address> \
|
|
131
|
+
-e PG_URL=postgres://<username>:<password>@<postgres_host>/<database> \
|
|
132
|
+
-e ELASTICSEARCH_URL=http://<elasticsearch_host>:9200 \
|
|
133
|
+
-v "$(pwd)/schema.json:/app/schema.json" \
|
|
134
|
+
toluaina1/pgsync:latest -c schema.json -d -b
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Environment variable placeholders - full list [here](https://pgsync.com/env-vars):
|
|
138
|
+
|
|
139
|
+
- redis_host_address — Address of the Redis server (e.g., host.docker.internal for local Docker setup)
|
|
140
|
+
- username — PostgreSQL username
|
|
141
|
+
- password — PostgreSQL password
|
|
142
|
+
- postgres_host — Host address for PostgreSQL instance (e.g., host.docker.internal)
|
|
143
|
+
- database — Name of PostgreSQL database
|
|
144
|
+
- elasticsearch_host — Address of Elasticsearch/OpenSearch instance (e.g., host.docker.internal)
|
|
145
|
+
|
|
146
|
+
|
|
109
147
|
##### Manual configuration
|
|
110
148
|
|
|
111
149
|
- Setup
|
|
@@ -156,7 +194,7 @@ Key features of PGSync are:
|
|
|
156
194
|
- [Postgres](https://www.postgresql.org) 9.6+
|
|
157
195
|
- [Redis](https://redis.io) 3.1.0+
|
|
158
196
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
159
|
-
- [
|
|
197
|
+
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
160
198
|
|
|
161
199
|
|
|
162
200
|
#### Example
|
|
@@ -12,7 +12,7 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
12
12
|
- [Postgres](https://www.postgresql.org) 9.6+
|
|
13
13
|
- [Redis](https://redis.io) 3.1.0+
|
|
14
14
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
15
|
-
- [
|
|
15
|
+
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
16
16
|
|
|
17
17
|
### Postgres setup
|
|
18
18
|
|
|
@@ -5,8 +5,14 @@ import logging
|
|
|
5
5
|
|
|
6
6
|
import click
|
|
7
7
|
|
|
8
|
+
from pgsync import settings
|
|
8
9
|
from pgsync.sync import Sync
|
|
9
|
-
from pgsync.utils import
|
|
10
|
+
from pgsync.utils import (
|
|
11
|
+
config_loader,
|
|
12
|
+
MutuallyExclusiveOption,
|
|
13
|
+
show_settings,
|
|
14
|
+
validate_config,
|
|
15
|
+
)
|
|
10
16
|
|
|
11
17
|
logger = logging.getLogger(__name__)
|
|
12
18
|
|
|
@@ -17,6 +23,19 @@ logger = logging.getLogger(__name__)
|
|
|
17
23
|
"-c",
|
|
18
24
|
help="Schema config",
|
|
19
25
|
type=click.Path(exists=True),
|
|
26
|
+
default=settings.SCHEMA,
|
|
27
|
+
show_default=True,
|
|
28
|
+
cls=MutuallyExclusiveOption,
|
|
29
|
+
mutually_exclusive=["s3_schema_url"],
|
|
30
|
+
)
|
|
31
|
+
@click.option(
|
|
32
|
+
"--s3_schema_url",
|
|
33
|
+
help="S3 URL for schema config",
|
|
34
|
+
type=click.STRING,
|
|
35
|
+
default=settings.S3_SCHEMA_URL,
|
|
36
|
+
show_default=True,
|
|
37
|
+
cls=MutuallyExclusiveOption,
|
|
38
|
+
mutually_exclusive=["config"],
|
|
20
39
|
)
|
|
21
40
|
@click.option("--host", "-h", help="PG_HOST override")
|
|
22
41
|
@click.option("--password", is_flag=True, help="Prompt for database password")
|
|
@@ -48,6 +67,7 @@ logger = logging.getLogger(__name__)
|
|
|
48
67
|
def main(
|
|
49
68
|
teardown: bool,
|
|
50
69
|
config: str,
|
|
70
|
+
s3_schema_url: str,
|
|
51
71
|
user: str,
|
|
52
72
|
password: bool,
|
|
53
73
|
host: str,
|
|
@@ -69,13 +89,13 @@ def main(
|
|
|
69
89
|
)
|
|
70
90
|
kwargs = {key: value for key, value in kwargs.items() if value is not None}
|
|
71
91
|
|
|
72
|
-
config
|
|
92
|
+
validate_config(config=config, s3_schema_url=s3_schema_url)
|
|
73
93
|
|
|
74
|
-
show_settings(config)
|
|
94
|
+
show_settings(config=config, s3_schema_url=s3_schema_url)
|
|
75
95
|
|
|
76
96
|
validate: bool = False if teardown else True
|
|
77
97
|
|
|
78
|
-
for doc in config_loader(config):
|
|
98
|
+
for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
|
|
79
99
|
sync: Sync = Sync(
|
|
80
100
|
doc,
|
|
81
101
|
verbose=verbose,
|
|
@@ -47,16 +47,21 @@ import sys
|
|
|
47
47
|
import typing as t
|
|
48
48
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
|
49
49
|
from dataclasses import dataclass
|
|
50
|
-
from pathlib import Path
|
|
51
50
|
from queue import Queue
|
|
52
51
|
from threading import Thread
|
|
53
52
|
|
|
54
53
|
import click
|
|
55
54
|
import sqlalchemy as sa
|
|
56
55
|
|
|
57
|
-
from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH
|
|
56
|
+
from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH, S3_SCHEMA_URL, SCHEMA
|
|
58
57
|
from pgsync.sync import Sync
|
|
59
|
-
from pgsync.utils import
|
|
58
|
+
from pgsync.utils import (
|
|
59
|
+
config_loader,
|
|
60
|
+
MutuallyExclusiveOption,
|
|
61
|
+
show_settings,
|
|
62
|
+
timeit,
|
|
63
|
+
validate_config,
|
|
64
|
+
)
|
|
60
65
|
|
|
61
66
|
|
|
62
67
|
def save_ctid(page: int, row: int, filename: str) -> None:
|
|
@@ -378,6 +383,19 @@ def run_task(
|
|
|
378
383
|
"-c",
|
|
379
384
|
help="Schema config",
|
|
380
385
|
type=click.Path(exists=True),
|
|
386
|
+
default=SCHEMA,
|
|
387
|
+
show_default=True,
|
|
388
|
+
cls=MutuallyExclusiveOption,
|
|
389
|
+
mutually_exclusive=["s3_schema_url"],
|
|
390
|
+
)
|
|
391
|
+
@click.option(
|
|
392
|
+
"--s3_schema_url",
|
|
393
|
+
help="S3 URL for schema config",
|
|
394
|
+
type=click.STRING,
|
|
395
|
+
default=S3_SCHEMA_URL,
|
|
396
|
+
show_default=True,
|
|
397
|
+
cls=MutuallyExclusiveOption,
|
|
398
|
+
mutually_exclusive=["config"],
|
|
381
399
|
)
|
|
382
400
|
@click.option(
|
|
383
401
|
"--verbose",
|
|
@@ -409,17 +427,20 @@ def run_task(
|
|
|
409
427
|
),
|
|
410
428
|
default="multiprocess_async",
|
|
411
429
|
)
|
|
412
|
-
def main(
|
|
430
|
+
def main(
|
|
431
|
+
config: str, s3_schema_url: str, nprocs: int, mode: str, verbose: bool
|
|
432
|
+
) -> None:
|
|
413
433
|
"""
|
|
414
434
|
TODO:
|
|
415
435
|
- Track progress across cpus/threads
|
|
416
436
|
- Handle KeyboardInterrupt Exception
|
|
417
437
|
"""
|
|
418
|
-
config: str = get_config(config)
|
|
419
438
|
|
|
420
|
-
|
|
439
|
+
validate_config(config=config, s3_schema_url=s3_schema_url)
|
|
440
|
+
|
|
441
|
+
show_settings(config=config, s3_schema_url=s3_schema_url)
|
|
421
442
|
|
|
422
|
-
for doc in config_loader(config):
|
|
443
|
+
for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
|
|
423
444
|
tasks: t.Generator = fetch_tasks(doc)
|
|
424
445
|
if mode == "synchronous":
|
|
425
446
|
synchronous(tasks, doc, verbose=verbose)
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
import threading
|
|
5
6
|
import time
|
|
6
7
|
import typing as t
|
|
7
8
|
from contextlib import contextmanager
|
|
@@ -28,8 +29,13 @@ from .exc import (
|
|
|
28
29
|
TableNotFoundError,
|
|
29
30
|
)
|
|
30
31
|
from .settings import (
|
|
32
|
+
PG_HOST_RO,
|
|
33
|
+
PG_PASSWORD_RO,
|
|
34
|
+
PG_PORT_RO,
|
|
31
35
|
PG_SSLMODE,
|
|
32
36
|
PG_SSLROOTCERT,
|
|
37
|
+
PG_URL_RO,
|
|
38
|
+
PG_USER_RO,
|
|
33
39
|
QUERY_CHUNK_SIZE,
|
|
34
40
|
STREAM_RESULTS,
|
|
35
41
|
)
|
|
@@ -48,7 +54,6 @@ try:
|
|
|
48
54
|
except ImportError:
|
|
49
55
|
pass
|
|
50
56
|
|
|
51
|
-
|
|
52
57
|
logger = logging.getLogger(__name__)
|
|
53
58
|
|
|
54
59
|
SSL_MODES = (
|
|
@@ -153,6 +158,8 @@ class TupleIdentifierType(sa.types.UserDefinedType):
|
|
|
153
158
|
|
|
154
159
|
|
|
155
160
|
class Base(object):
|
|
161
|
+
_thread_local = threading.local()
|
|
162
|
+
|
|
156
163
|
INT_TYPES = (
|
|
157
164
|
"bigint",
|
|
158
165
|
"bigserial",
|
|
@@ -190,6 +197,26 @@ class Base(object):
|
|
|
190
197
|
self.__engine: sa.engine.Engine = _pg_engine(
|
|
191
198
|
database, echo=False, **kwargs
|
|
192
199
|
)
|
|
200
|
+
self.__engine_ro: t.Optional[sa.engine.Engine] = None
|
|
201
|
+
if (
|
|
202
|
+
PG_USER_RO
|
|
203
|
+
or PG_HOST_RO
|
|
204
|
+
or PG_PASSWORD_RO
|
|
205
|
+
or PG_PORT_RO
|
|
206
|
+
or PG_URL_RO
|
|
207
|
+
):
|
|
208
|
+
kwargs.update(
|
|
209
|
+
{
|
|
210
|
+
"user": PG_USER_RO,
|
|
211
|
+
"host": PG_HOST_RO,
|
|
212
|
+
"password": PG_PASSWORD_RO,
|
|
213
|
+
"port": PG_PORT_RO,
|
|
214
|
+
"url": PG_URL_RO,
|
|
215
|
+
}
|
|
216
|
+
)
|
|
217
|
+
self.__engine_ro: sa.engine.Engine = _pg_engine(
|
|
218
|
+
database, echo=False, **kwargs
|
|
219
|
+
)
|
|
193
220
|
self.__schemas: t.Optional[dict] = None
|
|
194
221
|
# models is a dict of f'{schema}.{table}'
|
|
195
222
|
self.__models: dict = {}
|
|
@@ -307,6 +334,8 @@ class Base(object):
|
|
|
307
334
|
@property
|
|
308
335
|
def engine(self) -> sa.engine.Engine:
|
|
309
336
|
"""Get the database engine."""
|
|
337
|
+
if getattr(self._thread_local, "read_only", False):
|
|
338
|
+
return self.__engine_ro
|
|
310
339
|
return self.__engine
|
|
311
340
|
|
|
312
341
|
@property
|
|
@@ -910,6 +939,37 @@ class Base(object):
|
|
|
910
939
|
label="txid_current",
|
|
911
940
|
)[0]
|
|
912
941
|
|
|
942
|
+
def pg_visible_in_snapshot(
|
|
943
|
+
self, literal_binds: bool = False
|
|
944
|
+
) -> t.Callable[[t.List[int]], dict]:
|
|
945
|
+
def _pg_visible_in_snapshot(xid8s: t.List[int]) -> dict:
|
|
946
|
+
if not xid8s:
|
|
947
|
+
return {}
|
|
948
|
+
# TODO: use the SQLAlchemy ORM to handle this query
|
|
949
|
+
statement = sa.text(
|
|
950
|
+
"""
|
|
951
|
+
SELECT xid AS xid8,
|
|
952
|
+
PG_VISIBLE_IN_SNAPSHOT(xid::xid8, PG_CURRENT_SNAPSHOT()) AS visible
|
|
953
|
+
FROM UNNEST(CAST(:xid8s AS text[]))
|
|
954
|
+
WITH ORDINALITY AS t(xid, ord)
|
|
955
|
+
ORDER BY t.ord
|
|
956
|
+
"""
|
|
957
|
+
)
|
|
958
|
+
if self.verbose:
|
|
959
|
+
compiled_query(
|
|
960
|
+
statement,
|
|
961
|
+
label="xmin_visibility",
|
|
962
|
+
literal_binds=literal_binds,
|
|
963
|
+
)
|
|
964
|
+
|
|
965
|
+
# xid8s = list of xid8 strings
|
|
966
|
+
params: dict = {"xid8s": list(map(str, xid8s))}
|
|
967
|
+
with self.__engine_ro.connect() as conn:
|
|
968
|
+
result = conn.execute(statement, params)
|
|
969
|
+
return {int(row.xid8): row.visible for row in result}
|
|
970
|
+
|
|
971
|
+
return _pg_visible_in_snapshot
|
|
972
|
+
|
|
913
973
|
def parse_value(self, type_: str, value: str) -> t.Optional[str]:
|
|
914
974
|
"""
|
|
915
975
|
Parse datatypes from db.
|
|
@@ -1168,6 +1228,7 @@ def _pg_engine(
|
|
|
1168
1228
|
echo: bool = False,
|
|
1169
1229
|
sslmode: t.Optional[str] = None,
|
|
1170
1230
|
sslrootcert: t.Optional[str] = None,
|
|
1231
|
+
url: t.Optional[str] = None,
|
|
1171
1232
|
) -> sa.engine.Engine:
|
|
1172
1233
|
connect_args: dict = {}
|
|
1173
1234
|
sslmode = sslmode or PG_SSLMODE
|
|
@@ -1187,13 +1248,14 @@ def _pg_engine(
|
|
|
1187
1248
|
)
|
|
1188
1249
|
connect_args["sslrootcert"] = sslrootcert
|
|
1189
1250
|
|
|
1190
|
-
url
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1251
|
+
if url is None:
|
|
1252
|
+
url: str = get_postgres_url(
|
|
1253
|
+
database,
|
|
1254
|
+
user=user,
|
|
1255
|
+
host=host,
|
|
1256
|
+
password=password,
|
|
1257
|
+
port=port,
|
|
1258
|
+
)
|
|
1197
1259
|
return sa.create_engine(url, echo=echo, connect_args=connect_args)
|
|
1198
1260
|
|
|
1199
1261
|
|
|
@@ -8,7 +8,7 @@ import sqlalchemy as sa
|
|
|
8
8
|
|
|
9
9
|
from .base import database_exists, drop_database
|
|
10
10
|
from .sync import Sync
|
|
11
|
-
from .utils import config_loader,
|
|
11
|
+
from .utils import config_loader, validate_config
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -20,6 +20,7 @@ def teardown(
|
|
|
20
20
|
drop_index: bool = True,
|
|
21
21
|
delete_checkpoint: bool = True,
|
|
22
22
|
config: t.Optional[str] = None,
|
|
23
|
+
s3_schema_url: t.Optional[str] = None,
|
|
23
24
|
validate: bool = False,
|
|
24
25
|
) -> None:
|
|
25
26
|
"""
|
|
@@ -34,9 +35,9 @@ def teardown(
|
|
|
34
35
|
config (Optional[str], optional): The configuration file path. Defaults to None.
|
|
35
36
|
validate (bool, optional): Whether to validate the configuration. Defaults to False.
|
|
36
37
|
"""
|
|
37
|
-
config
|
|
38
|
+
validate_config(config=config, s3_schema_url=s3_schema_url)
|
|
38
39
|
|
|
39
|
-
for doc in config_loader(config):
|
|
40
|
+
for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
|
|
40
41
|
if not database_exists(doc["database"]):
|
|
41
42
|
logger.warning(f'Database {doc["database"]} does not exist')
|
|
42
43
|
continue
|
|
@@ -52,6 +52,34 @@ class RedisQueue(object):
|
|
|
52
52
|
logger.debug(f"pop size: {len(items[0])}")
|
|
53
53
|
return list(map(lambda value: json.loads(value), items[0]))
|
|
54
54
|
|
|
55
|
+
def pop_visible_in_snapshot(
|
|
56
|
+
self,
|
|
57
|
+
pg_visible_in_snapshot: t.Callable[[t.List[int]], dict],
|
|
58
|
+
chunk_size: t.Optional[int] = None,
|
|
59
|
+
) -> t.List[dict]:
|
|
60
|
+
"""
|
|
61
|
+
Pop items in the queue that are visible in the current snapshot.
|
|
62
|
+
Uses the provided pg_visible_in_snapshot function to determine visibility.
|
|
63
|
+
This function is useful for read-only consumers that need to process items
|
|
64
|
+
that are visible in the current PostgreSQL snapshot.
|
|
65
|
+
"""
|
|
66
|
+
chunk_size = chunk_size or REDIS_READ_CHUNK_SIZE
|
|
67
|
+
items: t.List = self.__db.lrange(self.key, 0, chunk_size - 1)
|
|
68
|
+
if not items:
|
|
69
|
+
return []
|
|
70
|
+
payloads = [json.loads(i) for i in items]
|
|
71
|
+
visible_map: dict = pg_visible_in_snapshot()(
|
|
72
|
+
[payload["xmin"] for payload in payloads]
|
|
73
|
+
)
|
|
74
|
+
visible: t.List[dict] = []
|
|
75
|
+
for item, payload in zip(items, payloads):
|
|
76
|
+
if visible_map.get(payload["xmin"]):
|
|
77
|
+
# Claim atomically
|
|
78
|
+
removed = self.__db.lrem(self.key, 1, item)
|
|
79
|
+
if removed:
|
|
80
|
+
visible.append(payload)
|
|
81
|
+
return visible
|
|
82
|
+
|
|
55
83
|
def push(self, items: t.List) -> None:
|
|
56
84
|
"""Push multiple items onto the queue."""
|
|
57
85
|
self.__db.rpush(self.key, *map(json.dumps, items))
|
|
@@ -43,6 +43,7 @@ REPLICATION_SLOT_CLEANUP_INTERVAL = env.float(
|
|
|
43
43
|
)
|
|
44
44
|
# path to the application schema config
|
|
45
45
|
SCHEMA = env.str("SCHEMA", default=None)
|
|
46
|
+
S3_SCHEMA_URL = env.str("S3_SCHEMA_URL", default=None)
|
|
46
47
|
USE_ASYNC = env.bool("USE_ASYNC", default=False)
|
|
47
48
|
STREAM_RESULTS = env.bool("STREAM_RESULTS", default=True)
|
|
48
49
|
# db polling interval
|
|
@@ -173,9 +174,26 @@ if PG_URL:
|
|
|
173
174
|
PG_USER = env.str("PG_USER", default=None)
|
|
174
175
|
else:
|
|
175
176
|
# If PG_URL is not set, we need to use the other PG_* variables
|
|
176
|
-
PG_URL = None
|
|
177
177
|
PG_USER = env.str("PG_USER")
|
|
178
178
|
|
|
179
|
+
# Read-only Postgres:
|
|
180
|
+
# This is used for read-only consumers that do not require replication slots or triggers.
|
|
181
|
+
# full database url including user, password, host, port and dbname
|
|
182
|
+
PG_URL_RO = env.str("PG_URL_RO", default=None)
|
|
183
|
+
PG_HOST_RO = env.str("PG_HOST_RO", default=None)
|
|
184
|
+
PG_PASSWORD_RO = env.str("PG_PASSWORD_RO", default=None)
|
|
185
|
+
PG_PORT_RO = env.int("PG_PORT_RO", default=None)
|
|
186
|
+
PG_SSLMODE_RO = env.str("PG_SSLMODE_RO", default=None)
|
|
187
|
+
PG_SSLROOTCERT_RO = env.str("PG_SSLROOTCERT_RO", default=None)
|
|
188
|
+
PG_USER_RO = env.str("PG_USER_RO", default=None)
|
|
189
|
+
if PG_URL_RO:
|
|
190
|
+
# If PG_URL_RO is set, we don't need to use the other PG_*_RO variables
|
|
191
|
+
PG_HOST_RO = None
|
|
192
|
+
PG_PASSWORD_RO = None
|
|
193
|
+
PG_PORT_RO = None
|
|
194
|
+
PG_SSLMODE_RO = None
|
|
195
|
+
PG_SSLROOTCERT_RO = None
|
|
196
|
+
|
|
179
197
|
# Redis:
|
|
180
198
|
REDIS_AUTH = env.str("REDIS_AUTH", default=None)
|
|
181
199
|
REDIS_USER = env.str("REDIS_USER", default=None)
|