pgsync 4.1.0__tar.gz → 4.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgsync-4.1.0 → pgsync-4.2.1}/PKG-INFO +17 -17
- {pgsync-4.1.0 → pgsync-4.2.1}/README.md +43 -5
- {pgsync-4.1.0 → pgsync-4.2.1}/README.rst +2 -2
- {pgsync-4.1.0 → pgsync-4.2.1}/bin/bootstrap +24 -4
- {pgsync-4.1.0 → pgsync-4.2.1}/bin/parallel_sync +28 -7
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/__init__.py +1 -1
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/base.py +70 -8
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/helper.py +5 -4
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/node.py +4 -2
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/querybuilder.py +128 -66
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/redisqueue.py +31 -2
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/settings.py +24 -6
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/sync.py +152 -36
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/trigger.py +1 -1
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/urls.py +8 -8
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/utils.py +76 -35
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync.egg-info/PKG-INFO +17 -17
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync.egg-info/requires.txt +14 -14
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_helper.py +19 -10
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_redisqueue.py +68 -4
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_sync_nested_children.py +102 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_urls.py +16 -10
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_utils.py +36 -11
- {pgsync-4.1.0 → pgsync-4.2.1}/AUTHORS.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/CONTRIBUTING.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/HISTORY.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/LICENSE +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/MANIFEST.in +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/bin/pgsync +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/Makefile +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/authors.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/changelog.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/conf.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/contributing.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/history.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/index.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/installation.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/logo.png +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/make.bat +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/readme.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/docs/usage.rst +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/constants.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/exc.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/plugin.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/search_client.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/singleton.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/transform.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync/view.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync.egg-info/SOURCES.txt +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/pyproject.toml +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/setup.cfg +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/setup.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/__init__.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/conftest.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/fixtures/schema.json +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_base.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_constants.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_env_vars.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_log_handlers.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_node.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_query_builder.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_search_client.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_settings.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_sync.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_sync_root.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_sync_single_child_fk_on_child.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_sync_single_child_fk_on_parent.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_transform.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_trigger.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_unique_behaviour.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/test_view.py +0 -0
- {pgsync-4.1.0 → pgsync-4.2.1}/tests/testing_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version: 4.1
|
|
3
|
+
Version: 4.2.1
|
|
4
4
|
Summary: Postgres to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
@@ -32,33 +32,33 @@ License-File: LICENSE
|
|
|
32
32
|
License-File: AUTHORS.rst
|
|
33
33
|
Requires-Dist: async-timeout==5.0.1
|
|
34
34
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
35
|
-
Requires-Dist: boto3==1.
|
|
36
|
-
Requires-Dist: botocore==1.
|
|
37
|
-
Requires-Dist: certifi==2025.
|
|
38
|
-
Requires-Dist: charset-normalizer==3.4.
|
|
35
|
+
Requires-Dist: boto3==1.40.35
|
|
36
|
+
Requires-Dist: botocore==1.40.35
|
|
37
|
+
Requires-Dist: certifi==2025.8.3
|
|
38
|
+
Requires-Dist: charset-normalizer==3.4.3
|
|
39
39
|
Requires-Dist: click==8.1.8
|
|
40
40
|
Requires-Dist: elastic-transport==8.17.1
|
|
41
|
-
Requires-Dist: elasticsearch==8.
|
|
42
|
-
Requires-Dist: elasticsearch-dsl==8.
|
|
43
|
-
Requires-Dist: environs==14.
|
|
41
|
+
Requires-Dist: elasticsearch==8.19.1
|
|
42
|
+
Requires-Dist: elasticsearch-dsl==8.15.4
|
|
43
|
+
Requires-Dist: environs==14.3.0
|
|
44
44
|
Requires-Dist: events==0.5
|
|
45
|
-
Requires-Dist: greenlet==3.2.
|
|
45
|
+
Requires-Dist: greenlet==3.2.4
|
|
46
46
|
Requires-Dist: idna==3.10
|
|
47
47
|
Requires-Dist: jmespath==1.0.1
|
|
48
|
-
Requires-Dist: marshmallow==4.0.
|
|
48
|
+
Requires-Dist: marshmallow==4.0.1
|
|
49
49
|
Requires-Dist: opensearch-dsl==2.1.0
|
|
50
50
|
Requires-Dist: opensearch-py==3.0.0
|
|
51
51
|
Requires-Dist: psycopg2-binary==2.9.10
|
|
52
52
|
Requires-Dist: python-dateutil==2.9.0.post0
|
|
53
53
|
Requires-Dist: python-dotenv==1.1.1
|
|
54
|
-
Requires-Dist: redis==6.
|
|
55
|
-
Requires-Dist: requests==2.32.
|
|
54
|
+
Requires-Dist: redis==6.4.0
|
|
55
|
+
Requires-Dist: requests==2.32.5
|
|
56
56
|
Requires-Dist: requests-aws4auth==1.3.1
|
|
57
|
-
Requires-Dist: s3transfer==0.
|
|
57
|
+
Requires-Dist: s3transfer==0.14.0
|
|
58
58
|
Requires-Dist: six==1.17.0
|
|
59
|
-
Requires-Dist: sqlalchemy==2.0.
|
|
59
|
+
Requires-Dist: sqlalchemy==2.0.43
|
|
60
60
|
Requires-Dist: sqlparse==0.5.3
|
|
61
|
-
Requires-Dist: typing-extensions==4.
|
|
61
|
+
Requires-Dist: typing-extensions==4.15.0
|
|
62
62
|
Requires-Dist: urllib3==1.26.20
|
|
63
63
|
Dynamic: author
|
|
64
64
|
Dynamic: author-email
|
|
@@ -88,9 +88,9 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
88
88
|
|
|
89
89
|
- [Python](https://www.python.org) 3.9+
|
|
90
90
|
- [Postgres](https://www.postgresql.org) 9.6+
|
|
91
|
-
- [Redis](https://redis.io) 3.1.0+
|
|
91
|
+
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
92
92
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
93
|
-
- [
|
|
93
|
+
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
94
94
|
|
|
95
95
|
### Postgres setup
|
|
96
96
|
|
|
@@ -40,7 +40,7 @@ of engineering and development.
|
|
|
40
40
|
Other benefits of PGSync include:
|
|
41
41
|
- Real-time analytics
|
|
42
42
|
- Reliable primary datastore/source of truth
|
|
43
|
-
- Scale on-demand
|
|
43
|
+
- Scale on-demand (multiple consumers)
|
|
44
44
|
- Easily join multiple nested tables
|
|
45
45
|
|
|
46
46
|
#### Why?
|
|
@@ -66,7 +66,7 @@ the search capabilities of [Elasticsearch](https://www.elastic.co/products/elast
|
|
|
66
66
|
|
|
67
67
|
#### How it works
|
|
68
68
|
|
|
69
|
-
PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [
|
|
69
|
+
PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io)/[Valkey](https://valkey.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQLAlchemy](https://www.sqlalchemy.org).
|
|
70
70
|
|
|
71
71
|
PGSync leverages the [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) feature of [Postgres](https://www.postgresql.org) (introduced in PostgreSQL 9.4) to capture a continuous stream of change events.
|
|
72
72
|
This feature needs to be enabled in your [Postgres](https://www.postgresql.org) configuration file (postgresql.conf) by setting:
|
|
@@ -93,9 +93,14 @@ There are several ways of installing and trying PGSync
|
|
|
93
93
|
- [Manual configuration](#manual-configuration)
|
|
94
94
|
|
|
95
95
|
|
|
96
|
-
##### Running in Docker
|
|
96
|
+
##### Running in Docker (Using GitHub Repository)
|
|
97
97
|
|
|
98
98
|
To start up all services with Docker.
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
$ git clone https://github.com/toluaina/pgsync
|
|
102
|
+
```
|
|
103
|
+
|
|
99
104
|
Run:
|
|
100
105
|
```
|
|
101
106
|
$ docker-compose up
|
|
@@ -106,6 +111,39 @@ Show the content in Elasticsearch/OpenSearch
|
|
|
106
111
|
$ curl -X GET http://[Elasticsearch/OpenSearch host]:9201/reservations/_search?pretty=true
|
|
107
112
|
```
|
|
108
113
|
|
|
114
|
+
|
|
115
|
+
##### Running with Docker (Using Image Repository)
|
|
116
|
+
|
|
117
|
+
To start all services with Docker, follow these steps:
|
|
118
|
+
|
|
119
|
+
1. Pull the Docker image:
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
$ docker pull toluaina1/pgsync:latest
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
2. Run the container:
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
$ docker run --rm -it \
|
|
129
|
+
-e REDIS_CHECKPOINT=true \
|
|
130
|
+
-e REDIS_HOST=<redis_host_address> \
|
|
131
|
+
-e PG_URL=postgres://<username>:<password>@<postgres_host>/<database> \
|
|
132
|
+
-e ELASTICSEARCH_URL=http://<elasticsearch_host>:9200 \
|
|
133
|
+
-v "$(pwd)/schema.json:/app/schema.json" \
|
|
134
|
+
toluaina1/pgsync:latest -c schema.json -d -b
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Placeholders used in the command above (see the full list of environment variables [here](https://pgsync.com/env-vars)):
|
|
138
|
+
|
|
139
|
+
- redis_host_address — Address of the Redis/Valkey server (e.g., host.docker.internal for local Docker setup)
|
|
140
|
+
- username — PostgreSQL username
|
|
141
|
+
- password — PostgreSQL password
|
|
142
|
+
- postgres_host — Host address for PostgreSQL instance (e.g., host.docker.internal)
|
|
143
|
+
- database — Name of PostgreSQL database
|
|
144
|
+
- elasticsearch_host — Address of Elasticsearch/OpenSearch instance (e.g., host.docker.internal)
|
|
145
|
+
|
|
146
|
+
|
|
109
147
|
##### Manual configuration
|
|
110
148
|
|
|
111
149
|
- Setup
|
|
@@ -154,9 +192,9 @@ Key features of PGSync are:
|
|
|
154
192
|
|
|
155
193
|
- [Python](https://www.python.org) 3.9+
|
|
156
194
|
- [Postgres](https://www.postgresql.org) 9.6+
|
|
157
|
-
- [Redis](https://redis.io) 3.1.0+
|
|
195
|
+
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
158
196
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
159
|
-
- [
|
|
197
|
+
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
160
198
|
|
|
161
199
|
|
|
162
200
|
#### Example
|
|
@@ -10,9 +10,9 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
10
10
|
|
|
11
11
|
- [Python](https://www.python.org) 3.9+
|
|
12
12
|
- [Postgres](https://www.postgresql.org) 9.6+
|
|
13
|
-
- [Redis](https://redis.io) 3.1.0+
|
|
13
|
+
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
14
14
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
15
|
-
- [
|
|
15
|
+
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
16
16
|
|
|
17
17
|
### Postgres setup
|
|
18
18
|
|
|
@@ -5,8 +5,14 @@ import logging
|
|
|
5
5
|
|
|
6
6
|
import click
|
|
7
7
|
|
|
8
|
+
from pgsync import settings
|
|
8
9
|
from pgsync.sync import Sync
|
|
9
|
-
from pgsync.utils import
|
|
10
|
+
from pgsync.utils import (
|
|
11
|
+
config_loader,
|
|
12
|
+
MutuallyExclusiveOption,
|
|
13
|
+
show_settings,
|
|
14
|
+
validate_config,
|
|
15
|
+
)
|
|
10
16
|
|
|
11
17
|
logger = logging.getLogger(__name__)
|
|
12
18
|
|
|
@@ -17,6 +23,19 @@ logger = logging.getLogger(__name__)
|
|
|
17
23
|
"-c",
|
|
18
24
|
help="Schema config",
|
|
19
25
|
type=click.Path(exists=True),
|
|
26
|
+
default=settings.SCHEMA,
|
|
27
|
+
show_default=True,
|
|
28
|
+
cls=MutuallyExclusiveOption,
|
|
29
|
+
mutually_exclusive=["s3_schema_url"],
|
|
30
|
+
)
|
|
31
|
+
@click.option(
|
|
32
|
+
"--s3_schema_url",
|
|
33
|
+
help="S3 URL for schema config",
|
|
34
|
+
type=click.STRING,
|
|
35
|
+
default=settings.S3_SCHEMA_URL,
|
|
36
|
+
show_default=True,
|
|
37
|
+
cls=MutuallyExclusiveOption,
|
|
38
|
+
mutually_exclusive=["config"],
|
|
20
39
|
)
|
|
21
40
|
@click.option("--host", "-h", help="PG_HOST override")
|
|
22
41
|
@click.option("--password", is_flag=True, help="Prompt for database password")
|
|
@@ -48,6 +67,7 @@ logger = logging.getLogger(__name__)
|
|
|
48
67
|
def main(
|
|
49
68
|
teardown: bool,
|
|
50
69
|
config: str,
|
|
70
|
+
s3_schema_url: str,
|
|
51
71
|
user: str,
|
|
52
72
|
password: bool,
|
|
53
73
|
host: str,
|
|
@@ -69,13 +89,13 @@ def main(
|
|
|
69
89
|
)
|
|
70
90
|
kwargs = {key: value for key, value in kwargs.items() if value is not None}
|
|
71
91
|
|
|
72
|
-
config
|
|
92
|
+
validate_config(config=config, s3_schema_url=s3_schema_url)
|
|
73
93
|
|
|
74
|
-
show_settings(config)
|
|
94
|
+
show_settings(config=config, s3_schema_url=s3_schema_url)
|
|
75
95
|
|
|
76
96
|
validate: bool = False if teardown else True
|
|
77
97
|
|
|
78
|
-
for doc in config_loader(config):
|
|
98
|
+
for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
|
|
79
99
|
sync: Sync = Sync(
|
|
80
100
|
doc,
|
|
81
101
|
verbose=verbose,
|
|
@@ -47,16 +47,21 @@ import sys
|
|
|
47
47
|
import typing as t
|
|
48
48
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
|
49
49
|
from dataclasses import dataclass
|
|
50
|
-
from pathlib import Path
|
|
51
50
|
from queue import Queue
|
|
52
51
|
from threading import Thread
|
|
53
52
|
|
|
54
53
|
import click
|
|
55
54
|
import sqlalchemy as sa
|
|
56
55
|
|
|
57
|
-
from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH
|
|
56
|
+
from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH, S3_SCHEMA_URL, SCHEMA
|
|
58
57
|
from pgsync.sync import Sync
|
|
59
|
-
from pgsync.utils import
|
|
58
|
+
from pgsync.utils import (
|
|
59
|
+
config_loader,
|
|
60
|
+
MutuallyExclusiveOption,
|
|
61
|
+
show_settings,
|
|
62
|
+
timeit,
|
|
63
|
+
validate_config,
|
|
64
|
+
)
|
|
60
65
|
|
|
61
66
|
|
|
62
67
|
def save_ctid(page: int, row: int, filename: str) -> None:
|
|
@@ -378,6 +383,19 @@ def run_task(
|
|
|
378
383
|
"-c",
|
|
379
384
|
help="Schema config",
|
|
380
385
|
type=click.Path(exists=True),
|
|
386
|
+
default=SCHEMA,
|
|
387
|
+
show_default=True,
|
|
388
|
+
cls=MutuallyExclusiveOption,
|
|
389
|
+
mutually_exclusive=["s3_schema_url"],
|
|
390
|
+
)
|
|
391
|
+
@click.option(
|
|
392
|
+
"--s3_schema_url",
|
|
393
|
+
help="S3 URL for schema config",
|
|
394
|
+
type=click.STRING,
|
|
395
|
+
default=S3_SCHEMA_URL,
|
|
396
|
+
show_default=True,
|
|
397
|
+
cls=MutuallyExclusiveOption,
|
|
398
|
+
mutually_exclusive=["config"],
|
|
381
399
|
)
|
|
382
400
|
@click.option(
|
|
383
401
|
"--verbose",
|
|
@@ -409,17 +427,20 @@ def run_task(
|
|
|
409
427
|
),
|
|
410
428
|
default="multiprocess_async",
|
|
411
429
|
)
|
|
412
|
-
def main(
|
|
430
|
+
def main(
|
|
431
|
+
config: str, s3_schema_url: str, nprocs: int, mode: str, verbose: bool
|
|
432
|
+
) -> None:
|
|
413
433
|
"""
|
|
414
434
|
TODO:
|
|
415
435
|
- Track progress across cpus/threads
|
|
416
436
|
- Handle KeyboardInterrupt Exception
|
|
417
437
|
"""
|
|
418
|
-
config: str = get_config(config)
|
|
419
438
|
|
|
420
|
-
|
|
439
|
+
validate_config(config=config, s3_schema_url=s3_schema_url)
|
|
440
|
+
|
|
441
|
+
show_settings(config=config, s3_schema_url=s3_schema_url)
|
|
421
442
|
|
|
422
|
-
for doc in config_loader(config):
|
|
443
|
+
for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
|
|
423
444
|
tasks: t.Generator = fetch_tasks(doc)
|
|
424
445
|
if mode == "synchronous":
|
|
425
446
|
synchronous(tasks, doc, verbose=verbose)
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
import threading
|
|
5
6
|
import time
|
|
6
7
|
import typing as t
|
|
7
8
|
from contextlib import contextmanager
|
|
@@ -28,8 +29,13 @@ from .exc import (
|
|
|
28
29
|
TableNotFoundError,
|
|
29
30
|
)
|
|
30
31
|
from .settings import (
|
|
32
|
+
PG_HOST_RO,
|
|
33
|
+
PG_PASSWORD_RO,
|
|
34
|
+
PG_PORT_RO,
|
|
31
35
|
PG_SSLMODE,
|
|
32
36
|
PG_SSLROOTCERT,
|
|
37
|
+
PG_URL_RO,
|
|
38
|
+
PG_USER_RO,
|
|
33
39
|
QUERY_CHUNK_SIZE,
|
|
34
40
|
STREAM_RESULTS,
|
|
35
41
|
)
|
|
@@ -48,7 +54,6 @@ try:
|
|
|
48
54
|
except ImportError:
|
|
49
55
|
pass
|
|
50
56
|
|
|
51
|
-
|
|
52
57
|
logger = logging.getLogger(__name__)
|
|
53
58
|
|
|
54
59
|
SSL_MODES = (
|
|
@@ -153,6 +158,8 @@ class TupleIdentifierType(sa.types.UserDefinedType):
|
|
|
153
158
|
|
|
154
159
|
|
|
155
160
|
class Base(object):
|
|
161
|
+
_thread_local = threading.local()
|
|
162
|
+
|
|
156
163
|
INT_TYPES = (
|
|
157
164
|
"bigint",
|
|
158
165
|
"bigserial",
|
|
@@ -190,6 +197,26 @@ class Base(object):
|
|
|
190
197
|
self.__engine: sa.engine.Engine = _pg_engine(
|
|
191
198
|
database, echo=False, **kwargs
|
|
192
199
|
)
|
|
200
|
+
self.__engine_ro: t.Optional[sa.engine.Engine] = None
|
|
201
|
+
if (
|
|
202
|
+
PG_USER_RO
|
|
203
|
+
or PG_HOST_RO
|
|
204
|
+
or PG_PASSWORD_RO
|
|
205
|
+
or PG_PORT_RO
|
|
206
|
+
or PG_URL_RO
|
|
207
|
+
):
|
|
208
|
+
kwargs.update(
|
|
209
|
+
{
|
|
210
|
+
"user": PG_USER_RO,
|
|
211
|
+
"host": PG_HOST_RO,
|
|
212
|
+
"password": PG_PASSWORD_RO,
|
|
213
|
+
"port": PG_PORT_RO,
|
|
214
|
+
"url": PG_URL_RO,
|
|
215
|
+
}
|
|
216
|
+
)
|
|
217
|
+
self.__engine_ro: sa.engine.Engine = _pg_engine(
|
|
218
|
+
database, echo=False, **kwargs
|
|
219
|
+
)
|
|
193
220
|
self.__schemas: t.Optional[dict] = None
|
|
194
221
|
# models is a dict of f'{schema}.{table}'
|
|
195
222
|
self.__models: dict = {}
|
|
@@ -307,6 +334,8 @@ class Base(object):
|
|
|
307
334
|
@property
|
|
308
335
|
def engine(self) -> sa.engine.Engine:
|
|
309
336
|
"""Get the database engine."""
|
|
337
|
+
if getattr(self._thread_local, "read_only", False):
|
|
338
|
+
return self.__engine_ro
|
|
310
339
|
return self.__engine
|
|
311
340
|
|
|
312
341
|
@property
|
|
@@ -910,6 +939,37 @@ class Base(object):
|
|
|
910
939
|
label="txid_current",
|
|
911
940
|
)[0]
|
|
912
941
|
|
|
942
|
+
def pg_visible_in_snapshot(
|
|
943
|
+
self, literal_binds: bool = False
|
|
944
|
+
) -> t.Callable[[t.List[int]], dict]:
|
|
945
|
+
def _pg_visible_in_snapshot(xid8s: t.List[int]) -> dict:
|
|
946
|
+
if not xid8s:
|
|
947
|
+
return {}
|
|
948
|
+
# TODO: use the SQLAlchemy ORM to handle this query
|
|
949
|
+
statement = sa.text(
|
|
950
|
+
"""
|
|
951
|
+
SELECT xid AS xid8,
|
|
952
|
+
PG_VISIBLE_IN_SNAPSHOT(xid::xid8, PG_CURRENT_SNAPSHOT()) AS visible
|
|
953
|
+
FROM UNNEST(CAST(:xid8s AS text[]))
|
|
954
|
+
WITH ORDINALITY AS t(xid, ord)
|
|
955
|
+
ORDER BY t.ord
|
|
956
|
+
"""
|
|
957
|
+
)
|
|
958
|
+
if self.verbose:
|
|
959
|
+
compiled_query(
|
|
960
|
+
statement,
|
|
961
|
+
label="xmin_visibility",
|
|
962
|
+
literal_binds=literal_binds,
|
|
963
|
+
)
|
|
964
|
+
|
|
965
|
+
# xid8s = list of xid8 strings
|
|
966
|
+
params: dict = {"xid8s": list(map(str, xid8s))}
|
|
967
|
+
with self.__engine_ro.connect() as conn:
|
|
968
|
+
result = conn.execute(statement, params)
|
|
969
|
+
return {int(row.xid8): row.visible for row in result}
|
|
970
|
+
|
|
971
|
+
return _pg_visible_in_snapshot
|
|
972
|
+
|
|
913
973
|
def parse_value(self, type_: str, value: str) -> t.Optional[str]:
|
|
914
974
|
"""
|
|
915
975
|
Parse datatypes from db.
|
|
@@ -1168,6 +1228,7 @@ def _pg_engine(
|
|
|
1168
1228
|
echo: bool = False,
|
|
1169
1229
|
sslmode: t.Optional[str] = None,
|
|
1170
1230
|
sslrootcert: t.Optional[str] = None,
|
|
1231
|
+
url: t.Optional[str] = None,
|
|
1171
1232
|
) -> sa.engine.Engine:
|
|
1172
1233
|
connect_args: dict = {}
|
|
1173
1234
|
sslmode = sslmode or PG_SSLMODE
|
|
@@ -1187,13 +1248,14 @@ def _pg_engine(
|
|
|
1187
1248
|
)
|
|
1188
1249
|
connect_args["sslrootcert"] = sslrootcert
|
|
1189
1250
|
|
|
1190
|
-
url
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1251
|
+
if url is None:
|
|
1252
|
+
url: str = get_postgres_url(
|
|
1253
|
+
database,
|
|
1254
|
+
user=user,
|
|
1255
|
+
host=host,
|
|
1256
|
+
password=password,
|
|
1257
|
+
port=port,
|
|
1258
|
+
)
|
|
1197
1259
|
return sa.create_engine(url, echo=echo, connect_args=connect_args)
|
|
1198
1260
|
|
|
1199
1261
|
|
|
@@ -8,7 +8,7 @@ import sqlalchemy as sa
|
|
|
8
8
|
|
|
9
9
|
from .base import database_exists, drop_database
|
|
10
10
|
from .sync import Sync
|
|
11
|
-
from .utils import config_loader,
|
|
11
|
+
from .utils import config_loader, validate_config
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -20,6 +20,7 @@ def teardown(
|
|
|
20
20
|
drop_index: bool = True,
|
|
21
21
|
delete_checkpoint: bool = True,
|
|
22
22
|
config: t.Optional[str] = None,
|
|
23
|
+
s3_schema_url: t.Optional[str] = None,
|
|
23
24
|
validate: bool = False,
|
|
24
25
|
) -> None:
|
|
25
26
|
"""
|
|
@@ -28,15 +29,15 @@ def teardown(
|
|
|
28
29
|
Args:
|
|
29
30
|
drop_db (bool, optional): Whether to drop the database. Defaults to True.
|
|
30
31
|
truncate_db (bool, optional): Whether to truncate the database. Defaults to True.
|
|
31
|
-
delete_redis (bool, optional): Whether to delete Redis. Defaults to True.
|
|
32
|
+
delete_redis (bool, optional): Whether to delete Redis/Valkey. Defaults to True.
|
|
32
33
|
drop_index (bool, optional): Whether to drop the index. Defaults to True.
|
|
33
34
|
delete_checkpoint (bool, optional): Whether to delete the checkpoint. Defaults to True.
|
|
34
35
|
config (Optional[str], optional): The configuration file path. Defaults to None.
|
|
35
36
|
validate (bool, optional): Whether to validate the configuration. Defaults to False.
|
|
36
37
|
"""
|
|
37
|
-
config
|
|
38
|
+
validate_config(config=config, s3_schema_url=s3_schema_url)
|
|
38
39
|
|
|
39
|
-
for doc in config_loader(config):
|
|
40
|
+
for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
|
|
40
41
|
if not database_exists(doc["database"]):
|
|
41
42
|
logger.warning(f'Database {doc["database"]} does not exist')
|
|
42
43
|
continue
|
|
@@ -133,6 +133,7 @@ class Node(object):
|
|
|
133
133
|
relationship: t.Optional[dict] = None
|
|
134
134
|
parent: t.Optional[Node] = None
|
|
135
135
|
base_tables: t.Optional[list] = None
|
|
136
|
+
is_through: bool = False
|
|
136
137
|
|
|
137
138
|
def __post_init__(self):
|
|
138
139
|
self.model: sa.sql.Alias = self.models(self.table, self.schema)
|
|
@@ -328,8 +329,9 @@ class Tree(threading.local):
|
|
|
328
329
|
self.root = node
|
|
329
330
|
|
|
330
331
|
self.tables.add(node.table)
|
|
331
|
-
for
|
|
332
|
-
|
|
332
|
+
for through_node in node.relationship.throughs:
|
|
333
|
+
through_node.is_through = True
|
|
334
|
+
self.tables.add(through_node.table)
|
|
333
335
|
|
|
334
336
|
for child in nodes.get("children", []):
|
|
335
337
|
node.add_child(self.build(child))
|