pgsync 5.0.0__tar.gz → 6.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgsync-5.0.0 → pgsync-6.1.0}/PKG-INFO +13 -12
- {pgsync-5.0.0 → pgsync-6.1.0}/README.md +58 -29
- {pgsync-5.0.0 → pgsync-6.1.0}/README.rst +1 -1
- {pgsync-5.0.0 → pgsync-6.1.0}/bin/bootstrap +26 -5
- {pgsync-5.0.0 → pgsync-6.1.0}/bin/parallel_sync +33 -7
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/__init__.py +1 -1
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/base.py +79 -18
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/constants.py +2 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/helper.py +10 -3
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/node.py +5 -3
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/plugin.py +2 -2
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/redisqueue.py +8 -3
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/settings.py +18 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/singleton.py +1 -1
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/sync.py +358 -149
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/trigger.py +24 -4
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/urls.py +14 -2
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/utils.py +70 -13
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/view.py +65 -11
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/PKG-INFO +13 -12
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/requires.txt +10 -10
- {pgsync-5.0.0 → pgsync-6.1.0}/setup.py +1 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/conftest.py +46 -14
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_constants.py +1 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync.py +12 -5
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync_nested_children.py +35 -3
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync_root.py +19 -2
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync_single_child_fk_on_child.py +24 -2
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync_single_child_fk_on_parent.py +24 -2
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_trigger.py +24 -4
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_unique_behaviour.py +1 -2
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_urls.py +2 -2
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_utils.py +13 -1
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_view.py +3 -2
- {pgsync-5.0.0 → pgsync-6.1.0}/AUTHORS.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/CONTRIBUTING.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/HISTORY.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/LICENSE +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/MANIFEST.in +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/bin/pgsync +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/Makefile +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/authors.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/changelog.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/conf.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/contributing.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/history.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/index.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/installation.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/logo.png +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/make.bat +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/readme.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/docs/usage.rst +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/exc.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/querybuilder.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/search_client.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/transform.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/SOURCES.txt +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/pyproject.toml +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/setup.cfg +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/__init__.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/fixtures/schema.json +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_base.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_env_vars.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_helper.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_log_handlers.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_node.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_query_builder.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_redisqueue.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_search_client.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_settings.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_transform.py +0 -0
- {pgsync-5.0.0 → pgsync-6.1.0}/tests/testing_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version:
|
|
3
|
+
Version: 6.1.0
|
|
4
4
|
Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.11
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.12
|
|
24
24
|
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
25
26
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
26
27
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
27
28
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -32,31 +33,31 @@ License-File: LICENSE
|
|
|
32
33
|
License-File: AUTHORS.rst
|
|
33
34
|
Requires-Dist: async-timeout==5.0.1
|
|
34
35
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
35
|
-
Requires-Dist: boto3==1.
|
|
36
|
-
Requires-Dist: botocore==1.
|
|
37
|
-
Requires-Dist: certifi==2025.
|
|
38
|
-
Requires-Dist: charset-normalizer==3.4.
|
|
36
|
+
Requires-Dist: boto3==1.41.2
|
|
37
|
+
Requires-Dist: botocore==1.41.2
|
|
38
|
+
Requires-Dist: certifi==2025.11.12
|
|
39
|
+
Requires-Dist: charset-normalizer==3.4.4
|
|
39
40
|
Requires-Dist: click==8.1.8
|
|
40
41
|
Requires-Dist: elastic-transport==9.1.0
|
|
41
42
|
Requires-Dist: elasticsearch==7.17.12
|
|
42
43
|
Requires-Dist: elasticsearch-dsl==7.4.1
|
|
43
|
-
Requires-Dist: environs==14.
|
|
44
|
+
Requires-Dist: environs==14.4.0
|
|
44
45
|
Requires-Dist: events==0.5
|
|
45
|
-
Requires-Dist: idna==3.
|
|
46
|
+
Requires-Dist: idna==3.11
|
|
46
47
|
Requires-Dist: jmespath==1.0.1
|
|
47
48
|
Requires-Dist: marshmallow==4.0.1
|
|
48
|
-
Requires-Dist: mysql-replication==1.0.
|
|
49
|
+
Requires-Dist: mysql-replication==1.0.12
|
|
49
50
|
Requires-Dist: opensearch-dsl==2.1.0
|
|
50
51
|
Requires-Dist: opensearch-py==3.0.0
|
|
51
52
|
Requires-Dist: packaging==25.0
|
|
52
53
|
Requires-Dist: psycopg2-binary==2.9.11
|
|
53
54
|
Requires-Dist: pymysql==1.1.2
|
|
54
55
|
Requires-Dist: python-dateutil==2.9.0.post0
|
|
55
|
-
Requires-Dist: python-dotenv==1.
|
|
56
|
-
Requires-Dist: redis==
|
|
56
|
+
Requires-Dist: python-dotenv==1.2.1
|
|
57
|
+
Requires-Dist: redis==7.0.1
|
|
57
58
|
Requires-Dist: requests==2.32.5
|
|
58
59
|
Requires-Dist: requests-aws4auth==1.3.1
|
|
59
|
-
Requires-Dist: s3transfer==0.
|
|
60
|
+
Requires-Dist: s3transfer==0.15.0
|
|
60
61
|
Requires-Dist: six==1.17.0
|
|
61
62
|
Requires-Dist: sqlalchemy==2.0.44
|
|
62
63
|
Requires-Dist: sqlparse==0.5.3
|
|
@@ -78,7 +79,7 @@ Dynamic: requires-dist
|
|
|
78
79
|
Dynamic: requires-python
|
|
79
80
|
Dynamic: summary
|
|
80
81
|
|
|
81
|
-
# PostgreSQL to Elasticsearch/OpenSearch sync
|
|
82
|
+
# PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
82
83
|
|
|
83
84
|
|
|
84
85
|
- [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
[](https://pypi.org/project/pgsync)
|
|
6
6
|
[](https://pypi.org/project/pgsync)
|
|
7
7
|
[](https://codecov.io/gh/toluaina/pgsync)
|
|
8
|
+
[](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync)
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
## PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
@@ -43,6 +44,23 @@ Other benefits of PGSync include:
|
|
|
43
44
|
- Scale on-demand (multiple consumers)
|
|
44
45
|
- Easily join multiple nested tables
|
|
45
46
|
|
|
47
|
+
## Sponsors
|
|
48
|
+
|
|
49
|
+
[PGSync](https://pgsync.com) is made possible with support from [DigitalOcean](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync).
|
|
50
|
+
|
|
51
|
+
<p>
|
|
52
|
+
<a href="https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync" rel="sponsored noopener noreferrer">
|
|
53
|
+
<img
|
|
54
|
+
src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg"
|
|
55
|
+
alt="DigitalOcean"
|
|
56
|
+
width="210"
|
|
57
|
+
loading="lazy"
|
|
58
|
+
decoding="async"
|
|
59
|
+
>
|
|
60
|
+
</a>
|
|
61
|
+
</p>
|
|
62
|
+
|
|
63
|
+
|
|
46
64
|
#### Why?
|
|
47
65
|
|
|
48
66
|
At a high level, you have data in a PostgreSQL/MySQL/MariaDB database and you want to mirror it in Elasticsearch/OpenSearch.
|
|
@@ -55,7 +73,7 @@ Of course, if your data never changed, then you could just take a snapshot in ti
|
|
|
55
73
|
PGSync is appropriate for you if:
|
|
56
74
|
- [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
|
|
57
75
|
read-only search layer.
|
|
58
|
-
- You need to denormalize relational data into a NoSQL data source.
|
|
76
|
+
- You need to denormalize relational data into a NoSQL data source like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
|
|
59
77
|
- Your data is constantly changing.
|
|
60
78
|
- You have existing data in a relational database such as [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) and you need a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
|
|
61
79
|
- You want to keep your existing data untouched whilst taking advantage of
|
|
@@ -81,7 +99,8 @@ PGSync operates in an event-driven model by creating triggers for tables in your
|
|
|
81
99
|
|
|
82
100
|
*This is the only time PGSync will ever make any changes to your database.*
|
|
83
101
|
|
|
84
|
-
|
|
102
|
+
>**NOTE**: **If you change the structure of your PGSync schema config, it's recommended and in most cases necessary to rebuild your Elasticsearch/OpenSearch indices.**
|
|
103
|
+
|
|
85
104
|
There are plans to support zero-downtime migrations to streamline this process.
|
|
86
105
|
|
|
87
106
|
|
|
@@ -91,6 +110,14 @@ There are several ways of installing and trying PGSync
|
|
|
91
110
|
- [Running in Docker](#running-in-docker) is the easiest way to get up and running.
|
|
92
111
|
- [Manual configuration](#manual-configuration)
|
|
93
112
|
|
|
113
|
+
#### Book Demo Example (requires a DigitalOcean account)
|
|
114
|
+
|
|
115
|
+
[](https://cloud.digitalocean.com/apps/new?repo=https://github.com/toluaina/pgsync/tree/main)
|
|
116
|
+
|
|
117
|
+
Fill in the following during setup:
|
|
118
|
+
- `ELASTICSEARCH_URL` e.g. https://user:pass@os-host:443
|
|
119
|
+
- `REDIS_URL` e.g. rediss://default:pass@host:port/0
|
|
120
|
+
|
|
94
121
|
|
|
95
122
|
##### Running in Docker (Using Github Repository)
|
|
96
123
|
|
|
@@ -161,7 +188,6 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
|
|
|
161
188
|
### MySQL / MariaDB setup
|
|
162
189
|
|
|
163
190
|
- Enable binary logging in your MySQL / MariaDB setting.
|
|
164
|
-
|
|
165
191
|
- You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
|
|
166
192
|
|
|
167
193
|
```server-id = 1``` # any non-zero unique ID
|
|
@@ -169,10 +195,8 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
|
|
|
169
195
|
```log_bin = mysql-bin```
|
|
170
196
|
|
|
171
197
|
```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
|
|
172
|
-
|
|
173
198
|
- optional housekeeping:
|
|
174
199
|
```binlog_expire_logs_seconds = 604800``` # 7 days
|
|
175
|
-
|
|
176
200
|
- You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
|
|
177
201
|
|
|
178
202
|
```sql
|
|
@@ -198,17 +222,19 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
|
|
|
198
222
|
|
|
199
223
|
Key features of PGSync are:
|
|
200
224
|
|
|
201
|
-
- Easily denormalize relational data
|
|
202
|
-
- Works with any PostgreSQL database (
|
|
203
|
-
- Negligible impact on database performance
|
|
204
|
-
- Transactionally consistent output in Elasticsearch/OpenSearch
|
|
205
|
-
-
|
|
206
|
-
-
|
|
207
|
-
-
|
|
208
|
-
-
|
|
209
|
-
- Supports
|
|
210
|
-
- Supports
|
|
211
|
-
-
|
|
225
|
+
- Easily denormalize relational data
|
|
226
|
+
- Works with any PostgreSQL database (9.6 or later)
|
|
227
|
+
- Negligible impact on database performance
|
|
228
|
+
- Transactionally consistent output in Elasticsearch/OpenSearch:
|
|
229
|
+
- Writes appear only after they’re committed
|
|
230
|
+
- Inserts, updates, and deletes appear in commit order (not eventually)
|
|
231
|
+
- Fault-tolerant: no data loss even on crashes or network issues; processing resumes from the last checkpoint
|
|
232
|
+
- Returns data directly as PostgreSQL/MySQL/MariaDB JSON for speed
|
|
233
|
+
- Supports composite primary and foreign keys
|
|
234
|
+
- Supports views and materialized views
|
|
235
|
+
- Handles arbitrarily deep nesting of related tables
|
|
236
|
+
- Supports PostgreSQL/MySQL/MariaDB JSON fields, allowing JSON properties to be extracted as separate document fields
|
|
237
|
+
- Customizable document structure
|
|
212
238
|
|
|
213
239
|
|
|
214
240
|
#### Requirements
|
|
@@ -334,23 +360,26 @@ e.g
|
|
|
334
360
|
}
|
|
335
361
|
```
|
|
336
362
|
|
|
337
|
-
PGSync addresses
|
|
338
|
-
|
|
339
|
-
-
|
|
340
|
-
-
|
|
341
|
-
-
|
|
342
|
-
-
|
|
363
|
+
PGSync addresses common data consistency challenges, such as:
|
|
364
|
+
|
|
365
|
+
- Updating an author's name in the database
|
|
366
|
+
- Adding an additional author to an existing book
|
|
367
|
+
- Changing an author's name across many existing documents
|
|
368
|
+
- Deleting or updating an author record
|
|
369
|
+
- Truncating an entire table and keeping indexes in sync
|
|
343
370
|
|
|
344
371
|
|
|
345
372
|
#### Benefits
|
|
346
373
|
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
-
|
|
350
|
-
-
|
|
351
|
-
-
|
|
352
|
-
-
|
|
353
|
-
-
|
|
374
|
+
PGSync is a simple, out-of-the-box solution for change data capture, designed to minimize development effort and keep your search indexes in sync.
|
|
375
|
+
|
|
376
|
+
- Handles data deletions automatically.
|
|
377
|
+
- Requires minimal setup. Just define a schema config that describes your data.
|
|
378
|
+
- Generates advanced queries directly from your schema.
|
|
379
|
+
- Makes it easy to rebuild indexes after schema changes.
|
|
380
|
+
- Lets you expose only the data you need in Elasticsearch/OpenSearch.
|
|
381
|
+
- Supports multiple Postgres/MySQL/MariaDB schemas for multi-tenant applications.
|
|
382
|
+
|
|
354
383
|
|
|
355
384
|
|
|
356
385
|
#### Contributing
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# PostgreSQL to Elasticsearch/OpenSearch sync
|
|
1
|
+
# PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
- [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
|
|
@@ -26,7 +26,16 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
default=settings.SCHEMA,
|
|
27
27
|
show_default=True,
|
|
28
28
|
cls=MutuallyExclusiveOption,
|
|
29
|
-
mutually_exclusive=["s3_schema_url"],
|
|
29
|
+
mutually_exclusive=["s3_schema_url", "schema_url"],
|
|
30
|
+
)
|
|
31
|
+
@click.option(
|
|
32
|
+
"--schema_url",
|
|
33
|
+
help="URL for schema config",
|
|
34
|
+
type=click.STRING,
|
|
35
|
+
default=settings.SCHEMA_URL,
|
|
36
|
+
show_default=True,
|
|
37
|
+
cls=MutuallyExclusiveOption,
|
|
38
|
+
mutually_exclusive=["config", "s3_schema_url"],
|
|
30
39
|
)
|
|
31
40
|
@click.option(
|
|
32
41
|
"--s3_schema_url",
|
|
@@ -35,7 +44,7 @@ logger = logging.getLogger(__name__)
|
|
|
35
44
|
default=settings.S3_SCHEMA_URL,
|
|
36
45
|
show_default=True,
|
|
37
46
|
cls=MutuallyExclusiveOption,
|
|
38
|
-
mutually_exclusive=["config"],
|
|
47
|
+
mutually_exclusive=["config", "schema_url"],
|
|
39
48
|
)
|
|
40
49
|
@click.option("--host", "-h", help="PG_HOST override")
|
|
41
50
|
@click.option("--password", is_flag=True, help="Prompt for database password")
|
|
@@ -67,6 +76,7 @@ logger = logging.getLogger(__name__)
|
|
|
67
76
|
def main(
|
|
68
77
|
teardown: bool,
|
|
69
78
|
config: str,
|
|
79
|
+
schema_url: str,
|
|
70
80
|
s3_schema_url: str,
|
|
71
81
|
user: str,
|
|
72
82
|
password: bool,
|
|
@@ -89,13 +99,24 @@ def main(
|
|
|
89
99
|
)
|
|
90
100
|
kwargs = {key: value for key, value in kwargs.items() if value is not None}
|
|
91
101
|
|
|
92
|
-
validate_config(
|
|
102
|
+
validate_config(
|
|
103
|
+
config=config,
|
|
104
|
+
schema_url=schema_url,
|
|
105
|
+
s3_schema_url=s3_schema_url,
|
|
106
|
+
)
|
|
93
107
|
|
|
94
|
-
show_settings(
|
|
108
|
+
show_settings(
|
|
109
|
+
config=config,
|
|
110
|
+
schema_url=schema_url,
|
|
111
|
+
s3_schema_url=s3_schema_url,
|
|
112
|
+
**kwargs,
|
|
113
|
+
)
|
|
95
114
|
|
|
96
115
|
validate: bool = False if teardown else True
|
|
97
116
|
|
|
98
|
-
for doc in config_loader(
|
|
117
|
+
for doc in config_loader(
|
|
118
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
119
|
+
):
|
|
99
120
|
sync: Sync = Sync(
|
|
100
121
|
doc,
|
|
101
122
|
verbose=verbose,
|
|
@@ -53,7 +53,13 @@ from threading import Thread
|
|
|
53
53
|
import click
|
|
54
54
|
import sqlalchemy as sa
|
|
55
55
|
|
|
56
|
-
from pgsync.settings import
|
|
56
|
+
from pgsync.settings import (
|
|
57
|
+
BLOCK_SIZE,
|
|
58
|
+
CHECKPOINT_PATH,
|
|
59
|
+
S3_SCHEMA_URL,
|
|
60
|
+
SCHEMA,
|
|
61
|
+
SCHEMA_URL,
|
|
62
|
+
)
|
|
57
63
|
from pgsync.sync import Sync
|
|
58
64
|
from pgsync.utils import (
|
|
59
65
|
config_loader,
|
|
@@ -386,7 +392,16 @@ def run_task(
|
|
|
386
392
|
default=SCHEMA,
|
|
387
393
|
show_default=True,
|
|
388
394
|
cls=MutuallyExclusiveOption,
|
|
389
|
-
mutually_exclusive=["s3_schema_url"],
|
|
395
|
+
mutually_exclusive=["s3_schema_url", "schema_url"],
|
|
396
|
+
)
|
|
397
|
+
@click.option(
|
|
398
|
+
"--schema_url",
|
|
399
|
+
help="URL for schema config",
|
|
400
|
+
type=click.STRING,
|
|
401
|
+
default=SCHEMA_URL,
|
|
402
|
+
show_default=True,
|
|
403
|
+
cls=MutuallyExclusiveOption,
|
|
404
|
+
mutually_exclusive=["config", "s3_schema_url"],
|
|
390
405
|
)
|
|
391
406
|
@click.option(
|
|
392
407
|
"--s3_schema_url",
|
|
@@ -395,7 +410,7 @@ def run_task(
|
|
|
395
410
|
default=S3_SCHEMA_URL,
|
|
396
411
|
show_default=True,
|
|
397
412
|
cls=MutuallyExclusiveOption,
|
|
398
|
-
mutually_exclusive=["config"],
|
|
413
|
+
mutually_exclusive=["config", "schema_url"],
|
|
399
414
|
)
|
|
400
415
|
@click.option(
|
|
401
416
|
"--verbose",
|
|
@@ -428,7 +443,12 @@ def run_task(
|
|
|
428
443
|
default="multiprocess_async",
|
|
429
444
|
)
|
|
430
445
|
def main(
|
|
431
|
-
config: str,
|
|
446
|
+
config: str,
|
|
447
|
+
schema_url: str,
|
|
448
|
+
s3_schema_url: str,
|
|
449
|
+
nprocs: int,
|
|
450
|
+
mode: str,
|
|
451
|
+
verbose: bool,
|
|
432
452
|
) -> None:
|
|
433
453
|
"""
|
|
434
454
|
TODO:
|
|
@@ -436,11 +456,17 @@ def main(
|
|
|
436
456
|
- Handle KeyboardInterrupt Exception
|
|
437
457
|
"""
|
|
438
458
|
|
|
439
|
-
validate_config(
|
|
459
|
+
validate_config(
|
|
460
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
461
|
+
)
|
|
440
462
|
|
|
441
|
-
show_settings(
|
|
463
|
+
show_settings(
|
|
464
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
465
|
+
)
|
|
442
466
|
|
|
443
|
-
for doc in config_loader(
|
|
467
|
+
for doc in config_loader(
|
|
468
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
469
|
+
):
|
|
444
470
|
tasks: t.Generator = fetch_tasks(doc)
|
|
445
471
|
if mode == "synchronous":
|
|
446
472
|
synchronous(tasks, doc, verbose=verbose)
|
|
@@ -8,8 +8,11 @@ import time
|
|
|
8
8
|
import typing as t
|
|
9
9
|
from contextlib import contextmanager
|
|
10
10
|
|
|
11
|
+
import psycopg2
|
|
11
12
|
import sqlalchemy as sa
|
|
13
|
+
from psycopg2.extras import LogicalReplicationConnection
|
|
12
14
|
from sqlalchemy.dialects import postgresql # noqa
|
|
15
|
+
from sqlalchemy.engine.url import make_url
|
|
13
16
|
from sqlalchemy.orm import sessionmaker
|
|
14
17
|
|
|
15
18
|
from .constants import (
|
|
@@ -31,6 +34,8 @@ from .exc import (
|
|
|
31
34
|
)
|
|
32
35
|
from .settings import (
|
|
33
36
|
IS_MYSQL_COMPAT,
|
|
37
|
+
MYSQL_DATABASE,
|
|
38
|
+
PG_DATABASE,
|
|
34
39
|
PG_HOST_RO,
|
|
35
40
|
PG_PASSWORD_RO,
|
|
36
41
|
PG_PORT_RO,
|
|
@@ -39,6 +44,12 @@ from .settings import (
|
|
|
39
44
|
PG_URL_RO,
|
|
40
45
|
PG_USER_RO,
|
|
41
46
|
QUERY_CHUNK_SIZE,
|
|
47
|
+
SQLALCHEMY_MAX_OVERFLOW,
|
|
48
|
+
SQLALCHEMY_POOL_PRE_PING,
|
|
49
|
+
SQLALCHEMY_POOL_RECYCLE,
|
|
50
|
+
SQLALCHEMY_POOL_SIZE,
|
|
51
|
+
SQLALCHEMY_POOL_TIMEOUT,
|
|
52
|
+
SQLALCHEMY_USE_NULLPOOL,
|
|
42
53
|
STREAM_RESULTS,
|
|
43
54
|
)
|
|
44
55
|
from .trigger import CREATE_TRIGGER_TEMPLATE
|
|
@@ -230,6 +241,7 @@ class Base(object):
|
|
|
230
241
|
self.__columns: dict = {}
|
|
231
242
|
self.verbose: bool = verbose
|
|
232
243
|
self._conn = None
|
|
244
|
+
self._session = None
|
|
233
245
|
|
|
234
246
|
def connect(self) -> None:
|
|
235
247
|
"""Connect to database."""
|
|
@@ -338,8 +350,19 @@ class Base(object):
|
|
|
338
350
|
|
|
339
351
|
@property
|
|
340
352
|
def session(self) -> sessionmaker:
|
|
341
|
-
|
|
342
|
-
|
|
353
|
+
if self._session is None:
|
|
354
|
+
Session = sessionmaker(bind=self.engine, autoflush=True)
|
|
355
|
+
self._session = Session()
|
|
356
|
+
return self._session
|
|
357
|
+
|
|
358
|
+
def close_session(self) -> None:
|
|
359
|
+
"""Close the cached session and reset it."""
|
|
360
|
+
if self._session is not None:
|
|
361
|
+
try:
|
|
362
|
+
self._session.close()
|
|
363
|
+
except Exception:
|
|
364
|
+
pass
|
|
365
|
+
self._session = None
|
|
343
366
|
|
|
344
367
|
@property
|
|
345
368
|
def engine(self) -> sa.engine.Engine:
|
|
@@ -726,6 +749,21 @@ class Base(object):
|
|
|
726
749
|
)
|
|
727
750
|
)[0]
|
|
728
751
|
|
|
752
|
+
def get_replication_connection(
|
|
753
|
+
self, engine: sa.engine.Engine
|
|
754
|
+
) -> psycopg2.extensions.connection:
|
|
755
|
+
url: sa.engine.URL = make_url(str(engine.url))
|
|
756
|
+
# Build a libpq-style connection by keyword args
|
|
757
|
+
conn: psycopg2.extensions.connection = psycopg2.connect(
|
|
758
|
+
host=url.host,
|
|
759
|
+
port=url.port or 5432,
|
|
760
|
+
user=url.username,
|
|
761
|
+
password=url.password,
|
|
762
|
+
dbname=url.database,
|
|
763
|
+
connection_factory=LogicalReplicationConnection,
|
|
764
|
+
)
|
|
765
|
+
return conn
|
|
766
|
+
|
|
729
767
|
def logical_slot_get_changes(
|
|
730
768
|
self,
|
|
731
769
|
slot_name: str,
|
|
@@ -822,6 +860,7 @@ class Base(object):
|
|
|
822
860
|
schema: str,
|
|
823
861
|
tables: t.Set,
|
|
824
862
|
user_defined_fkey_tables: dict,
|
|
863
|
+
node_columns: dict,
|
|
825
864
|
) -> None:
|
|
826
865
|
create_view(
|
|
827
866
|
self.engine,
|
|
@@ -832,6 +871,7 @@ class Base(object):
|
|
|
832
871
|
tables,
|
|
833
872
|
user_defined_fkey_tables,
|
|
834
873
|
self._materialized_views(schema),
|
|
874
|
+
node_columns,
|
|
835
875
|
)
|
|
836
876
|
|
|
837
877
|
def drop_view(self, schema: str) -> None:
|
|
@@ -946,9 +986,9 @@ class Base(object):
|
|
|
946
986
|
"""Check if the trigger function exists."""
|
|
947
987
|
return self.exists(
|
|
948
988
|
sa.text(
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
989
|
+
"SELECT 1 FROM pg_proc WHERE proname = :name "
|
|
990
|
+
"AND pronamespace = (SELECT oid FROM pg_namespace "
|
|
991
|
+
"WHERE nspname = :schema)"
|
|
952
992
|
).bindparams(name=TRIGGER_FUNC, schema=schema),
|
|
953
993
|
)
|
|
954
994
|
|
|
@@ -989,7 +1029,7 @@ class Base(object):
|
|
|
989
1029
|
self.disable_trigger(schema, table)
|
|
990
1030
|
logger.debug(f"Disabled trigger on table: {schema}.{table}")
|
|
991
1031
|
|
|
992
|
-
def enable_trigger(self, schema: str, table
|
|
1032
|
+
def enable_trigger(self, schema: str, table: str) -> None:
|
|
993
1033
|
"""Enable a pgsync defined trigger."""
|
|
994
1034
|
for name in ("notify", "truncate"):
|
|
995
1035
|
self.execute(
|
|
@@ -1107,20 +1147,21 @@ class Base(object):
|
|
|
1107
1147
|
# including trailing space below is deliberate
|
|
1108
1148
|
suffix: str = f"{row[span[1]:]} "
|
|
1109
1149
|
|
|
1110
|
-
if "old-key" and "new-tuple" in suffix:
|
|
1150
|
+
if "old-key" in suffix and "new-tuple" in suffix:
|
|
1111
1151
|
# this can only be an UPDATE operation
|
|
1112
1152
|
if payload.tg_op != UPDATE:
|
|
1113
1153
|
msg = f"Unknown {payload.tg_op} operation for row: {row}"
|
|
1114
1154
|
raise LogicalSlotParseError(msg)
|
|
1115
1155
|
|
|
1116
|
-
i: int = suffix.
|
|
1156
|
+
i: int = suffix.find("old-key:")
|
|
1117
1157
|
if i > -1:
|
|
1118
|
-
j: int = suffix.
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1158
|
+
j: int = suffix.find("new-tuple:")
|
|
1159
|
+
if j > -1:
|
|
1160
|
+
s: str = suffix[i + len("old-key:") : j]
|
|
1161
|
+
for key, value in _parse_logical_slot(s):
|
|
1162
|
+
payload.old[key] = value
|
|
1122
1163
|
|
|
1123
|
-
i = suffix.
|
|
1164
|
+
i = suffix.find("new-tuple:")
|
|
1124
1165
|
if i > -1:
|
|
1125
1166
|
s = suffix[i + len("new-tuple:") :]
|
|
1126
1167
|
for key, value in _parse_logical_slot(s):
|
|
@@ -1336,7 +1377,28 @@ def _pg_engine(
|
|
|
1336
1377
|
password=password,
|
|
1337
1378
|
port=port,
|
|
1338
1379
|
)
|
|
1339
|
-
|
|
1380
|
+
|
|
1381
|
+
# Use NullPool for testing to avoid connection exhaustion
|
|
1382
|
+
if SQLALCHEMY_USE_NULLPOOL:
|
|
1383
|
+
from sqlalchemy.pool import NullPool
|
|
1384
|
+
|
|
1385
|
+
return sa.create_engine(
|
|
1386
|
+
url,
|
|
1387
|
+
echo=echo,
|
|
1388
|
+
connect_args=connect_args,
|
|
1389
|
+
poolclass=NullPool,
|
|
1390
|
+
)
|
|
1391
|
+
|
|
1392
|
+
return sa.create_engine(
|
|
1393
|
+
url,
|
|
1394
|
+
echo=echo,
|
|
1395
|
+
connect_args=connect_args,
|
|
1396
|
+
pool_size=SQLALCHEMY_POOL_SIZE,
|
|
1397
|
+
max_overflow=SQLALCHEMY_MAX_OVERFLOW,
|
|
1398
|
+
pool_pre_ping=SQLALCHEMY_POOL_PRE_PING,
|
|
1399
|
+
pool_recycle=SQLALCHEMY_POOL_RECYCLE,
|
|
1400
|
+
pool_timeout=SQLALCHEMY_POOL_TIMEOUT,
|
|
1401
|
+
)
|
|
1340
1402
|
|
|
1341
1403
|
|
|
1342
1404
|
def pg_execute(
|
|
@@ -1367,7 +1429,7 @@ def create_database(database: str, echo: bool = False) -> None:
|
|
|
1367
1429
|
"""Create a database."""
|
|
1368
1430
|
logger.debug(f"Creating database: {database}")
|
|
1369
1431
|
with pg_engine(
|
|
1370
|
-
|
|
1432
|
+
MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE,
|
|
1371
1433
|
echo=echo,
|
|
1372
1434
|
) as engine:
|
|
1373
1435
|
pg_execute(
|
|
@@ -1382,8 +1444,7 @@ def drop_database(database: str, echo: bool = False) -> None:
|
|
|
1382
1444
|
"""Drop a database."""
|
|
1383
1445
|
logger.debug(f"Dropping database: {database}")
|
|
1384
1446
|
with pg_engine(
|
|
1385
|
-
|
|
1386
|
-
echo=echo,
|
|
1447
|
+
MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE, echo=echo
|
|
1387
1448
|
) as engine:
|
|
1388
1449
|
pg_execute(
|
|
1389
1450
|
engine,
|
|
@@ -1396,7 +1457,7 @@ def drop_database(database: str, echo: bool = False) -> None:
|
|
|
1396
1457
|
def database_exists(database: str, echo: bool = False) -> bool:
|
|
1397
1458
|
"""Check if database is present."""
|
|
1398
1459
|
with pg_engine(
|
|
1399
|
-
|
|
1460
|
+
MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE,
|
|
1400
1461
|
echo=echo,
|
|
1401
1462
|
) as engine:
|
|
1402
1463
|
with engine.connect() as conn:
|
|
@@ -135,6 +135,7 @@ ELASTICSEARCH_MAPPING_PARAMETERS = [
|
|
|
135
135
|
"boost",
|
|
136
136
|
"coerce",
|
|
137
137
|
"copy_to",
|
|
138
|
+
"dimension",
|
|
138
139
|
"doc_values",
|
|
139
140
|
"dynamic",
|
|
140
141
|
"eager_global_ordinals",
|
|
@@ -198,6 +199,7 @@ MATERIALIZED_VIEW_COLUMNS = [
|
|
|
198
199
|
"indices",
|
|
199
200
|
"primary_keys",
|
|
200
201
|
"table_name",
|
|
202
|
+
"columns",
|
|
201
203
|
]
|
|
202
204
|
|
|
203
205
|
# Primary key delimiter
|
|
@@ -20,6 +20,7 @@ def teardown(
|
|
|
20
20
|
drop_index: bool = True,
|
|
21
21
|
delete_checkpoint: bool = True,
|
|
22
22
|
config: t.Optional[str] = None,
|
|
23
|
+
schema_url: t.Optional[str] = None,
|
|
23
24
|
s3_schema_url: t.Optional[str] = None,
|
|
24
25
|
validate: bool = False,
|
|
25
26
|
) -> None:
|
|
@@ -33,11 +34,17 @@ def teardown(
|
|
|
33
34
|
drop_index (bool, optional): Whether to drop the index. Defaults to True.
|
|
34
35
|
delete_checkpoint (bool, optional): Whether to delete the checkpoint. Defaults to True.
|
|
35
36
|
config (Optional[str], optional): The configuration file path. Defaults to None.
|
|
37
|
+
schema_url (Optional[str], optional): The schema URL. Defaults to None.
|
|
38
|
+
s3_schema_url (Optional[str], optional): The S3 schema URL. Defaults to None.
|
|
36
39
|
validate (bool, optional): Whether to validate the configuration. Defaults to False.
|
|
37
40
|
"""
|
|
38
|
-
validate_config(
|
|
41
|
+
validate_config(
|
|
42
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
43
|
+
)
|
|
39
44
|
|
|
40
|
-
for doc in config_loader(
|
|
45
|
+
for doc in config_loader(
|
|
46
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
47
|
+
):
|
|
41
48
|
if not database_exists(doc["database"]):
|
|
42
49
|
logger.warning(f'Database {doc["database"]} does not exist')
|
|
43
50
|
continue
|
|
@@ -56,7 +63,7 @@ def teardown(
|
|
|
56
63
|
drop_database(sync.database)
|
|
57
64
|
if drop_index:
|
|
58
65
|
sync.search_client.teardown(sync.index)
|
|
59
|
-
if delete_redis:
|
|
66
|
+
if delete_redis and sync.redis is not None:
|
|
60
67
|
sync.redis.delete()
|
|
61
68
|
if delete_checkpoint:
|
|
62
69
|
try:
|
|
@@ -294,7 +294,7 @@ class Node(object):
|
|
|
294
294
|
self.columns.append(self.model.c[column_name])
|
|
295
295
|
|
|
296
296
|
@property
|
|
297
|
-
def primary_keys(self):
|
|
297
|
+
def primary_keys(self) -> t.List[sa.sql.ColumnElement]:
|
|
298
298
|
return [
|
|
299
299
|
self.model.c[str(sa.text(primary_key))]
|
|
300
300
|
for primary_key in self.model.primary_keys
|
|
@@ -311,7 +311,7 @@ class Node(object):
|
|
|
311
311
|
|
|
312
312
|
def add_child(self, node: Node) -> None:
|
|
313
313
|
"""All nodes except the root node must have a relationship defined."""
|
|
314
|
-
node.parent
|
|
314
|
+
node.parent = self
|
|
315
315
|
if not node.is_root and (
|
|
316
316
|
not node.relationship.type or not node.relationship.variant
|
|
317
317
|
):
|
|
@@ -388,7 +388,9 @@ class Tree(threading.local):
|
|
|
388
388
|
raise TableNotInNodeError(f"Table not specified in node: {nodes}")
|
|
389
389
|
|
|
390
390
|
if not set(nodes.keys()).issubset(set(NODE_ATTRIBUTES)):
|
|
391
|
-
attrs = set(nodes.keys()).difference(
|
|
391
|
+
attrs: t.Set[str] = set(nodes.keys()).difference(
|
|
392
|
+
set(NODE_ATTRIBUTES)
|
|
393
|
+
)
|
|
392
394
|
raise NodeAttributeError(f"Unknown node attribute(s): {attrs}")
|
|
393
395
|
|
|
394
396
|
node: Node = Node(
|