pgsync 4.2.1__tar.gz → 6.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgsync-4.2.1 → pgsync-6.0.0}/PKG-INFO +46 -19
- {pgsync-4.2.1 → pgsync-6.0.0}/README.md +69 -26
- {pgsync-4.2.1 → pgsync-6.0.0}/README.rst +26 -2
- {pgsync-4.2.1 → pgsync-6.0.0}/bin/bootstrap +27 -6
- {pgsync-4.2.1 → pgsync-6.0.0}/bin/parallel_sync +35 -9
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/__init__.py +1 -1
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/base.py +226 -83
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/constants.py +5 -3
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/helper.py +10 -3
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/node.py +105 -26
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/plugin.py +2 -2
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/querybuilder.py +103 -61
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/redisqueue.py +9 -3
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/search_client.py +9 -1
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/settings.py +49 -12
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/singleton.py +1 -1
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/sync.py +656 -206
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/trigger.py +24 -4
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/urls.py +35 -7
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/utils.py +135 -23
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/view.py +65 -11
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/PKG-INFO +46 -19
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/requires.txt +16 -14
- {pgsync-4.2.1 → pgsync-6.0.0}/setup.py +4 -1
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/conftest.py +84 -44
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_base.py +63 -2
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_constants.py +1 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_node.py +20 -10
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_query_builder.py +21 -15
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_search_client.py +3 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_settings.py +5 -5
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync.py +17 -7
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync_nested_children.py +43 -6
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync_root.py +37 -15
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync_single_child_fk_on_child.py +43 -16
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync_single_child_fk_on_parent.py +43 -16
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_trigger.py +29 -4
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_unique_behaviour.py +7 -3
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_urls.py +20 -13
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_utils.py +36 -14
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_view.py +8 -2
- {pgsync-4.2.1 → pgsync-6.0.0}/AUTHORS.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/CONTRIBUTING.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/HISTORY.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/LICENSE +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/MANIFEST.in +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/bin/pgsync +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/Makefile +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/authors.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/changelog.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/conf.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/contributing.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/history.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/index.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/installation.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/logo.png +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/make.bat +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/readme.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/docs/usage.rst +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/exc.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/transform.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/SOURCES.txt +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/pyproject.toml +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/setup.cfg +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/__init__.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/fixtures/schema.json +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_env_vars.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_helper.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_log_handlers.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_redisqueue.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_transform.py +0 -0
- {pgsync-4.2.1 → pgsync-6.0.0}/tests/testing_utils.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version: 4.2.1
|
|
4
|
-
Summary: Postgres to Elasticsearch/OpenSearch sync
|
|
3
|
+
Version: 6.0.0
|
|
4
|
+
Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
7
7
|
Author-email: tolu@pgsync.com
|
|
@@ -13,7 +13,7 @@ Project-URL: Funding, https://github.com/sponsors/toluaina
|
|
|
13
13
|
Project-URL: Source, https://github.com/toluaina/pgsync
|
|
14
14
|
Project-URL: Web, https://pgsync.com
|
|
15
15
|
Project-URL: Documentation, https://pgsync.com
|
|
16
|
-
Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres
|
|
16
|
+
Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres,mysql,mariadb
|
|
17
17
|
Classifier: Development Status :: 5 - Production/Stable
|
|
18
18
|
Classifier: Intended Audience :: Developers
|
|
19
19
|
Classifier: Natural Language :: English
|
|
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.11
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.12
|
|
24
24
|
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
25
26
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
26
27
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
27
28
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -32,31 +33,33 @@ License-File: LICENSE
|
|
|
32
33
|
License-File: AUTHORS.rst
|
|
33
34
|
Requires-Dist: async-timeout==5.0.1
|
|
34
35
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
35
|
-
Requires-Dist: boto3==1.40.
|
|
36
|
-
Requires-Dist: botocore==1.40.
|
|
37
|
-
Requires-Dist: certifi==2025.
|
|
38
|
-
Requires-Dist: charset-normalizer==3.4.
|
|
36
|
+
Requires-Dist: boto3==1.40.64
|
|
37
|
+
Requires-Dist: botocore==1.40.64
|
|
38
|
+
Requires-Dist: certifi==2025.10.5
|
|
39
|
+
Requires-Dist: charset-normalizer==3.4.4
|
|
39
40
|
Requires-Dist: click==8.1.8
|
|
40
|
-
Requires-Dist: elastic-transport==
|
|
41
|
-
Requires-Dist: elasticsearch==
|
|
42
|
-
Requires-Dist: elasticsearch-dsl==
|
|
43
|
-
Requires-Dist: environs==14.
|
|
41
|
+
Requires-Dist: elastic-transport==9.1.0
|
|
42
|
+
Requires-Dist: elasticsearch==7.17.12
|
|
43
|
+
Requires-Dist: elasticsearch-dsl==7.4.1
|
|
44
|
+
Requires-Dist: environs==14.4.0
|
|
44
45
|
Requires-Dist: events==0.5
|
|
45
|
-
Requires-Dist:
|
|
46
|
-
Requires-Dist: idna==3.10
|
|
46
|
+
Requires-Dist: idna==3.11
|
|
47
47
|
Requires-Dist: jmespath==1.0.1
|
|
48
48
|
Requires-Dist: marshmallow==4.0.1
|
|
49
|
+
Requires-Dist: mysql-replication==1.0.9
|
|
49
50
|
Requires-Dist: opensearch-dsl==2.1.0
|
|
50
51
|
Requires-Dist: opensearch-py==3.0.0
|
|
51
|
-
Requires-Dist:
|
|
52
|
+
Requires-Dist: packaging==25.0
|
|
53
|
+
Requires-Dist: psycopg2-binary==2.9.11
|
|
54
|
+
Requires-Dist: pymysql==1.1.2
|
|
52
55
|
Requires-Dist: python-dateutil==2.9.0.post0
|
|
53
|
-
Requires-Dist: python-dotenv==1.
|
|
54
|
-
Requires-Dist: redis==
|
|
56
|
+
Requires-Dist: python-dotenv==1.2.1
|
|
57
|
+
Requires-Dist: redis==7.0.1
|
|
55
58
|
Requires-Dist: requests==2.32.5
|
|
56
59
|
Requires-Dist: requests-aws4auth==1.3.1
|
|
57
60
|
Requires-Dist: s3transfer==0.14.0
|
|
58
61
|
Requires-Dist: six==1.17.0
|
|
59
|
-
Requires-Dist: sqlalchemy==2.0.
|
|
62
|
+
Requires-Dist: sqlalchemy==2.0.44
|
|
60
63
|
Requires-Dist: sqlparse==0.5.3
|
|
61
64
|
Requires-Dist: typing-extensions==4.15.0
|
|
62
65
|
Requires-Dist: urllib3==1.26.20
|
|
@@ -87,12 +90,12 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
87
90
|
### Requirements
|
|
88
91
|
|
|
89
92
|
- [Python](https://www.python.org) 3.9+
|
|
90
|
-
- [Postgres](https://www.postgresql.org) 9.6+
|
|
93
|
+
- [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 8.0.0+ or [MariaDB](https://mariadb.org/) 12.0.0+
|
|
91
94
|
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
92
95
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
93
96
|
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
94
97
|
|
|
95
|
-
### Postgres
|
|
98
|
+
### Postgres Setup
|
|
96
99
|
|
|
97
100
|
Enable [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) in your
|
|
98
101
|
Postgres setting.
|
|
@@ -103,6 +106,30 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
103
106
|
|
|
104
107
|
```max_replication_slots = 1```
|
|
105
108
|
|
|
109
|
+
|
|
110
|
+
### MySQL / MariaDB setup
|
|
111
|
+
|
|
112
|
+
- Enable binary logging in your MySQL / MariaDB setting.
|
|
113
|
+
|
|
114
|
+
- You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
|
|
115
|
+
|
|
116
|
+
```server-id = 1``` # any non-zero unique ID
|
|
117
|
+
|
|
118
|
+
```log_bin = mysql-bin```
|
|
119
|
+
|
|
120
|
+
```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
|
|
121
|
+
|
|
122
|
+
- optional housekeeping:
|
|
123
|
+
```binlog_expire_logs_seconds = 604800``` # 7 days
|
|
124
|
+
|
|
125
|
+
- You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
|
|
126
|
+
|
|
127
|
+
```sql
|
|
128
|
+
CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
|
|
129
|
+
GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
|
|
130
|
+
FLUSH PRIVILEGES;
|
|
131
|
+
```
|
|
132
|
+
|
|
106
133
|
### Installation
|
|
107
134
|
|
|
108
135
|
You can install PGSync from [PyPI](https://pypi.org):
|
|
@@ -5,12 +5,13 @@
|
|
|
5
5
|
[](https://pypi.org/project/pgsync)
|
|
6
6
|
[](https://pypi.org/project/pgsync)
|
|
7
7
|
[](https://codecov.io/gh/toluaina/pgsync)
|
|
8
|
+
[](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync)
|
|
8
9
|
|
|
9
10
|
|
|
10
|
-
## PostgreSQL to Elasticsearch/OpenSearch sync
|
|
11
|
+
## PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
11
12
|
|
|
12
|
-
[PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) effortlessly.
|
|
13
|
-
It allows you to keep [Postgres](https://www.postgresql.org) as your source of truth and
|
|
13
|
+
[PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) effortlessly.
|
|
14
|
+
It allows you to keep [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) as your source of truth and
|
|
14
15
|
expose structured denormalized documents in [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
|
|
15
16
|
|
|
16
17
|
Changes to nested entities are propagated to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
|
|
@@ -24,7 +25,7 @@ without writing any code.
|
|
|
24
25
|
[PGSync](https://pgsync.com) transforms your relational data into a structured document format.
|
|
25
26
|
|
|
26
27
|
It allows you to take advantage of the expressive power and scalability of
|
|
27
|
-
[Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) directly from [Postgres](https://www.postgresql.org).
|
|
28
|
+
[Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) directly from [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/).
|
|
28
29
|
You don't have to write complex queries and transformation pipelines.
|
|
29
30
|
PGSync is lightweight, flexible and fast.
|
|
30
31
|
|
|
@@ -43,9 +44,26 @@ Other benefits of PGSync include:
|
|
|
43
44
|
- Scale on-demand (multiple consumers)
|
|
44
45
|
- Easily join multiple nested tables
|
|
45
46
|
|
|
47
|
+
## Sponsors
|
|
48
|
+
|
|
49
|
+
[PGSync](https://pgsync.com) is made possible with support from [DigitalOcean](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync).
|
|
50
|
+
|
|
51
|
+
<p>
|
|
52
|
+
<a href="https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync" rel="sponsored noopener noreferrer">
|
|
53
|
+
<img
|
|
54
|
+
src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg"
|
|
55
|
+
alt="DigitalOcean"
|
|
56
|
+
width="210"
|
|
57
|
+
loading="lazy"
|
|
58
|
+
decoding="async"
|
|
59
|
+
>
|
|
60
|
+
</a>
|
|
61
|
+
</p>
|
|
62
|
+
|
|
63
|
+
|
|
46
64
|
#### Why?
|
|
47
65
|
|
|
48
|
-
At a high level, you have data in a Postgres database and you want to mirror it in Elasticsearch/OpenSearch.
|
|
66
|
+
At a high level, you have data in a PostgreSQL/MySQL/MariaDB database and you want to mirror it in Elasticsearch/OpenSearch.
|
|
49
67
|
This means every change to your data (***Insert***, ***Update***, ***Delete*** and ***Truncate*** statements) needs to be replicated to Elasticsearch/OpenSearch.
|
|
50
68
|
At first, this seems easy and then it's not. Simply add some code to copy the data to Elasticsearch/OpenSearch after updating the database (or so called dual writes).
|
|
51
69
|
SQL queries spanning multiple tables and involving multiple relationships are hard to write.
|
|
@@ -53,12 +71,11 @@ Detecting changes within a nested document can also be quite hard.
|
|
|
53
71
|
Of course, if your data never changed, then you could just take a snapshot in time and load it into Elasticsearch/OpenSearch as a one-off operation.
|
|
54
72
|
|
|
55
73
|
PGSync is appropriate for you if:
|
|
56
|
-
- [Postgres](https://www.postgresql.org) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
|
|
74
|
+
- [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
|
|
57
75
|
read-only search layer.
|
|
58
76
|
- You need to denormalize relational data into a NoSQL data source.
|
|
59
77
|
- Your data is constantly changing.
|
|
60
|
-
- You have existing data in a relational database such as [Postgres](https://www.postgresql.org) and you need
|
|
61
|
-
a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
|
|
78
|
+
- You have existing data in a relational database such as [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) and you need a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
|
|
62
79
|
- You want to keep your existing data untouched whilst taking advantage of
|
|
63
80
|
the search capabilities of [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) by exposing a view of your data without compromising the security of your relational data.
|
|
64
81
|
- Or you simply want to expose a view of your relational data for search purposes.
|
|
@@ -66,7 +83,7 @@ the search capabilities of [Elasticsearch](https://www.elastic.co/products/elast
|
|
|
66
83
|
|
|
67
84
|
#### How it works
|
|
68
85
|
|
|
69
|
-
PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io)/[Valkey](https://valkey.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQLAlchemy](https://www.sqlalchemy.org).
|
|
86
|
+
PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io)/[Valkey](https://valkey.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org)/[MySQL](https://www.mysql.com/)/[MariaDB](https://mariadb.org/), and [SQLAlchemy](https://www.sqlalchemy.org).
|
|
70
87
|
|
|
71
88
|
PGSync leverages the [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) feature of [Postgres](https://www.postgresql.org) (introduced in PostgreSQL 9.4) to capture a continuous stream of change events.
|
|
72
89
|
This feature needs to be enabled in your [Postgres](https://www.postgresql.org) configuration file by setting in the postgresql.conf file:
|
|
@@ -92,6 +109,14 @@ There are several ways of installing and trying PGSync
|
|
|
92
109
|
- [Running in Docker](#running-in-docker) is the easiest way to get up and running.
|
|
93
110
|
- [Manual configuration](#manual-configuration)
|
|
94
111
|
|
|
112
|
+
#### Book Demo Example (requires a DigitalOcean account)
|
|
113
|
+
|
|
114
|
+
[](https://cloud.digitalocean.com/apps/new?repo=https://github.com/toluaina/pgsync/tree/main)
|
|
115
|
+
|
|
116
|
+
Fill in the following during the setup
|
|
117
|
+
- `ELASTICSEARCH_URL` e.g. https://user:pass@os-host:443
|
|
118
|
+
- `REDIS_URL` e.g. rediss://default:pass@host:port/0
|
|
119
|
+
|
|
95
120
|
|
|
96
121
|
##### Running in Docker (Using Github Repository)
|
|
97
122
|
|
|
@@ -137,16 +162,16 @@ To start all services with Docker, follow these steps:
|
|
|
137
162
|
Environment variable placeholders - full list [here](https://pgsync.com/env-vars):
|
|
138
163
|
|
|
139
164
|
- redis_host_address — Address of the Redis/Valkey server (e.g., host.docker.internal for local Docker setup)
|
|
140
|
-
- username — PostgreSQL username
|
|
141
|
-
- password — PostgreSQL password
|
|
142
|
-
- postgres_host — Host address for PostgreSQL instance (e.g., host.docker.internal)
|
|
143
|
-
- database — Name of PostgreSQL database
|
|
165
|
+
- username — PostgreSQL/MySQL/MariaDB username
|
|
166
|
+
- password — PostgreSQL/MySQL/MariaDB password
|
|
167
|
+
- postgres_host — Host address for PostgreSQL/MySQL/MariaDB instance (e.g., host.docker.internal)
|
|
168
|
+
- database — Name of PostgreSQL/MySQL/MariaDB database
|
|
144
169
|
- elasticsearch_host — Address of Elasticsearch/OpenSearch instance (e.g., host.docker.internal)
|
|
145
170
|
|
|
146
171
|
|
|
147
172
|
##### Manual configuration
|
|
148
173
|
|
|
149
|
-
|
|
174
|
+
### Postgres Setup
|
|
150
175
|
- Ensure the database user is a superuser
|
|
151
176
|
- Enable logical decoding. You would also need to set up at least two parameters at postgresql.conf
|
|
152
177
|
|
|
@@ -159,7 +184,31 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
|
|
|
159
184
|
|
|
160
185
|
```max_slot_wal_keep_size = 100GB```
|
|
161
186
|
|
|
162
|
-
|
|
187
|
+
### MySQL / MariaDB setup
|
|
188
|
+
|
|
189
|
+
- Enable binary logging in your MySQL / MariaDB setting.
|
|
190
|
+
|
|
191
|
+
- You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
|
|
192
|
+
|
|
193
|
+
```server-id = 1``` # any non-zero unique ID
|
|
194
|
+
|
|
195
|
+
```log_bin = mysql-bin```
|
|
196
|
+
|
|
197
|
+
```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
|
|
198
|
+
|
|
199
|
+
- optional housekeeping:
|
|
200
|
+
```binlog_expire_logs_seconds = 604800``` # 7 days
|
|
201
|
+
|
|
202
|
+
- You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
|
|
203
|
+
|
|
204
|
+
```sql
|
|
205
|
+
CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
|
|
206
|
+
GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
|
|
207
|
+
FLUSH PRIVILEGES;
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Installation
|
|
211
|
+
|
|
163
212
|
- Install PGSync from pypi using pip
|
|
164
213
|
- ```$ pip install pgsync```
|
|
165
214
|
- Create a [schema.json](https://github.com/toluaina/pgsync/blob/main/examples/airbnb/schema.json) for your document representation
|
|
@@ -180,18 +229,18 @@ Key features of PGSync are:
|
|
|
180
229
|
- Negligible impact on database performance.
|
|
181
230
|
- Transactionally consistent output in Elasticsearch/OpenSearch. This means: writes appear only when they are committed to the database, insert, update and delete operations appear in the same order as they were committed (as opposed to eventual consistency).
|
|
182
231
|
- Fault-tolerant: does not lose data, even if processes crash or a network interruption occurs, etc. The process can be recovered from the last checkpoint.
|
|
183
|
-
- Returns the data directly as Postgres JSON from the database for speed.
|
|
232
|
+
- Returns the data directly as Postgres/MySQL/MariaDB JSON from the database for speed.
|
|
184
233
|
- Supports composite primary and foreign keys.
|
|
185
234
|
- Supports Views and Materialized views.
|
|
186
235
|
- Supports an arbitrary depth of nested entities, i.e. tables having a long chain of relationship dependencies.
|
|
187
|
-
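- Supports Postgres JSON data fields. This means: we can extract JSON fields in a database table as a separate field in the resulting document.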
|
|
236
|
+
- Supports PostgreSQL/MySQL/MariaDB JSON data fields. This means: we can extract JSON fields in a database table as a separate field in the resulting document.
|
|
188
237
|
- Customizable document structure.
|
|
189
238
|
|
|
190
239
|
|
|
191
240
|
#### Requirements
|
|
192
241
|
|
|
193
242
|
- [Python](https://www.python.org) 3.9+
|
|
194
|
-
- [Postgres](https://www.postgresql.org) 9.6+
|
|
243
|
+
- [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 5.7.22+ or [MariaDB](https://mariadb.org/) 10.5.0+
|
|
195
244
|
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
196
245
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
197
246
|
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
@@ -327,7 +376,7 @@ PGSync addresses the following challenges:
|
|
|
327
376
|
- PGSync generates advanced queries matching your schema directly.
|
|
328
377
|
- PGSync allows you to easily rebuild your indexes in case of a schema change.
|
|
329
378
|
- You can expose only the data you require in Elasticsearch/OpenSearch.
|
|
330
|
-
- Supports multiple Postgres schemas for multi-tenant applications.
|
|
379
|
+
- Supports multiple Postgres/MySQL/MariaDB schemas for multi-tenant applications.
|
|
331
380
|
|
|
332
381
|
|
|
333
382
|
#### Contributing
|
|
@@ -335,16 +384,10 @@ PGSync addresses the following challenges:
|
|
|
335
384
|
Contributions are very welcome! Check out the [Contribution](CONTRIBUTING.rst) Guidelines for instructions.
|
|
336
385
|
|
|
337
386
|
|
|
338
|
-
#### Credits
|
|
339
|
-
|
|
340
|
-
- This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter)
|
|
341
|
-
- Elasticsearch is a trademark of Elasticsearch BV, registered in the U.S. and in other countries.
|
|
342
|
-
|
|
343
|
-
|
|
344
387
|
#### License
|
|
345
388
|
|
|
346
389
|
This project is licensed under the terms of the [MIT](https://opensource.org/license/mit/) license.
|
|
347
390
|
Please see [LICENSE](LICENSE) for more details.
|
|
348
391
|
|
|
349
|
-
You should have received a copy of the MIT License along with PGSync
|
|
392
|
+
You should have received a copy of the MIT License along with **PGSync**.
|
|
350
393
|
If not, see https://opensource.org/license/mit/.
|
|
@@ -9,12 +9,12 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
9
9
|
### Requirements
|
|
10
10
|
|
|
11
11
|
- [Python](https://www.python.org) 3.9+
|
|
12
|
-
- [Postgres](https://www.postgresql.org) 9.6+
|
|
12
|
+
- [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 8.0.0+ or [MariaDB](https://mariadb.org/) 12.0.0+
|
|
13
13
|
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
14
14
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
15
15
|
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
16
16
|
|
|
17
|
-
### Postgres
|
|
17
|
+
### Postgres Setup
|
|
18
18
|
|
|
19
19
|
Enable [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) in your
|
|
20
20
|
Postgres setting.
|
|
@@ -25,6 +25,30 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
25
25
|
|
|
26
26
|
```max_replication_slots = 1```
|
|
27
27
|
|
|
28
|
+
|
|
29
|
+
### MySQL / MariaDB setup
|
|
30
|
+
|
|
31
|
+
- Enable binary logging in your MySQL / MariaDB setting.
|
|
32
|
+
|
|
33
|
+
- You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
|
|
34
|
+
|
|
35
|
+
```server-id = 1``` # any non-zero unique ID
|
|
36
|
+
|
|
37
|
+
```log_bin = mysql-bin```
|
|
38
|
+
|
|
39
|
+
```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
|
|
40
|
+
|
|
41
|
+
- optional housekeeping:
|
|
42
|
+
```binlog_expire_logs_seconds = 604800``` # 7 days
|
|
43
|
+
|
|
44
|
+
- You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
|
|
45
|
+
|
|
46
|
+
```sql
|
|
47
|
+
CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
|
|
48
|
+
GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
|
|
49
|
+
FLUSH PRIVILEGES;
|
|
50
|
+
```
|
|
51
|
+
|
|
28
52
|
### Installation
|
|
29
53
|
|
|
30
54
|
You can install PGSync from [PyPI](https://pypi.org):
|
|
@@ -26,7 +26,16 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
default=settings.SCHEMA,
|
|
27
27
|
show_default=True,
|
|
28
28
|
cls=MutuallyExclusiveOption,
|
|
29
|
-
mutually_exclusive=["s3_schema_url"],
|
|
29
|
+
mutually_exclusive=["s3_schema_url", "schema_url"],
|
|
30
|
+
)
|
|
31
|
+
@click.option(
|
|
32
|
+
"--schema_url",
|
|
33
|
+
help="URL for schema config",
|
|
34
|
+
type=click.STRING,
|
|
35
|
+
default=settings.SCHEMA_URL,
|
|
36
|
+
show_default=True,
|
|
37
|
+
cls=MutuallyExclusiveOption,
|
|
38
|
+
mutually_exclusive=["config", "s3_schema_url"],
|
|
30
39
|
)
|
|
31
40
|
@click.option(
|
|
32
41
|
"--s3_schema_url",
|
|
@@ -35,7 +44,7 @@ logger = logging.getLogger(__name__)
|
|
|
35
44
|
default=settings.S3_SCHEMA_URL,
|
|
36
45
|
show_default=True,
|
|
37
46
|
cls=MutuallyExclusiveOption,
|
|
38
|
-
mutually_exclusive=["config"],
|
|
47
|
+
mutually_exclusive=["config", "schema_url"],
|
|
39
48
|
)
|
|
40
49
|
@click.option("--host", "-h", help="PG_HOST override")
|
|
41
50
|
@click.option("--password", is_flag=True, help="Prompt for database password")
|
|
@@ -67,6 +76,7 @@ logger = logging.getLogger(__name__)
|
|
|
67
76
|
def main(
|
|
68
77
|
teardown: bool,
|
|
69
78
|
config: str,
|
|
79
|
+
schema_url: str,
|
|
70
80
|
s3_schema_url: str,
|
|
71
81
|
user: str,
|
|
72
82
|
password: bool,
|
|
@@ -75,7 +85,7 @@ def main(
|
|
|
75
85
|
verbose: bool,
|
|
76
86
|
no_create: bool = False,
|
|
77
87
|
) -> None:
|
|
78
|
-
"""Application onetime
|
|
88
|
+
"""Application onetime bootstrap."""
|
|
79
89
|
kwargs: dict = {
|
|
80
90
|
"user": user,
|
|
81
91
|
"host": host,
|
|
@@ -89,13 +99,24 @@ def main(
|
|
|
89
99
|
)
|
|
90
100
|
kwargs = {key: value for key, value in kwargs.items() if value is not None}
|
|
91
101
|
|
|
92
|
-
validate_config(
|
|
102
|
+
validate_config(
|
|
103
|
+
config=config,
|
|
104
|
+
schema_url=schema_url,
|
|
105
|
+
s3_schema_url=s3_schema_url,
|
|
106
|
+
)
|
|
93
107
|
|
|
94
|
-
show_settings(
|
|
108
|
+
show_settings(
|
|
109
|
+
config=config,
|
|
110
|
+
schema_url=schema_url,
|
|
111
|
+
s3_schema_url=s3_schema_url,
|
|
112
|
+
**kwargs,
|
|
113
|
+
)
|
|
95
114
|
|
|
96
115
|
validate: bool = False if teardown else True
|
|
97
116
|
|
|
98
|
-
for doc in config_loader(
|
|
117
|
+
for doc in config_loader(
|
|
118
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
119
|
+
):
|
|
99
120
|
sync: Sync = Sync(
|
|
100
121
|
doc,
|
|
101
122
|
verbose=verbose,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
|
|
3
3
|
"""
|
|
4
|
-
Parallel sync is
|
|
4
|
+
Parallel sync is a feature designed to optimize
|
|
5
5
|
throughput by utilizing available CPUs/threads, particularly beneficial
|
|
6
6
|
in environments experiencing high network latency.
|
|
7
7
|
|
|
@@ -53,7 +53,13 @@ from threading import Thread
|
|
|
53
53
|
import click
|
|
54
54
|
import sqlalchemy as sa
|
|
55
55
|
|
|
56
|
-
from pgsync.settings import
|
|
56
|
+
from pgsync.settings import (
|
|
57
|
+
BLOCK_SIZE,
|
|
58
|
+
CHECKPOINT_PATH,
|
|
59
|
+
S3_SCHEMA_URL,
|
|
60
|
+
SCHEMA,
|
|
61
|
+
SCHEMA_URL,
|
|
62
|
+
)
|
|
57
63
|
from pgsync.sync import Sync
|
|
58
64
|
from pgsync.utils import (
|
|
59
65
|
config_loader,
|
|
@@ -106,7 +112,7 @@ def logical_slot_changes(
|
|
|
106
112
|
txmin: int = sync.checkpoint
|
|
107
113
|
txmax: int = sync.txid_current
|
|
108
114
|
sync.logical_slot_changes(txmin=txmin, txmax=txmax)
|
|
109
|
-
sync.checkpoint
|
|
115
|
+
sync.checkpoint = txmax or sync.txid_current
|
|
110
116
|
|
|
111
117
|
|
|
112
118
|
@dataclass
|
|
@@ -386,7 +392,16 @@ def run_task(
|
|
|
386
392
|
default=SCHEMA,
|
|
387
393
|
show_default=True,
|
|
388
394
|
cls=MutuallyExclusiveOption,
|
|
389
|
-
mutually_exclusive=["s3_schema_url"],
|
|
395
|
+
mutually_exclusive=["s3_schema_url", "schema_url"],
|
|
396
|
+
)
|
|
397
|
+
@click.option(
|
|
398
|
+
"--schema_url",
|
|
399
|
+
help="URL for schema config",
|
|
400
|
+
type=click.STRING,
|
|
401
|
+
default=SCHEMA_URL,
|
|
402
|
+
show_default=True,
|
|
403
|
+
cls=MutuallyExclusiveOption,
|
|
404
|
+
mutually_exclusive=["config", "s3_schema_url"],
|
|
390
405
|
)
|
|
391
406
|
@click.option(
|
|
392
407
|
"--s3_schema_url",
|
|
@@ -395,7 +410,7 @@ def run_task(
|
|
|
395
410
|
default=S3_SCHEMA_URL,
|
|
396
411
|
show_default=True,
|
|
397
412
|
cls=MutuallyExclusiveOption,
|
|
398
|
-
mutually_exclusive=["config"],
|
|
413
|
+
mutually_exclusive=["config", "schema_url"],
|
|
399
414
|
)
|
|
400
415
|
@click.option(
|
|
401
416
|
"--verbose",
|
|
@@ -428,7 +443,12 @@ def run_task(
|
|
|
428
443
|
default="multiprocess_async",
|
|
429
444
|
)
|
|
430
445
|
def main(
|
|
431
|
-
config: str,
|
|
446
|
+
config: str,
|
|
447
|
+
schema_url: str,
|
|
448
|
+
s3_schema_url: str,
|
|
449
|
+
nprocs: int,
|
|
450
|
+
mode: str,
|
|
451
|
+
verbose: bool,
|
|
432
452
|
) -> None:
|
|
433
453
|
"""
|
|
434
454
|
TODO:
|
|
@@ -436,11 +456,17 @@ def main(
|
|
|
436
456
|
- Handle KeyboardInterrupt Exception
|
|
437
457
|
"""
|
|
438
458
|
|
|
439
|
-
validate_config(
|
|
459
|
+
validate_config(
|
|
460
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
461
|
+
)
|
|
440
462
|
|
|
441
|
-
show_settings(
|
|
463
|
+
show_settings(
|
|
464
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
465
|
+
)
|
|
442
466
|
|
|
443
|
-
for doc in config_loader(
|
|
467
|
+
for doc in config_loader(
|
|
468
|
+
config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
|
|
469
|
+
):
|
|
444
470
|
tasks: t.Generator = fetch_tasks(doc)
|
|
445
471
|
if mode == "synchronous":
|
|
446
472
|
synchronous(tasks, doc, verbose=verbose)
|