pgsync 4.2.1__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgsync-4.2.1 → pgsync-5.0.0}/PKG-INFO +40 -14
- {pgsync-4.2.1 → pgsync-5.0.0}/README.md +43 -26
- {pgsync-4.2.1 → pgsync-5.0.0}/README.rst +26 -2
- {pgsync-4.2.1 → pgsync-5.0.0}/bin/bootstrap +1 -1
- {pgsync-4.2.1 → pgsync-5.0.0}/bin/parallel_sync +2 -2
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/__init__.py +1 -1
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/base.py +169 -69
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/constants.py +3 -3
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/helper.py +1 -1
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/node.py +100 -23
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/querybuilder.py +103 -61
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/redisqueue.py +1 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/search_client.py +9 -1
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/settings.py +31 -12
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/sync.py +577 -183
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/urls.py +21 -5
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/utils.py +69 -11
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync.egg-info/PKG-INFO +40 -14
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync.egg-info/requires.txt +11 -9
- {pgsync-4.2.1 → pgsync-5.0.0}/setup.py +3 -1
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/conftest.py +46 -38
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_base.py +63 -2
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_node.py +20 -10
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_query_builder.py +21 -15
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_search_client.py +3 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_settings.py +5 -5
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_sync.py +13 -7
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_sync_nested_children.py +8 -3
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_sync_root.py +18 -13
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_sync_single_child_fk_on_child.py +19 -14
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_sync_single_child_fk_on_parent.py +19 -14
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_trigger.py +5 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_unique_behaviour.py +6 -1
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_urls.py +18 -11
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_utils.py +23 -13
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_view.py +5 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/AUTHORS.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/CONTRIBUTING.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/HISTORY.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/LICENSE +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/MANIFEST.in +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/bin/pgsync +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/Makefile +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/authors.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/changelog.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/conf.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/contributing.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/history.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/index.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/installation.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/logo.png +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/make.bat +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/readme.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/docs/usage.rst +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/exc.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/plugin.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/singleton.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/transform.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/trigger.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync/view.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync.egg-info/SOURCES.txt +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/pyproject.toml +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/setup.cfg +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/__init__.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/fixtures/schema.json +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_constants.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_env_vars.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_helper.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_log_handlers.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_redisqueue.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/test_transform.py +0 -0
- {pgsync-4.2.1 → pgsync-5.0.0}/tests/testing_utils.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version:
|
|
4
|
-
Summary: Postgres to Elasticsearch/OpenSearch sync
|
|
3
|
+
Version: 5.0.0
|
|
4
|
+
Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
7
7
|
Author-email: tolu@pgsync.com
|
|
@@ -13,7 +13,7 @@ Project-URL: Funding, https://github.com/sponsors/toluaina
|
|
|
13
13
|
Project-URL: Source, https://github.com/toluaina/pgsync
|
|
14
14
|
Project-URL: Web, https://pgsync.com
|
|
15
15
|
Project-URL: Documentation, https://pgsync.com
|
|
16
|
-
Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres
|
|
16
|
+
Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres,mysql,mariadb
|
|
17
17
|
Classifier: Development Status :: 5 - Production/Stable
|
|
18
18
|
Classifier: Intended Audience :: Developers
|
|
19
19
|
Classifier: Natural Language :: English
|
|
@@ -32,23 +32,25 @@ License-File: LICENSE
|
|
|
32
32
|
License-File: AUTHORS.rst
|
|
33
33
|
Requires-Dist: async-timeout==5.0.1
|
|
34
34
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
35
|
-
Requires-Dist: boto3==1.40.
|
|
36
|
-
Requires-Dist: botocore==1.40.
|
|
37
|
-
Requires-Dist: certifi==2025.
|
|
35
|
+
Requires-Dist: boto3==1.40.50
|
|
36
|
+
Requires-Dist: botocore==1.40.50
|
|
37
|
+
Requires-Dist: certifi==2025.10.5
|
|
38
38
|
Requires-Dist: charset-normalizer==3.4.3
|
|
39
39
|
Requires-Dist: click==8.1.8
|
|
40
|
-
Requires-Dist: elastic-transport==
|
|
41
|
-
Requires-Dist: elasticsearch==
|
|
42
|
-
Requires-Dist: elasticsearch-dsl==
|
|
40
|
+
Requires-Dist: elastic-transport==9.1.0
|
|
41
|
+
Requires-Dist: elasticsearch==7.17.12
|
|
42
|
+
Requires-Dist: elasticsearch-dsl==7.4.1
|
|
43
43
|
Requires-Dist: environs==14.3.0
|
|
44
44
|
Requires-Dist: events==0.5
|
|
45
|
-
Requires-Dist: greenlet==3.2.4
|
|
46
45
|
Requires-Dist: idna==3.10
|
|
47
46
|
Requires-Dist: jmespath==1.0.1
|
|
48
47
|
Requires-Dist: marshmallow==4.0.1
|
|
48
|
+
Requires-Dist: mysql-replication==1.0.9
|
|
49
49
|
Requires-Dist: opensearch-dsl==2.1.0
|
|
50
50
|
Requires-Dist: opensearch-py==3.0.0
|
|
51
|
-
Requires-Dist:
|
|
51
|
+
Requires-Dist: packaging==25.0
|
|
52
|
+
Requires-Dist: psycopg2-binary==2.9.11
|
|
53
|
+
Requires-Dist: pymysql==1.1.2
|
|
52
54
|
Requires-Dist: python-dateutil==2.9.0.post0
|
|
53
55
|
Requires-Dist: python-dotenv==1.1.1
|
|
54
56
|
Requires-Dist: redis==6.4.0
|
|
@@ -56,7 +58,7 @@ Requires-Dist: requests==2.32.5
|
|
|
56
58
|
Requires-Dist: requests-aws4auth==1.3.1
|
|
57
59
|
Requires-Dist: s3transfer==0.14.0
|
|
58
60
|
Requires-Dist: six==1.17.0
|
|
59
|
-
Requires-Dist: sqlalchemy==2.0.
|
|
61
|
+
Requires-Dist: sqlalchemy==2.0.44
|
|
60
62
|
Requires-Dist: sqlparse==0.5.3
|
|
61
63
|
Requires-Dist: typing-extensions==4.15.0
|
|
62
64
|
Requires-Dist: urllib3==1.26.20
|
|
@@ -87,12 +89,12 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
87
89
|
### Requirements
|
|
88
90
|
|
|
89
91
|
- [Python](https://www.python.org) 3.9+
|
|
90
|
-
- [Postgres](https://www.postgresql.org) 9.6+
|
|
92
|
+
- [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 8.0.0+ or [MariaDB](https://mariadb.org/) 12.0.0+
|
|
91
93
|
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
92
94
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
93
95
|
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
94
96
|
|
|
95
|
-
### Postgres
|
|
97
|
+
### Postgres Setup
|
|
96
98
|
|
|
97
99
|
Enable [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) in your
|
|
98
100
|
Postgres setting.
|
|
@@ -103,6 +105,30 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
103
105
|
|
|
104
106
|
```max_replication_slots = 1```
|
|
105
107
|
|
|
108
|
+
|
|
109
|
+
### MySQL / MariaDB setup
|
|
110
|
+
|
|
111
|
+
- Enable binary logging in your MySQL / MariaDB setting.
|
|
112
|
+
|
|
113
|
+
- You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
|
|
114
|
+
|
|
115
|
+
```server-id = 1``` # any non-zero unique ID
|
|
116
|
+
|
|
117
|
+
```log_bin = mysql-bin```
|
|
118
|
+
|
|
119
|
+
```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
|
|
120
|
+
|
|
121
|
+
- optional housekeeping:
|
|
122
|
+
```binlog_expire_logs_seconds = 604800``` # 7 days
|
|
123
|
+
|
|
124
|
+
- You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
|
|
125
|
+
|
|
126
|
+
```sql
|
|
127
|
+
CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
|
|
128
|
+
GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
|
|
129
|
+
FLUSH PRIVILEGES;
|
|
130
|
+
```
|
|
131
|
+
|
|
106
132
|
### Installation
|
|
107
133
|
|
|
108
134
|
You can install PGSync from [PyPI](https://pypi.org):
|
|
@@ -7,10 +7,10 @@
|
|
|
7
7
|
[](https://codecov.io/gh/toluaina/pgsync)
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
## PostgreSQL to Elasticsearch/OpenSearch sync
|
|
10
|
+
## PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
11
11
|
|
|
12
|
-
[PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) effortlessly.
|
|
13
|
-
It allows you to keep [Postgres](https://www.postgresql.org) as your source of truth and
|
|
12
|
+
[PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) effortlessly.
|
|
13
|
+
It allows you to keep [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) as your source of truth and
|
|
14
14
|
expose structured denormalized documents in [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
|
|
15
15
|
|
|
16
16
|
Changes to nested entities are propagated to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
|
|
@@ -24,7 +24,7 @@ without writing any code.
|
|
|
24
24
|
[PGSync](https://pgsync.com) transforms your relational data into a structured document format.
|
|
25
25
|
|
|
26
26
|
It allows you to take advantage of the expressive power and scalability of
|
|
27
|
-
[Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) directly from [Postgres](https://www.postgresql.org).
|
|
27
|
+
[Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) directly from [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/).
|
|
28
28
|
You don't have to write complex queries and transformation pipelines.
|
|
29
29
|
PGSync is lightweight, flexible and fast.
|
|
30
30
|
|
|
@@ -45,7 +45,7 @@ Other benefits of PGSync include:
|
|
|
45
45
|
|
|
46
46
|
#### Why?
|
|
47
47
|
|
|
48
|
-
At a high level, you have data in a
|
|
48
|
+
At a high level, you have data in a PostgreSQL/MySQL/MariaDB database and you want to mirror it in Elasticsearch/OpenSearch.
|
|
49
49
|
This means every change to your data (***Insert***, ***Update***, ***Delete*** and ***Truncate*** statements) needs to be replicated to Elasticsearch/OpenSearch.
|
|
50
50
|
At first, this seems easy and then it's not. Simply add some code to copy the data to Elasticsearch/OpenSearch after updating the database (or so called dual writes).
|
|
51
51
|
SQL queries spanning multiple tables and involving multiple relationships are hard to write.
|
|
@@ -53,12 +53,11 @@ Detecting changes within a nested document can also be quite hard.
|
|
|
53
53
|
Of course, if your data never changed, then you could just take a snapshot in time and load it into Elasticsearch/OpenSearch as a one-off operation.
|
|
54
54
|
|
|
55
55
|
PGSync is appropriate for you if:
|
|
56
|
-
- [Postgres](https://www.postgresql.org) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
|
|
56
|
+
- [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
|
|
57
57
|
read-only search layer.
|
|
58
58
|
- You need to denormalize relational data into a NoSQL data source.
|
|
59
59
|
- Your data is constantly changing.
|
|
60
|
-
- You have existing data in a relational database such as [Postgres](https://www.postgresql.org) and you need
|
|
61
|
-
a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
|
|
60
|
+
- You have existing data in a relational database such as [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) and you need a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
|
|
62
61
|
- You want to keep your existing data untouched whilst taking advantage of
|
|
63
62
|
the search capabilities of [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) by exposing a view of your data without compromising the security of your relational data.
|
|
64
63
|
- Or you simply want to expose a view of your relational data for search purposes.
|
|
@@ -66,7 +65,7 @@ the search capabilities of [Elasticsearch](https://www.elastic.co/products/elast
|
|
|
66
65
|
|
|
67
66
|
#### How it works
|
|
68
67
|
|
|
69
|
-
PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io)/[Valkey](https://valkey.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQLAlchemy](https://www.sqlalchemy.org).
|
|
68
|
+
PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io)/[Valkey](https://valkey.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org)/[MySQL](https://www.mysql.com/)/[MariaDB](https://mariadb.org/), and [SQLAlchemy](https://www.sqlalchemy.org).
|
|
70
69
|
|
|
71
70
|
PGSync leverages the [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) feature of [Postgres](https://www.postgresql.org) (introduced in PostgreSQL 9.4) to capture a continuous stream of change events.
|
|
72
71
|
This feature needs to be enabled in your [Postgres](https://www.postgresql.org) configuration file by setting in the postgresql.conf file:
|
|
@@ -137,16 +136,16 @@ To start all services with Docker, follow these steps:
|
|
|
137
136
|
Environment variable placeholders - full list [here](https://pgsync.com/env-vars):
|
|
138
137
|
|
|
139
138
|
- redis_host_address — Address of the Redis/Valkey server (e.g., host.docker.internal for local Docker setup)
|
|
140
|
-
- username — PostgreSQL username
|
|
141
|
-
- password — PostgreSQL password
|
|
142
|
-
- postgres_host — Host address for PostgreSQL instance (e.g., host.docker.internal)
|
|
143
|
-
- database — Name of PostgreSQL database
|
|
139
|
+
- username — PostgreSQL/MySQL/MariaDB username
|
|
140
|
+
- password — PostgreSQL/MySQL/MariaDB password
|
|
141
|
+
- postgres_host — Host address for PostgreSQL/MySQL/MariaDB instance (e.g., host.docker.internal)
|
|
142
|
+
- database — Name of PostgreSQL/MySQL/MariaDB database
|
|
144
143
|
- elasticsearch_host — Address of Elasticsearch/OpenSearch instance (e.g., host.docker.internal)
|
|
145
144
|
|
|
146
145
|
|
|
147
146
|
##### Manual configuration
|
|
148
147
|
|
|
149
|
-
|
|
148
|
+
### Postgres Setup
|
|
150
149
|
- Ensure the database user is a superuser
|
|
151
150
|
- Enable logical decoding. You also need to set at least two parameters in postgresql.conf
|
|
152
151
|
|
|
@@ -159,7 +158,31 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
|
|
|
159
158
|
|
|
160
159
|
```max_slot_wal_keep_size = 100GB```
|
|
161
160
|
|
|
162
|
-
|
|
161
|
+
### MySQL / MariaDB setup
|
|
162
|
+
|
|
163
|
+
- Enable binary logging in your MySQL / MariaDB setting.
|
|
164
|
+
|
|
165
|
+
- You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
|
|
166
|
+
|
|
167
|
+
```server-id = 1``` # any non-zero unique ID
|
|
168
|
+
|
|
169
|
+
```log_bin = mysql-bin```
|
|
170
|
+
|
|
171
|
+
```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
|
|
172
|
+
|
|
173
|
+
- optional housekeeping:
|
|
174
|
+
```binlog_expire_logs_seconds = 604800``` # 7 days
|
|
175
|
+
|
|
176
|
+
- You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
|
|
177
|
+
|
|
178
|
+
```sql
|
|
179
|
+
CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
|
|
180
|
+
GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
|
|
181
|
+
FLUSH PRIVILEGES;
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Installation
|
|
185
|
+
|
|
163
186
|
- Install PGSync from pypi using pip
|
|
164
187
|
- ```$ pip install pgsync```
|
|
165
188
|
- Create a [schema.json](https://github.com/toluaina/pgsync/blob/main/examples/airbnb/schema.json) for your document representation
|
|
@@ -180,18 +203,18 @@ Key features of PGSync are:
|
|
|
180
203
|
- Negligible impact on database performance.
|
|
181
204
|
- Transactionally consistent output in Elasticsearch/OpenSearch. This means: writes appear only when they are committed to the database, insert, update and delete operations appear in the same order as they were committed (as opposed to eventual consistency).
|
|
182
205
|
- Fault-tolerant: does not lose data, even if processes crash or a network interruption occurs, etc. The process can be recovered from the last checkpoint.
|
|
183
|
-
- Returns the data directly as Postgres JSON from the database for speed.
|
|
206
|
+
- Returns the data directly as Postgres/MySQL/MariaDB JSON from the database for speed.
|
|
184
207
|
- Supports composite primary and foreign keys.
|
|
185
208
|
- Supports Views and Materialized views.
|
|
186
209
|
- Supports an arbitrary depth of nested entities, i.e. tables having a long chain of relationship dependencies.
|
|
187
|
-
- Supports
|
|
210
|
+
- Supports PostgreSQL/MySQL/MariaDB JSON data fields. This means: we can extract JSON fields in a database table as a separate field in the resulting document.
|
|
188
211
|
- Customizable document structure.
|
|
189
212
|
|
|
190
213
|
|
|
191
214
|
#### Requirements
|
|
192
215
|
|
|
193
216
|
- [Python](https://www.python.org) 3.9+
|
|
194
|
-
- [Postgres](https://www.postgresql.org) 9.6+
|
|
217
|
+
- [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 5.7.22+ or [MariaDB](https://mariadb.org/) 10.5.0+
|
|
195
218
|
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
196
219
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
197
220
|
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
@@ -327,7 +350,7 @@ PGSync addresses the following challenges:
|
|
|
327
350
|
- PGSync generates advanced queries matching your schema directly.
|
|
328
351
|
- PGSync allows you to easily rebuild your indexes in case of a schema change.
|
|
329
352
|
- You can expose only the data you require in Elasticsearch/OpenSearch.
|
|
330
|
-
- Supports multiple Postgres schemas for multi-tennant applications.
|
|
353
|
+
- Supports multiple Postgres/MySQL/MariaDB schemas for multi-tenant applications.
|
|
331
354
|
|
|
332
355
|
|
|
333
356
|
#### Contributing
|
|
@@ -335,16 +358,10 @@ PGSync addresses the following challenges:
|
|
|
335
358
|
Contributions are very welcome! Check out the [Contribution](CONTRIBUTING.rst) Guidelines for instructions.
|
|
336
359
|
|
|
337
360
|
|
|
338
|
-
#### Credits
|
|
339
|
-
|
|
340
|
-
- This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter)
|
|
341
|
-
- Elasticsearch is a trademark of Elasticsearch BV, registered in the U.S. and in other countries.
|
|
342
|
-
|
|
343
|
-
|
|
344
361
|
#### License
|
|
345
362
|
|
|
346
363
|
This project is licensed under the terms of the [MIT](https://opensource.org/license/mit/) license.
|
|
347
364
|
Please see [LICENSE](LICENSE) for more details.
|
|
348
365
|
|
|
349
|
-
You should have received a copy of the MIT License along with PGSync
|
|
366
|
+
You should have received a copy of the MIT License along with **PGSync**.
|
|
350
367
|
If not, see https://opensource.org/license/mit/.
|
|
@@ -9,12 +9,12 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
9
9
|
### Requirements
|
|
10
10
|
|
|
11
11
|
- [Python](https://www.python.org) 3.9+
|
|
12
|
-
- [Postgres](https://www.postgresql.org) 9.6+
|
|
12
|
+
- [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 8.0.0+ or [MariaDB](https://mariadb.org/) 12.0.0+
|
|
13
13
|
- [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
|
|
14
14
|
- [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
|
|
15
15
|
- [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
|
|
16
16
|
|
|
17
|
-
### Postgres
|
|
17
|
+
### Postgres Setup
|
|
18
18
|
|
|
19
19
|
Enable [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) in your
|
|
20
20
|
Postgres setting.
|
|
@@ -25,6 +25,30 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
|
|
|
25
25
|
|
|
26
26
|
```max_replication_slots = 1```
|
|
27
27
|
|
|
28
|
+
|
|
29
|
+
### MySQL / MariaDB setup
|
|
30
|
+
|
|
31
|
+
- Enable binary logging in your MySQL / MariaDB setting.
|
|
32
|
+
|
|
33
|
+
- You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
|
|
34
|
+
|
|
35
|
+
```server-id = 1``` # any non-zero unique ID
|
|
36
|
+
|
|
37
|
+
```log_bin = mysql-bin```
|
|
38
|
+
|
|
39
|
+
```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
|
|
40
|
+
|
|
41
|
+
- optional housekeeping:
|
|
42
|
+
```binlog_expire_logs_seconds = 604800``` # 7 days
|
|
43
|
+
|
|
44
|
+
- You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
|
|
45
|
+
|
|
46
|
+
```sql
|
|
47
|
+
CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
|
|
48
|
+
GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
|
|
49
|
+
FLUSH PRIVILEGES;
|
|
50
|
+
```
|
|
51
|
+
|
|
28
52
|
### Installation
|
|
29
53
|
|
|
30
54
|
You can install PGSync from [PyPI](https://pypi.org):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
|
|
3
3
|
"""
|
|
4
|
-
Parallel sync is
|
|
4
|
+
Parallel sync is a feature designed to optimize
|
|
5
5
|
throughput by utilizing available CPUs/threads, particularly beneficial
|
|
6
6
|
in environments experiencing high network latency.
|
|
7
7
|
|
|
@@ -106,7 +106,7 @@ def logical_slot_changes(
|
|
|
106
106
|
txmin: int = sync.checkpoint
|
|
107
107
|
txmax: int = sync.txid_current
|
|
108
108
|
sync.logical_slot_changes(txmin=txmin, txmax=txmax)
|
|
109
|
-
sync.checkpoint
|
|
109
|
+
sync.checkpoint = txmax or sync.txid_current
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
@dataclass
|