pgsync 4.2.1__tar.gz → 6.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {pgsync-4.2.1 → pgsync-6.0.0}/PKG-INFO +46 -19
  2. {pgsync-4.2.1 → pgsync-6.0.0}/README.md +69 -26
  3. {pgsync-4.2.1 → pgsync-6.0.0}/README.rst +26 -2
  4. {pgsync-4.2.1 → pgsync-6.0.0}/bin/bootstrap +27 -6
  5. {pgsync-4.2.1 → pgsync-6.0.0}/bin/parallel_sync +35 -9
  6. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/__init__.py +1 -1
  7. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/base.py +226 -83
  8. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/constants.py +5 -3
  9. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/helper.py +10 -3
  10. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/node.py +105 -26
  11. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/plugin.py +2 -2
  12. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/querybuilder.py +103 -61
  13. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/redisqueue.py +9 -3
  14. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/search_client.py +9 -1
  15. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/settings.py +49 -12
  16. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/singleton.py +1 -1
  17. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/sync.py +656 -206
  18. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/trigger.py +24 -4
  19. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/urls.py +35 -7
  20. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/utils.py +135 -23
  21. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/view.py +65 -11
  22. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/PKG-INFO +46 -19
  23. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/requires.txt +16 -14
  24. {pgsync-4.2.1 → pgsync-6.0.0}/setup.py +4 -1
  25. {pgsync-4.2.1 → pgsync-6.0.0}/tests/conftest.py +84 -44
  26. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_base.py +63 -2
  27. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_constants.py +1 -0
  28. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_node.py +20 -10
  29. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_query_builder.py +21 -15
  30. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_search_client.py +3 -0
  31. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_settings.py +5 -5
  32. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync.py +17 -7
  33. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync_nested_children.py +43 -6
  34. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync_root.py +37 -15
  35. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync_single_child_fk_on_child.py +43 -16
  36. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_sync_single_child_fk_on_parent.py +43 -16
  37. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_trigger.py +29 -4
  38. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_unique_behaviour.py +7 -3
  39. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_urls.py +20 -13
  40. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_utils.py +36 -14
  41. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_view.py +8 -2
  42. {pgsync-4.2.1 → pgsync-6.0.0}/AUTHORS.rst +0 -0
  43. {pgsync-4.2.1 → pgsync-6.0.0}/CONTRIBUTING.rst +0 -0
  44. {pgsync-4.2.1 → pgsync-6.0.0}/HISTORY.rst +0 -0
  45. {pgsync-4.2.1 → pgsync-6.0.0}/LICENSE +0 -0
  46. {pgsync-4.2.1 → pgsync-6.0.0}/MANIFEST.in +0 -0
  47. {pgsync-4.2.1 → pgsync-6.0.0}/bin/pgsync +0 -0
  48. {pgsync-4.2.1 → pgsync-6.0.0}/docs/Makefile +0 -0
  49. {pgsync-4.2.1 → pgsync-6.0.0}/docs/authors.rst +0 -0
  50. {pgsync-4.2.1 → pgsync-6.0.0}/docs/changelog.rst +0 -0
  51. {pgsync-4.2.1 → pgsync-6.0.0}/docs/conf.py +0 -0
  52. {pgsync-4.2.1 → pgsync-6.0.0}/docs/contributing.rst +0 -0
  53. {pgsync-4.2.1 → pgsync-6.0.0}/docs/history.rst +0 -0
  54. {pgsync-4.2.1 → pgsync-6.0.0}/docs/index.rst +0 -0
  55. {pgsync-4.2.1 → pgsync-6.0.0}/docs/installation.rst +0 -0
  56. {pgsync-4.2.1 → pgsync-6.0.0}/docs/logo.png +0 -0
  57. {pgsync-4.2.1 → pgsync-6.0.0}/docs/make.bat +0 -0
  58. {pgsync-4.2.1 → pgsync-6.0.0}/docs/readme.rst +0 -0
  59. {pgsync-4.2.1 → pgsync-6.0.0}/docs/usage.rst +0 -0
  60. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/exc.py +0 -0
  61. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync/transform.py +0 -0
  62. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/SOURCES.txt +0 -0
  63. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/dependency_links.txt +0 -0
  64. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/not-zip-safe +0 -0
  65. {pgsync-4.2.1 → pgsync-6.0.0}/pgsync.egg-info/top_level.txt +0 -0
  66. {pgsync-4.2.1 → pgsync-6.0.0}/pyproject.toml +0 -0
  67. {pgsync-4.2.1 → pgsync-6.0.0}/setup.cfg +0 -0
  68. {pgsync-4.2.1 → pgsync-6.0.0}/tests/__init__.py +0 -0
  69. {pgsync-4.2.1 → pgsync-6.0.0}/tests/fixtures/schema.json +0 -0
  70. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_env_vars.py +0 -0
  71. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_helper.py +0 -0
  72. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_log_handlers.py +0 -0
  73. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_redisqueue.py +0 -0
  74. {pgsync-4.2.1 → pgsync-6.0.0}/tests/test_transform.py +0 -0
  75. {pgsync-4.2.1 → pgsync-6.0.0}/tests/testing_utils.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pgsync
3
- Version: 4.2.1
4
- Summary: Postgres to Elasticsearch/OpenSearch sync
3
+ Version: 6.0.0
4
+ Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
5
5
  Home-page: https://github.com/toluaina/pgsync
6
6
  Author: Tolu Aina
7
7
  Author-email: tolu@pgsync.com
@@ -13,7 +13,7 @@ Project-URL: Funding, https://github.com/sponsors/toluaina
13
13
  Project-URL: Source, https://github.com/toluaina/pgsync
14
14
  Project-URL: Web, https://pgsync.com
15
15
  Project-URL: Documentation, https://pgsync.com
16
- Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres
16
+ Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres,mysql,mariadb
17
17
  Classifier: Development Status :: 5 - Production/Stable
18
18
  Classifier: Intended Audience :: Developers
19
19
  Classifier: Natural Language :: English
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
24
  Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
25
26
  Classifier: Programming Language :: Python :: Implementation :: CPython
26
27
  Classifier: Programming Language :: Python :: Implementation :: PyPy
27
28
  Classifier: License :: OSI Approved :: MIT License
@@ -32,31 +33,33 @@ License-File: LICENSE
32
33
  License-File: AUTHORS.rst
33
34
  Requires-Dist: async-timeout==5.0.1
34
35
  Requires-Dist: backports-datetime-fromisoformat==2.0.3
35
- Requires-Dist: boto3==1.40.35
36
- Requires-Dist: botocore==1.40.35
37
- Requires-Dist: certifi==2025.8.3
38
- Requires-Dist: charset-normalizer==3.4.3
36
+ Requires-Dist: boto3==1.40.64
37
+ Requires-Dist: botocore==1.40.64
38
+ Requires-Dist: certifi==2025.10.5
39
+ Requires-Dist: charset-normalizer==3.4.4
39
40
  Requires-Dist: click==8.1.8
40
- Requires-Dist: elastic-transport==8.17.1
41
- Requires-Dist: elasticsearch==8.19.1
42
- Requires-Dist: elasticsearch-dsl==8.15.4
43
- Requires-Dist: environs==14.3.0
41
+ Requires-Dist: elastic-transport==9.1.0
42
+ Requires-Dist: elasticsearch==7.17.12
43
+ Requires-Dist: elasticsearch-dsl==7.4.1
44
+ Requires-Dist: environs==14.4.0
44
45
  Requires-Dist: events==0.5
45
- Requires-Dist: greenlet==3.2.4
46
- Requires-Dist: idna==3.10
46
+ Requires-Dist: idna==3.11
47
47
  Requires-Dist: jmespath==1.0.1
48
48
  Requires-Dist: marshmallow==4.0.1
49
+ Requires-Dist: mysql-replication==1.0.9
49
50
  Requires-Dist: opensearch-dsl==2.1.0
50
51
  Requires-Dist: opensearch-py==3.0.0
51
- Requires-Dist: psycopg2-binary==2.9.10
52
+ Requires-Dist: packaging==25.0
53
+ Requires-Dist: psycopg2-binary==2.9.11
54
+ Requires-Dist: pymysql==1.1.2
52
55
  Requires-Dist: python-dateutil==2.9.0.post0
53
- Requires-Dist: python-dotenv==1.1.1
54
- Requires-Dist: redis==6.4.0
56
+ Requires-Dist: python-dotenv==1.2.1
57
+ Requires-Dist: redis==7.0.1
55
58
  Requires-Dist: requests==2.32.5
56
59
  Requires-Dist: requests-aws4auth==1.3.1
57
60
  Requires-Dist: s3transfer==0.14.0
58
61
  Requires-Dist: six==1.17.0
59
- Requires-Dist: sqlalchemy==2.0.43
62
+ Requires-Dist: sqlalchemy==2.0.44
60
63
  Requires-Dist: sqlparse==0.5.3
61
64
  Requires-Dist: typing-extensions==4.15.0
62
65
  Requires-Dist: urllib3==1.26.20
@@ -87,12 +90,12 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
87
90
  ### Requirements
88
91
 
89
92
  - [Python](https://www.python.org) 3.9+
90
- - [Postgres](https://www.postgresql.org) 9.6+
93
+ - [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 8.0.0+ or [MariaDB](https://mariadb.org/) 12.0.0+
91
94
  - [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
92
95
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
93
96
  - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
94
97
 
95
- ### Postgres setup
98
+ ### Postgres Setup
96
99
 
97
100
  Enable [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) in your
98
101
  Postgres setting.
@@ -103,6 +106,30 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
103
106
 
104
107
  ```max_replication_slots = 1```
105
108
 
109
+
110
+ ### MySQL / MariaDB setup
111
+
112
+ - Enable binary logging in your MySQL / MariaDB setting.
113
+
114
+ - You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
115
+
116
+ ```server-id = 1``` # any non-zero unique ID
117
+
118
+ ```log_bin = mysql-bin```
119
+
120
+ ```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
121
+
122
+ - optional housekeeping:
123
+ ```binlog_expire_logs_seconds = 604800``` # 7 days
124
+
125
+ - You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
126
+
127
+ ```sql
128
+ CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
129
+ GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
130
+ FLUSH PRIVILEGES;
131
+ ```
132
+
106
133
  ### Installation
107
134
 
108
135
  You can install PGSync from [PyPI](https://pypi.org):
@@ -5,12 +5,13 @@
5
5
  [![Python versions](https://img.shields.io/pypi/pyversions/pgsync)](https://pypi.org/project/pgsync)
6
6
  [![Downloads](https://img.shields.io/pypi/dm/pgsync)](https://pypi.org/project/pgsync)
7
7
  [![codecov](https://codecov.io/gh/toluaina/pgsync/branch/main/graph/badge.svg?token=cvQzYkz6CV)](https://codecov.io/gh/toluaina/pgsync)
8
+ [![Sponsored by DigitalOcean](https://img.shields.io/badge/Sponsored%20by-DigitalOcean-0080FF?logo=digitalocean&logoColor=white)](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync)
8
9
 
9
10
 
10
- ## PostgreSQL to Elasticsearch/OpenSearch sync
11
+ ## PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
11
12
 
12
- [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) effortlessly.
13
- It allows you to keep [Postgres](https://www.postgresql.org) as your source of truth and
13
+ [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) effortlessly.
14
+ It allows you to keep [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) as your source of truth and
14
15
  expose structured denormalized documents in [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
15
16
 
16
17
  Changes to nested entities are propagated to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
@@ -24,7 +25,7 @@ without writing any code.
24
25
  [PGSync](https://pgsync.com) transforms your relational data into a structured document format.
25
26
 
26
27
  It allows you to take advantage of the expressive power and scalability of
27
- [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) directly from [Postgres](https://www.postgresql.org).
28
+ [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) directly from [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/).
28
29
  You don't have to write complex queries and transformation pipelines.
29
30
  PGSync is lightweight, flexible and fast.
30
31
 
@@ -43,9 +44,26 @@ Other benefits of PGSync include:
43
44
  - Scale on-demand (multiple consumers)
44
45
  - Easily join multiple nested tables
45
46
 
47
+ ## Sponsors
48
+
49
+ [PGSync](https://pgsync.com) is made possible with support from [DigitalOcean](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync).
50
+
51
+ <p>
52
+ <a href="https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync" rel="sponsored noopener noreferrer">
53
+ <img
54
+ src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg"
55
+ alt="DigitalOcean"
56
+ width="210"
57
+ loading="lazy"
58
+ decoding="async"
59
+ >
60
+ </a>
61
+ </p>
62
+
63
+
46
64
  #### Why?
47
65
 
48
- At a high level, you have data in a Postgres database and you want to mirror it in Elasticsearch/OpenSearch.
66
+ At a high level, you have data in a PostgreSQL/MySQL/MariaDB database and you want to mirror it in Elasticsearch/OpenSearch.
49
67
  This means every change to your data (***Insert***, ***Update***, ***Delete*** and ***Truncate*** statements) needs to be replicated to Elasticsearch/OpenSearch.
50
68
  At first, this seems easy and then it's not. Simply add some code to copy the data to Elasticsearch/OpenSearch after updating the database (so-called dual writes).
51
69
  SQL queries spanning multiple tables and involving multiple relationships are hard to write.
@@ -53,12 +71,11 @@ Detecting changes within a nested document can also be quite hard.
53
71
  Of course, if your data never changed, then you could just take a snapshot in time and load it into Elasticsearch/OpenSearch as a one-off operation.
54
72
 
55
73
  PGSync is appropriate for you if:
56
- - [Postgres](https://www.postgresql.org) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
74
+ - [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
57
75
  read-only search layer.
58
76
  - You need to denormalize relational data into a NoSQL data source.
59
77
  - Your data is constantly changing.
60
- - You have existing data in a relational database such as [Postgres](https://www.postgresql.org) and you need
61
- a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
78
+ - You have existing data in a relational database such as [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) and you need a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
62
79
  - You want to keep your existing data untouched whilst taking advantage of
63
80
  the search capabilities of [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) by exposing a view of your data without compromising the security of your relational data.
64
81
  - Or you simply want to expose a view of your relational data for search purposes.
@@ -66,7 +83,7 @@ the search capabilities of [Elasticsearch](https://www.elastic.co/products/elast
66
83
 
67
84
  #### How it works
68
85
 
69
- PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io)/[Valkey](https://valkey.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQLAlchemy](https://www.sqlalchemy.org).
86
+ PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io)/[Valkey](https://valkey.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org)/[MySQL](https://www.mysql.com/)/[MariaDB](https://mariadb.org/), and [SQLAlchemy](https://www.sqlalchemy.org).
70
87
 
71
88
  PGSync leverages the [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) feature of [Postgres](https://www.postgresql.org) (introduced in PostgreSQL 9.4) to capture a continuous stream of change events.
72
89
  This feature needs to be enabled in your [Postgres](https://www.postgresql.org) configuration file by setting in the postgresql.conf file:
@@ -92,6 +109,14 @@ There are several ways of installing and trying PGSync
92
109
  - [Running in Docker](#running-in-docker) is the easiest way to get up and running.
93
110
  - [Manual configuration](#manual-configuration)
94
111
 
112
+ #### Book Demo Example (requires a DigitalOcean account)
113
+
114
+ [![Deploy to DO](https://www.deploytodo.com/do-btn-blue.svg)](https://cloud.digitalocean.com/apps/new?repo=https://github.com/toluaina/pgsync/tree/main)
115
+
116
+ Fill in the following during the setup
117
+ - `ELASTICSEARCH_URL` e.g. https://user:pass@os-host:443
118
+ - `REDIS_URL` e.g. rediss://default:pass@host:port/0
119
+
95
120
 
96
121
  ##### Running in Docker (Using Github Repository)
97
122
 
@@ -137,16 +162,16 @@ To start all services with Docker, follow these steps:
137
162
  Environment variable placeholders - full list [here](https://pgsync.com/env-vars):
138
163
 
139
164
  - redis_host_address — Address of the Redis/Valkey server (e.g., host.docker.internal for local Docker setup)
140
- - username — PostgreSQL username
141
- - password — PostgreSQL password
142
- - postgres_host — Host address for PostgreSQL instance (e.g., host.docker.internal)
143
- - database — Name of PostgreSQL database
165
+ - username — PostgreSQL/MySQL/MariaDB username
166
+ - password — PostgreSQL/MySQL/MariaDB password
167
+ - postgres_host — Host address for PostgreSQL/MySQL/MariaDB instance (e.g., host.docker.internal)
168
+ - database — Name of PostgreSQL/MySQL/MariaDB database
144
169
  - elasticsearch_host — Address of Elasticsearch/OpenSearch instance (e.g., host.docker.internal)
145
170
 
146
171
 
147
172
  ##### Manual configuration
148
173
 
149
- - Setup
174
+ ### Postgres Setup
150
175
  - Ensure the database user is a superuser
151
176
  - Enable logical decoding. You would also need to set up at least two parameters at postgresql.conf
152
177
 
@@ -159,7 +184,31 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
159
184
 
160
185
  ```max_slot_wal_keep_size = 100GB```
161
186
 
162
- - Installation
187
+ ### MySQL / MariaDB setup
188
+
189
+ - Enable binary logging in your MySQL / MariaDB setting.
190
+
191
+ - You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
192
+
193
+ ```server-id = 1``` # any non-zero unique ID
194
+
195
+ ```log_bin = mysql-bin```
196
+
197
+ ```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
198
+
199
+ - optional housekeeping:
200
+ ```binlog_expire_logs_seconds = 604800``` # 7 days
201
+
202
+ - You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
203
+
204
+ ```sql
205
+ CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
206
+ GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
207
+ FLUSH PRIVILEGES;
208
+ ```
209
+
210
+ ### Installation
211
+
163
212
  - Install PGSync from pypi using pip
164
213
  - ```$ pip install pgsync```
165
214
  - Create a [schema.json](https://github.com/toluaina/pgsync/blob/main/examples/airbnb/schema.json) for your document representation
@@ -180,18 +229,18 @@ Key features of PGSync are:
180
229
  - Negligible impact on database performance.
181
230
  - Transactionally consistent output in Elasticsearch/OpenSearch. This means: writes appear only when they are committed to the database, insert, update and delete operations appear in the same order as they were committed (as opposed to eventual consistency).
182
231
  - Fault-tolerant: does not lose data, even if processes crash or a network interruption occurs, etc. The process can be recovered from the last checkpoint.
183
- - Returns the data directly as Postgres JSON from the database for speed.
232
+ - Returns the data directly as Postgres/MySQL/MariaDB JSON from the database for speed.
184
233
  - Supports composite primary and foreign keys.
185
234
  - Supports Views and Materialized views.
186
235
  - Supports an arbitrary depth of nested entities, i.e. tables having a long chain of relationship dependencies.
187
- - Supports Postgres JSON data fields. This means: we can extract JSON fields in a database table as a separate field in the resulting document.
236
+ - Supports PostgreSQL/MySQL/MariaDB JSON data fields. This means: we can extract JSON fields in a database table as a separate field in the resulting document.
188
237
  - Customizable document structure.
189
238
 
190
239
 
191
240
  #### Requirements
192
241
 
193
242
  - [Python](https://www.python.org) 3.9+
194
- - [Postgres](https://www.postgresql.org) 9.6+
243
+ - [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 5.7.22+ or [MariaDB](https://mariadb.org/) 10.5.0+
195
244
  - [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
196
245
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
197
246
  - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
@@ -327,7 +376,7 @@ PGSync addresses the following challenges:
327
376
  - PGSync generates advanced queries matching your schema directly.
328
377
  - PGSync allows you to easily rebuild your indexes in case of a schema change.
329
378
  - You can expose only the data you require in Elasticsearch/OpenSearch.
330
- - Supports multiple Postgres schemas for multi-tennant applications.
379
+ - Supports multiple Postgres/MySQL/MariaDB schemas for multi-tenant applications.
331
380
 
332
381
 
333
382
  #### Contributing
@@ -335,16 +384,10 @@ PGSync addresses the following challenges:
335
384
  Contributions are very welcome! Check out the [Contribution](CONTRIBUTING.rst) Guidelines for instructions.
336
385
 
337
386
 
338
- #### Credits
339
-
340
- - This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter)
341
- - Elasticsearch is a trademark of Elasticsearch BV, registered in the U.S. and in other countries.
342
-
343
-
344
387
  #### License
345
388
 
346
389
  This project is licensed under the terms of the [MIT](https://opensource.org/license/mit/) license.
347
390
  Please see [LICENSE](LICENSE) for more details.
348
391
 
349
- You should have received a copy of the MIT License along with PGSync.
392
+ You should have received a copy of the MIT License along with **PGSync**.
350
393
  If not, see https://opensource.org/license/mit/.
@@ -9,12 +9,12 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
9
9
  ### Requirements
10
10
 
11
11
  - [Python](https://www.python.org) 3.9+
12
- - [Postgres](https://www.postgresql.org) 9.6+
12
+ - [Postgres](https://www.postgresql.org) 9.6+ or [MySQL](https://www.mysql.com/) 8.0.0+ or [MariaDB](https://mariadb.org/) 12.0.0+
13
13
  - [Redis](https://redis.io) 3.1.0+ or [Valkey](https://valkey.io) 7.2.0+
14
14
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
15
15
  - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
16
16
 
17
- ### Postgres setup
17
+ ### Postgres Setup
18
18
 
19
19
  Enable [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) in your
20
20
  Postgres setting.
@@ -25,6 +25,30 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
25
25
 
26
26
  ```max_replication_slots = 1```
27
27
 
28
+
29
+ ### MySQL / MariaDB setup
30
+
31
+ - Enable binary logging in your MySQL / MariaDB setting.
32
+
33
+ - You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
34
+
35
+ ```server-id = 1``` # any non-zero unique ID
36
+
37
+ ```log_bin = mysql-bin```
38
+
39
+ ```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
40
+
41
+ - optional housekeeping:
42
+ ```binlog_expire_logs_seconds = 604800``` # 7 days
43
+
44
+ - You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
45
+
46
+ ```sql
47
+ CREATE USER 'replicator'@'%' IDENTIFIED WITH mysql_native_password BY 'password';
48
+ GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'@'%';
49
+ FLUSH PRIVILEGES;
50
+ ```
51
+
28
52
  ### Installation
29
53
 
30
54
  You can install PGSync from [PyPI](https://pypi.org):
@@ -26,7 +26,16 @@ logger = logging.getLogger(__name__)
26
26
  default=settings.SCHEMA,
27
27
  show_default=True,
28
28
  cls=MutuallyExclusiveOption,
29
- mutually_exclusive=["s3_schema_url"],
29
+ mutually_exclusive=["s3_schema_url", "schema_url"],
30
+ )
31
+ @click.option(
32
+ "--schema_url",
33
+ help="URL for schema config",
34
+ type=click.STRING,
35
+ default=settings.SCHEMA_URL,
36
+ show_default=True,
37
+ cls=MutuallyExclusiveOption,
38
+ mutually_exclusive=["config", "s3_schema_url"],
30
39
  )
31
40
  @click.option(
32
41
  "--s3_schema_url",
@@ -35,7 +44,7 @@ logger = logging.getLogger(__name__)
35
44
  default=settings.S3_SCHEMA_URL,
36
45
  show_default=True,
37
46
  cls=MutuallyExclusiveOption,
38
- mutually_exclusive=["config"],
47
+ mutually_exclusive=["config", "schema_url"],
39
48
  )
40
49
  @click.option("--host", "-h", help="PG_HOST override")
41
50
  @click.option("--password", is_flag=True, help="Prompt for database password")
@@ -67,6 +76,7 @@ logger = logging.getLogger(__name__)
67
76
  def main(
68
77
  teardown: bool,
69
78
  config: str,
79
+ schema_url: str,
70
80
  s3_schema_url: str,
71
81
  user: str,
72
82
  password: bool,
@@ -75,7 +85,7 @@ def main(
75
85
  verbose: bool,
76
86
  no_create: bool = False,
77
87
  ) -> None:
78
- """Application onetime Bootstrap."""
88
+ """Application onetime bootstrap."""
79
89
  kwargs: dict = {
80
90
  "user": user,
81
91
  "host": host,
@@ -89,13 +99,24 @@ def main(
89
99
  )
90
100
  kwargs = {key: value for key, value in kwargs.items() if value is not None}
91
101
 
92
- validate_config(config=config, s3_schema_url=s3_schema_url)
102
+ validate_config(
103
+ config=config,
104
+ schema_url=schema_url,
105
+ s3_schema_url=s3_schema_url,
106
+ )
93
107
 
94
- show_settings(config=config, s3_schema_url=s3_schema_url)
108
+ show_settings(
109
+ config=config,
110
+ schema_url=schema_url,
111
+ s3_schema_url=s3_schema_url,
112
+ **kwargs,
113
+ )
95
114
 
96
115
  validate: bool = False if teardown else True
97
116
 
98
- for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
117
+ for doc in config_loader(
118
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
119
+ ):
99
120
  sync: Sync = Sync(
100
121
  doc,
101
122
  verbose=verbose,
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
 
3
3
  """
4
- Parallel sync is an innovative, experimental feature designed to optimize
4
+ Parallel sync is a feature designed to optimize
5
5
  throughput by utilizing available CPUs/threads, particularly beneficial
6
6
  in environments experiencing high network latency.
7
7
 
@@ -53,7 +53,13 @@ from threading import Thread
53
53
  import click
54
54
  import sqlalchemy as sa
55
55
 
56
- from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH, S3_SCHEMA_URL, SCHEMA
56
+ from pgsync.settings import (
57
+ BLOCK_SIZE,
58
+ CHECKPOINT_PATH,
59
+ S3_SCHEMA_URL,
60
+ SCHEMA,
61
+ SCHEMA_URL,
62
+ )
57
63
  from pgsync.sync import Sync
58
64
  from pgsync.utils import (
59
65
  config_loader,
@@ -106,7 +112,7 @@ def logical_slot_changes(
106
112
  txmin: int = sync.checkpoint
107
113
  txmax: int = sync.txid_current
108
114
  sync.logical_slot_changes(txmin=txmin, txmax=txmax)
109
- sync.checkpoint: int = txmax or sync.txid_current
115
+ sync.checkpoint = txmax or sync.txid_current
110
116
 
111
117
 
112
118
  @dataclass
@@ -386,7 +392,16 @@ def run_task(
386
392
  default=SCHEMA,
387
393
  show_default=True,
388
394
  cls=MutuallyExclusiveOption,
389
- mutually_exclusive=["s3_schema_url"],
395
+ mutually_exclusive=["s3_schema_url", "schema_url"],
396
+ )
397
+ @click.option(
398
+ "--schema_url",
399
+ help="URL for schema config",
400
+ type=click.STRING,
401
+ default=SCHEMA_URL,
402
+ show_default=True,
403
+ cls=MutuallyExclusiveOption,
404
+ mutually_exclusive=["config", "s3_schema_url"],
390
405
  )
391
406
  @click.option(
392
407
  "--s3_schema_url",
@@ -395,7 +410,7 @@ def run_task(
395
410
  default=S3_SCHEMA_URL,
396
411
  show_default=True,
397
412
  cls=MutuallyExclusiveOption,
398
- mutually_exclusive=["config"],
413
+ mutually_exclusive=["config", "schema_url"],
399
414
  )
400
415
  @click.option(
401
416
  "--verbose",
@@ -428,7 +443,12 @@ def run_task(
428
443
  default="multiprocess_async",
429
444
  )
430
445
  def main(
431
- config: str, s3_schema_url: str, nprocs: int, mode: str, verbose: bool
446
+ config: str,
447
+ schema_url: str,
448
+ s3_schema_url: str,
449
+ nprocs: int,
450
+ mode: str,
451
+ verbose: bool,
432
452
  ) -> None:
433
453
  """
434
454
  TODO:
@@ -436,11 +456,17 @@ def main(
436
456
  - Handle KeyboardInterrupt Exception
437
457
  """
438
458
 
439
- validate_config(config=config, s3_schema_url=s3_schema_url)
459
+ validate_config(
460
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
461
+ )
440
462
 
441
- show_settings(config=config, s3_schema_url=s3_schema_url)
463
+ show_settings(
464
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
465
+ )
442
466
 
443
- for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
467
+ for doc in config_loader(
468
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
469
+ ):
444
470
  tasks: t.Generator = fetch_tasks(doc)
445
471
  if mode == "synchronous":
446
472
  synchronous(tasks, doc, verbose=verbose)
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = "Tolu Aina"
4
4
  __email__ = "tolu@pgsync.com"
5
- __version__ = "4.2.1"
5
+ __version__ = "6.0.0"