pgsync 6.0.0__tar.gz → 6.1.0__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- {pgsync-6.0.0 → pgsync-6.1.0}/PKG-INFO +7 -7
- {pgsync-6.0.0 → pgsync-6.1.0}/README.md +32 -29
- {pgsync-6.0.0 → pgsync-6.1.0}/README.rst +1 -1
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/__init__.py +1 -1
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/base.py +18 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/helper.py +1 -1
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/sync.py +286 -133
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/utils.py +4 -1
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync.egg-info/PKG-INFO +7 -7
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync.egg-info/requires.txt +5 -5
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_sync.py +8 -5
- {pgsync-6.0.0 → pgsync-6.1.0}/AUTHORS.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/CONTRIBUTING.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/HISTORY.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/LICENSE +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/MANIFEST.in +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/bin/bootstrap +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/bin/parallel_sync +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/bin/pgsync +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/Makefile +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/authors.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/changelog.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/conf.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/contributing.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/history.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/index.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/installation.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/logo.png +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/make.bat +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/readme.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/docs/usage.rst +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/constants.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/exc.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/node.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/plugin.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/querybuilder.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/redisqueue.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/search_client.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/settings.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/singleton.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/transform.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/trigger.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/urls.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync/view.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync.egg-info/SOURCES.txt +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/pyproject.toml +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/setup.cfg +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/setup.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/__init__.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/conftest.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/fixtures/schema.json +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_base.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_constants.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_env_vars.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_helper.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_log_handlers.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_node.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_query_builder.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_redisqueue.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_search_client.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_settings.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_sync_nested_children.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_sync_root.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_sync_single_child_fk_on_child.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_sync_single_child_fk_on_parent.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_transform.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_trigger.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_unique_behaviour.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_urls.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_utils.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/test_view.py +0 -0
- {pgsync-6.0.0 → pgsync-6.1.0}/tests/testing_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version: 6.0.0
|
|
3
|
+
Version: 6.1.0
|
|
4
4
|
Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
@@ -33,9 +33,9 @@ License-File: LICENSE
|
|
|
33
33
|
License-File: AUTHORS.rst
|
|
34
34
|
Requires-Dist: async-timeout==5.0.1
|
|
35
35
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
36
|
-
Requires-Dist: boto3==1.
|
|
37
|
-
Requires-Dist: botocore==1.
|
|
38
|
-
Requires-Dist: certifi==2025.
|
|
36
|
+
Requires-Dist: boto3==1.41.2
|
|
37
|
+
Requires-Dist: botocore==1.41.2
|
|
38
|
+
Requires-Dist: certifi==2025.11.12
|
|
39
39
|
Requires-Dist: charset-normalizer==3.4.4
|
|
40
40
|
Requires-Dist: click==8.1.8
|
|
41
41
|
Requires-Dist: elastic-transport==9.1.0
|
|
@@ -46,7 +46,7 @@ Requires-Dist: events==0.5
|
|
|
46
46
|
Requires-Dist: idna==3.11
|
|
47
47
|
Requires-Dist: jmespath==1.0.1
|
|
48
48
|
Requires-Dist: marshmallow==4.0.1
|
|
49
|
-
Requires-Dist: mysql-replication==1.0.
|
|
49
|
+
Requires-Dist: mysql-replication==1.0.12
|
|
50
50
|
Requires-Dist: opensearch-dsl==2.1.0
|
|
51
51
|
Requires-Dist: opensearch-py==3.0.0
|
|
52
52
|
Requires-Dist: packaging==25.0
|
|
@@ -57,7 +57,7 @@ Requires-Dist: python-dotenv==1.2.1
|
|
|
57
57
|
Requires-Dist: redis==7.0.1
|
|
58
58
|
Requires-Dist: requests==2.32.5
|
|
59
59
|
Requires-Dist: requests-aws4auth==1.3.1
|
|
60
|
-
Requires-Dist: s3transfer==0.
|
|
60
|
+
Requires-Dist: s3transfer==0.15.0
|
|
61
61
|
Requires-Dist: six==1.17.0
|
|
62
62
|
Requires-Dist: sqlalchemy==2.0.44
|
|
63
63
|
Requires-Dist: sqlparse==0.5.3
|
|
@@ -79,7 +79,7 @@ Dynamic: requires-dist
|
|
|
79
79
|
Dynamic: requires-python
|
|
80
80
|
Dynamic: summary
|
|
81
81
|
|
|
82
|
-
# PostgreSQL to Elasticsearch/OpenSearch sync
|
|
82
|
+
# PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
83
83
|
|
|
84
84
|
|
|
85
85
|
- [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) or [OpenSearch](https://opensearch.org/).
|
|
@@ -73,7 +73,7 @@ Of course, if your data never changed, then you could just take a snapshot in ti
|
|
|
73
73
|
PGSync is appropriate for you if:
|
|
74
74
|
- [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
|
|
75
75
|
read-only search layer.
|
|
76
|
-
- You need to denormalize relational data into a NoSQL data source.
|
|
76
|
+
- You need to denormalize relational data into a NoSQL data source like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
|
|
77
77
|
- Your data is constantly changing.
|
|
78
78
|
- You have existing data in a relational database such as [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) and you need a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
|
|
79
79
|
- You want to keep your existing data untouched whilst taking advantage of
|
|
@@ -99,7 +99,8 @@ PGSync operates in an event-driven model by creating triggers for tables in your
|
|
|
99
99
|
|
|
100
100
|
*This is the only time PGSync will ever make any changes to your database.*
|
|
101
101
|
|
|
102
|
-
|
|
102
|
+
>**NOTE**: **If you change the structure of your PGSync schema config, it's recommended and in most cases necessary to rebuild your Elasticsearch/OpenSearch indices.**
|
|
103
|
+
|
|
103
104
|
There are plans to support zero-downtime migrations to streamline this process.
|
|
104
105
|
|
|
105
106
|
|
|
@@ -187,7 +188,6 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
|
|
|
187
188
|
### MySQL / MariaDB setup
|
|
188
189
|
|
|
189
190
|
- Enable binary logging in your MySQL / MariaDB setting.
|
|
190
|
-
|
|
191
191
|
- You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
|
|
192
192
|
|
|
193
193
|
```server-id = 1``` # any non-zero unique ID
|
|
@@ -195,10 +195,8 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
|
|
|
195
195
|
```log_bin = mysql-bin```
|
|
196
196
|
|
|
197
197
|
```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
|
|
198
|
-
|
|
199
198
|
- optional housekeeping:
|
|
200
199
|
```binlog_expire_logs_seconds = 604800``` # 7 days
|
|
201
|
-
|
|
202
200
|
- You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
|
|
203
201
|
|
|
204
202
|
```sql
|
|
@@ -224,17 +222,19 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
|
|
|
224
222
|
|
|
225
223
|
Key features of PGSync are:
|
|
226
224
|
|
|
227
|
-
- Easily denormalize relational data
|
|
228
|
-
- Works with any PostgreSQL database (
|
|
229
|
-
- Negligible impact on database performance
|
|
230
|
-
- Transactionally consistent output in Elasticsearch/OpenSearch
|
|
231
|
-
-
|
|
232
|
-
-
|
|
233
|
-
-
|
|
234
|
-
-
|
|
235
|
-
- Supports
|
|
236
|
-
- Supports
|
|
237
|
-
-
|
|
225
|
+
- Easily denormalize relational data
|
|
226
|
+
- Works with any PostgreSQL database (9.6 or later)
|
|
227
|
+
- Negligible impact on database performance
|
|
228
|
+
- Transactionally consistent output in Elasticsearch/OpenSearch:
|
|
229
|
+
- Writes appear only after they’re committed
|
|
230
|
+
- Inserts, updates, and deletes appear in commit order (not eventually)
|
|
231
|
+
- Fault-tolerant: no data loss even on crashes or network issues; processing resumes from the last checkpoint
|
|
232
|
+
- Returns data directly as PostgreSQL/MySQL/MariaDB JSON for speed
|
|
233
|
+
- Supports composite primary and foreign keys
|
|
234
|
+
- Supports views and materialized views
|
|
235
|
+
- Handles arbitrarily deep nesting of related tables
|
|
236
|
+
- Supports PostgreSQL/MySQL/MariaDB JSON fields, allowing JSON properties to be extracted as separate document fields
|
|
237
|
+
- Customizable document structure
|
|
238
238
|
|
|
239
239
|
|
|
240
240
|
#### Requirements
|
|
@@ -360,23 +360,26 @@ e.g
|
|
|
360
360
|
}
|
|
361
361
|
```
|
|
362
362
|
|
|
363
|
-
PGSync addresses
|
|
364
|
-
|
|
365
|
-
-
|
|
366
|
-
-
|
|
367
|
-
-
|
|
368
|
-
-
|
|
363
|
+
PGSync addresses common data consistency challenges, such as:
|
|
364
|
+
|
|
365
|
+
- Updating an author's name in the database
|
|
366
|
+
- Adding an additional author to an existing book
|
|
367
|
+
- Changing an author's name across many existing documents
|
|
368
|
+
- Deleting or updating an author record
|
|
369
|
+
- Truncating an entire table and keeping indexes in sync
|
|
369
370
|
|
|
370
371
|
|
|
371
372
|
#### Benefits
|
|
372
373
|
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
-
|
|
376
|
-
-
|
|
377
|
-
-
|
|
378
|
-
-
|
|
379
|
-
-
|
|
374
|
+
PGSync is a simple, out-of-the-box solution for change data capture, designed to minimize development effort and keep your search indexes in sync.
|
|
375
|
+
|
|
376
|
+
- Handles data deletions automatically.
|
|
377
|
+
- Requires minimal setup. Just define a schema config that describes your data.
|
|
378
|
+
- Generates advanced queries directly from your schema.
|
|
379
|
+
- Makes it easy to rebuild indexes after schema changes.
|
|
380
|
+
- Lets you expose only the data you need in Elasticsearch/OpenSearch.
|
|
381
|
+
- Supports multiple Postgres/MySQL/MariaDB schemas for multi-tenant applications.
|
|
382
|
+
|
|
380
383
|
|
|
381
384
|
|
|
382
385
|
#### Contributing
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# PostgreSQL to Elasticsearch/OpenSearch sync
|
|
1
|
+
# PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
- [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) or [OpenSearch](https://opensearch.org/).
|
|
@@ -8,8 +8,11 @@ import time
|
|
|
8
8
|
import typing as t
|
|
9
9
|
from contextlib import contextmanager
|
|
10
10
|
|
|
11
|
+
import psycopg2
|
|
11
12
|
import sqlalchemy as sa
|
|
13
|
+
from psycopg2.extras import LogicalReplicationConnection
|
|
12
14
|
from sqlalchemy.dialects import postgresql # noqa
|
|
15
|
+
from sqlalchemy.engine.url import make_url
|
|
13
16
|
from sqlalchemy.orm import sessionmaker
|
|
14
17
|
|
|
15
18
|
from .constants import (
|
|
@@ -746,6 +749,21 @@ class Base(object):
|
|
|
746
749
|
)
|
|
747
750
|
)[0]
|
|
748
751
|
|
|
752
|
+
def get_replication_connection(
|
|
753
|
+
self, engine: sa.engine.Engine
|
|
754
|
+
) -> psycopg2.extensions.connection:
|
|
755
|
+
url: sa.engine.URL = make_url(str(engine.url))
|
|
756
|
+
# Build a libpq-style connection by keyword args
|
|
757
|
+
conn: psycopg2.extensions.connection = psycopg2.connect(
|
|
758
|
+
host=url.host,
|
|
759
|
+
port=url.port or 5432,
|
|
760
|
+
user=url.username,
|
|
761
|
+
password=url.password,
|
|
762
|
+
dbname=url.database,
|
|
763
|
+
connection_factory=LogicalReplicationConnection,
|
|
764
|
+
)
|
|
765
|
+
return conn
|
|
766
|
+
|
|
749
767
|
def logical_slot_get_changes(
|
|
750
768
|
self,
|
|
751
769
|
slot_name: str,
|
|
@@ -95,6 +95,7 @@ class Sync(Base, metaclass=Singleton):
|
|
|
95
95
|
producer: bool = True,
|
|
96
96
|
consumer: bool = True,
|
|
97
97
|
bootstrap: bool = False,
|
|
98
|
+
wal: bool = False,
|
|
98
99
|
**kwargs,
|
|
99
100
|
) -> None:
|
|
100
101
|
"""Constructor."""
|
|
@@ -119,12 +120,13 @@ class Sync(Base, metaclass=Singleton):
|
|
|
119
120
|
self.producer: bool = producer
|
|
120
121
|
self.consumer: bool = consumer
|
|
121
122
|
self.num_workers: int = num_workers
|
|
122
|
-
|
|
123
|
+
# Redis not required in wal or polling mode
|
|
124
|
+
self._redis: t.Optional[RedisQueue] = None
|
|
123
125
|
self.tree: Tree = Tree(
|
|
124
126
|
self.models, nodes=self.nodes, database=doc["database"]
|
|
125
127
|
)
|
|
126
128
|
if bootstrap:
|
|
127
|
-
self.setup()
|
|
129
|
+
self.setup(wal, polling)
|
|
128
130
|
|
|
129
131
|
if validate:
|
|
130
132
|
self.validate(repl_slots=repl_slots, polling=polling)
|
|
@@ -137,6 +139,8 @@ class Sync(Base, metaclass=Singleton):
|
|
|
137
139
|
self.count: dict = dict(xlog=0, db=0, redis=0)
|
|
138
140
|
self.tasks: t.List[asyncio.Task] = []
|
|
139
141
|
self.lock: threading.Lock = threading.Lock()
|
|
142
|
+
# holds Payload objects across multiple consume() calls
|
|
143
|
+
self._buffer: list["Payload"] = []
|
|
140
144
|
|
|
141
145
|
@property
|
|
142
146
|
def slot_name(self) -> str:
|
|
@@ -147,6 +151,16 @@ class Sync(Base, metaclass=Singleton):
|
|
|
147
151
|
def checkpoint_file(self) -> str:
|
|
148
152
|
return os.path.join(settings.CHECKPOINT_PATH, f".{self.__name}")
|
|
149
153
|
|
|
154
|
+
@property
|
|
155
|
+
def redis(self) -> t.Optional[RedisQueue]:
|
|
156
|
+
"""Return the Redis queue instance."""
|
|
157
|
+
if self._redis is None:
|
|
158
|
+
try:
|
|
159
|
+
self._redis = RedisQueue(self.__name)
|
|
160
|
+
except Exception:
|
|
161
|
+
pass
|
|
162
|
+
return self._redis
|
|
163
|
+
|
|
150
164
|
def validate(self, repl_slots: bool = True, polling: bool = False) -> None:
|
|
151
165
|
"""Perform all validation right away."""
|
|
152
166
|
|
|
@@ -161,45 +175,51 @@ class Sync(Base, metaclass=Singleton):
|
|
|
161
175
|
if self.index is None:
|
|
162
176
|
raise ValueError("Index is missing for doc")
|
|
163
177
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
178
|
+
# replication slot not needed in polling or mysql
|
|
179
|
+
if not self.is_mysql_compat and not polling:
|
|
180
|
+
max_replication_slots: t.Optional[str] = self.pg_settings(
|
|
181
|
+
"max_replication_slots"
|
|
182
|
+
)
|
|
183
|
+
try:
|
|
184
|
+
if int(max_replication_slots) < 1:
|
|
185
|
+
raise TypeError
|
|
186
|
+
except TypeError:
|
|
187
|
+
raise RuntimeError(
|
|
188
|
+
"Ensure there is at least one replication slot defined "
|
|
189
|
+
"by setting max_replication_slots = 1"
|
|
168
190
|
)
|
|
169
|
-
try:
|
|
170
|
-
if int(max_replication_slots) < 1:
|
|
171
|
-
raise TypeError
|
|
172
|
-
except TypeError:
|
|
173
|
-
raise RuntimeError(
|
|
174
|
-
"Ensure there is at least one replication slot defined "
|
|
175
|
-
"by setting max_replication_slots = 1"
|
|
176
|
-
)
|
|
177
191
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
192
|
+
wal_level: t.Optional[str] = self.pg_settings("wal_level")
|
|
193
|
+
if not wal_level or wal_level.lower() != "logical":
|
|
194
|
+
raise RuntimeError(
|
|
195
|
+
"Enable logical decoding by setting wal_level = logical"
|
|
196
|
+
)
|
|
183
197
|
|
|
184
|
-
|
|
198
|
+
self._can_create_replication_slot("_tmp_")
|
|
185
199
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
200
|
+
rds_logical_replication: t.Optional[str] = self.pg_settings(
|
|
201
|
+
"rds.logical_replication"
|
|
202
|
+
)
|
|
203
|
+
if (
|
|
204
|
+
rds_logical_replication
|
|
205
|
+
and rds_logical_replication.lower() == "off"
|
|
206
|
+
):
|
|
207
|
+
raise RDSError("rds.logical_replication is not enabled")
|
|
194
208
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
209
|
+
# ensure we have run bootstrap and the replication slot exists
|
|
210
|
+
if repl_slots and not self.replication_slots(self.__name):
|
|
211
|
+
raise RuntimeError(
|
|
212
|
+
f'Replication slot "{self.__name}" does not exist.\n'
|
|
213
|
+
f'Make sure you have run the "bootstrap" command.'
|
|
214
|
+
)
|
|
201
215
|
|
|
202
|
-
if
|
|
216
|
+
if settings.REDIS_CHECKPOINT:
|
|
217
|
+
# ensure Redis is reachable
|
|
218
|
+
try:
|
|
219
|
+
self.redis.ping()
|
|
220
|
+
except Exception as e:
|
|
221
|
+
raise RuntimeError(f"Cannot reach Redis: {e}")
|
|
222
|
+
else:
|
|
203
223
|
# ensure the checkpoint dirpath is valid
|
|
204
224
|
if not Path(settings.CHECKPOINT_PATH).exists():
|
|
205
225
|
raise RuntimeError(
|
|
@@ -300,8 +320,12 @@ class Sync(Base, metaclass=Singleton):
|
|
|
300
320
|
routing=self.routing,
|
|
301
321
|
)
|
|
302
322
|
|
|
303
|
-
def setup(
|
|
304
|
-
|
|
323
|
+
def setup(
|
|
324
|
+
self, no_create: bool = False, wal: bool = False, polling: bool = False
|
|
325
|
+
) -> None:
|
|
326
|
+
"""Create the database triggers and replication slot.
|
|
327
|
+
Generally bootstrap should not require Redis as it is optional in certain cases.
|
|
328
|
+
"""
|
|
305
329
|
if self.is_mysql_compat:
|
|
306
330
|
raise NotImplementedError(
|
|
307
331
|
"Setup is not supported for MySQL-family backend (MySQL or MariaDB)"
|
|
@@ -318,75 +342,85 @@ class Sync(Base, metaclass=Singleton):
|
|
|
318
342
|
|
|
319
343
|
self.teardown(drop_view=False)
|
|
320
344
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
345
|
+
if not polling:
|
|
346
|
+
for schema in self.schemas:
|
|
347
|
+
# TODO: move if_not_exists to the function
|
|
348
|
+
if if_not_exists or not self.function_exists(schema):
|
|
349
|
+
|
|
350
|
+
self.create_function(schema)
|
|
351
|
+
|
|
352
|
+
tables: t.Set = set()
|
|
353
|
+
# tables with user defined foreign keys
|
|
354
|
+
user_defined_fkey_tables: dict = {}
|
|
355
|
+
node_columns: dict = {}
|
|
356
|
+
|
|
357
|
+
for node in self.tree.traverse_breadth_first():
|
|
358
|
+
if node.schema != schema:
|
|
359
|
+
continue
|
|
360
|
+
tables |= set(
|
|
361
|
+
[
|
|
362
|
+
through.table
|
|
363
|
+
for through in node.relationship.throughs
|
|
364
|
+
]
|
|
365
|
+
)
|
|
366
|
+
tables |= set([node.table])
|
|
367
|
+
# we also need to bootstrap the base tables
|
|
368
|
+
tables |= set(node.base_tables)
|
|
369
|
+
node_columns[node.table] = set(
|
|
370
|
+
[
|
|
371
|
+
re.split(
|
|
372
|
+
rf"\s*({'|'.join(re.escape(op) for op in JSONB_OPERATORS)})\s*",
|
|
373
|
+
c,
|
|
374
|
+
maxsplit=1,
|
|
375
|
+
)[0]
|
|
376
|
+
for c in node.column_names
|
|
377
|
+
]
|
|
378
|
+
)
|
|
379
|
+
# we want to get both the parent and the child keys here
|
|
380
|
+
# even though only one of them is the foreign_key.
|
|
381
|
+
# this is because we define both in the schema but
|
|
382
|
+
# do not specify which table is the foreign key.
|
|
383
|
+
columns: list = []
|
|
384
|
+
if node.relationship.foreign_key.parent:
|
|
385
|
+
columns.extend(
|
|
386
|
+
node.relationship.foreign_key.parent
|
|
387
|
+
)
|
|
388
|
+
if node.relationship.foreign_key.child:
|
|
389
|
+
columns.extend(node.relationship.foreign_key.child)
|
|
390
|
+
if columns:
|
|
391
|
+
user_defined_fkey_tables.setdefault(
|
|
392
|
+
node.table, set()
|
|
393
|
+
)
|
|
394
|
+
user_defined_fkey_tables[node.table] |= set(
|
|
395
|
+
columns
|
|
396
|
+
)
|
|
397
|
+
if tables:
|
|
398
|
+
if if_not_exists or not self.view_exists(
|
|
399
|
+
MATERIALIZED_VIEW, schema
|
|
400
|
+
):
|
|
401
|
+
self.create_view(
|
|
402
|
+
self.index,
|
|
403
|
+
schema,
|
|
404
|
+
tables,
|
|
405
|
+
user_defined_fkey_tables,
|
|
406
|
+
node_columns,
|
|
407
|
+
)
|
|
331
408
|
|
|
332
|
-
|
|
333
|
-
if node.schema != schema:
|
|
334
|
-
continue
|
|
335
|
-
tables |= set(
|
|
336
|
-
[
|
|
337
|
-
through.table
|
|
338
|
-
for through in node.relationship.throughs
|
|
339
|
-
]
|
|
340
|
-
)
|
|
341
|
-
tables |= set([node.table])
|
|
342
|
-
# we also need to bootstrap the base tables
|
|
343
|
-
tables |= set(node.base_tables)
|
|
344
|
-
node_columns[node.table] = set(
|
|
345
|
-
[
|
|
346
|
-
re.split(
|
|
347
|
-
rf"\s*({'|'.join(re.escape(op) for op in JSONB_OPERATORS)})\s*",
|
|
348
|
-
c,
|
|
349
|
-
maxsplit=1,
|
|
350
|
-
)[0]
|
|
351
|
-
for c in node.column_names
|
|
352
|
-
]
|
|
353
|
-
)
|
|
354
|
-
# we want to get both the parent and the child keys here
|
|
355
|
-
# even though only one of them is the foreign_key.
|
|
356
|
-
# this is because we define both in the schema but
|
|
357
|
-
# do not specify which table is the foreign key.
|
|
358
|
-
columns: list = []
|
|
359
|
-
if node.relationship.foreign_key.parent:
|
|
360
|
-
columns.extend(node.relationship.foreign_key.parent)
|
|
361
|
-
if node.relationship.foreign_key.child:
|
|
362
|
-
columns.extend(node.relationship.foreign_key.child)
|
|
363
|
-
if columns:
|
|
364
|
-
user_defined_fkey_tables.setdefault(node.table, set())
|
|
365
|
-
user_defined_fkey_tables[node.table] |= set(columns)
|
|
366
|
-
if tables:
|
|
367
|
-
if if_not_exists or not self.view_exists(
|
|
368
|
-
MATERIALIZED_VIEW, schema
|
|
369
|
-
):
|
|
370
|
-
self.create_view(
|
|
371
|
-
self.index,
|
|
409
|
+
self.create_triggers(
|
|
372
410
|
schema,
|
|
373
|
-
tables,
|
|
374
|
-
|
|
375
|
-
|
|
411
|
+
tables=tables,
|
|
412
|
+
join_queries=join_queries,
|
|
413
|
+
if_not_exists=if_not_exists,
|
|
376
414
|
)
|
|
377
415
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
tables=tables,
|
|
381
|
-
join_queries=join_queries,
|
|
382
|
-
if_not_exists=if_not_exists,
|
|
383
|
-
)
|
|
384
|
-
|
|
385
|
-
if if_not_exists or not self.replication_slots(self.__name):
|
|
416
|
+
if not wal:
|
|
417
|
+
if if_not_exists or not self.replication_slots(self.__name):
|
|
386
418
|
|
|
387
|
-
|
|
419
|
+
self.create_replication_slot(self.__name)
|
|
388
420
|
|
|
389
|
-
def teardown(
|
|
421
|
+
def teardown(
|
|
422
|
+
self, drop_view: bool = True, polling: bool = False, wal: bool = False
|
|
423
|
+
) -> None:
|
|
390
424
|
"""Drop the database triggers and replication slot."""
|
|
391
425
|
if self.is_mysql_compat:
|
|
392
426
|
raise NotImplementedError(
|
|
@@ -405,28 +439,35 @@ class Sync(Base, metaclass=Singleton):
|
|
|
405
439
|
f"Checkpoint file not found: {self.checkpoint_file}"
|
|
406
440
|
)
|
|
407
441
|
|
|
408
|
-
|
|
442
|
+
try:
|
|
443
|
+
if self._redis is None:
|
|
444
|
+
raise RuntimeError("Redis is not configured.")
|
|
445
|
+
self.redis.delete()
|
|
446
|
+
except Exception as e:
|
|
447
|
+
logger.warning(f"Could not clear Redis checkpoint queue: {e}")
|
|
409
448
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
449
|
+
if not polling:
|
|
450
|
+
for schema in self.schemas:
|
|
451
|
+
tables: t.Set = set()
|
|
452
|
+
for node in self.tree.traverse_breadth_first():
|
|
453
|
+
tables |= set(
|
|
454
|
+
[
|
|
455
|
+
through.table
|
|
456
|
+
for through in node.relationship.throughs
|
|
457
|
+
]
|
|
458
|
+
)
|
|
459
|
+
tables |= set([node.table])
|
|
460
|
+
# we also need to teardown the base tables
|
|
461
|
+
tables |= set(node.base_tables)
|
|
462
|
+
self.drop_triggers(
|
|
463
|
+
schema=schema, tables=tables, join_queries=join_queries
|
|
418
464
|
)
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
self.drop_triggers(
|
|
423
|
-
schema=schema, tables=tables, join_queries=join_queries
|
|
424
|
-
)
|
|
425
|
-
if drop_view:
|
|
426
|
-
self.drop_view(schema)
|
|
427
|
-
self.drop_function(schema)
|
|
465
|
+
if drop_view:
|
|
466
|
+
self.drop_view(schema)
|
|
467
|
+
self.drop_function(schema)
|
|
428
468
|
|
|
429
|
-
|
|
469
|
+
if not wal:
|
|
470
|
+
self.drop_replication_slot(self.__name)
|
|
430
471
|
|
|
431
472
|
def get_doc_id(self, primary_keys: t.List[str], table: str) -> str:
|
|
432
473
|
"""
|
|
@@ -571,7 +612,7 @@ class Sync(Base, metaclass=Singleton):
|
|
|
571
612
|
is_mariadb: bool = getattr(conn.dialect, "is_mariadb", False)
|
|
572
613
|
|
|
573
614
|
def _conn_settings_from_engine(engine: sa.Engine) -> dict:
|
|
574
|
-
url = engine.url
|
|
615
|
+
url: sa.engine.URL = engine.url
|
|
575
616
|
return {
|
|
576
617
|
"host": url.host,
|
|
577
618
|
"port": int(url.port),
|
|
@@ -581,9 +622,9 @@ class Sync(Base, metaclass=Singleton):
|
|
|
581
622
|
"autocommit": True,
|
|
582
623
|
}
|
|
583
624
|
|
|
584
|
-
base = _conn_settings_from_engine(self.engine)
|
|
585
|
-
connection_settings = dict(base) # replication socket
|
|
586
|
-
ctl_connection_settings = dict(base)
|
|
625
|
+
base: dict = _conn_settings_from_engine(self.engine)
|
|
626
|
+
connection_settings: dict = dict(base) # replication socket
|
|
627
|
+
ctl_connection_settings: dict = dict(base)
|
|
587
628
|
ctl_connection_settings["cursorclass"] = (
|
|
588
629
|
pymysql.cursors.Cursor
|
|
589
630
|
) # tuple rows
|
|
@@ -607,11 +648,11 @@ class Sync(Base, metaclass=Singleton):
|
|
|
607
648
|
freeze_schema=False,
|
|
608
649
|
)
|
|
609
650
|
|
|
610
|
-
current = 0
|
|
611
|
-
total = None
|
|
651
|
+
current: int = 0
|
|
652
|
+
total: t.Optional[int] = None
|
|
612
653
|
batch: list = []
|
|
613
654
|
last_key: t.Optional[tuple[str, str]] = None
|
|
614
|
-
batch_limit = limit
|
|
655
|
+
batch_limit: int = limit
|
|
615
656
|
|
|
616
657
|
# Single-save checkpoint snapshot
|
|
617
658
|
save_file: t.Optional[str] = start_log
|
|
@@ -652,7 +693,7 @@ class Sync(Base, metaclass=Singleton):
|
|
|
652
693
|
self.engine, schema, table, row.get("values")
|
|
653
694
|
),
|
|
654
695
|
)
|
|
655
|
-
key = (payload.tg_op, payload.table)
|
|
696
|
+
key: tuple[str, str] = (payload.tg_op, payload.table)
|
|
656
697
|
if last_key is None or key == last_key:
|
|
657
698
|
batch.append(payload)
|
|
658
699
|
else:
|
|
@@ -1836,6 +1877,97 @@ class Sync(Base, metaclass=Singleton):
|
|
|
1836
1877
|
|
|
1837
1878
|
self._truncate = True
|
|
1838
1879
|
|
|
1880
|
+
def _flush_buffer(
|
|
1881
|
+
self,
|
|
1882
|
+
cursor: t.Any,
|
|
1883
|
+
flush_lsn: t.Optional[str] = None,
|
|
1884
|
+
force_ack: bool = False,
|
|
1885
|
+
) -> None:
|
|
1886
|
+
# If we have buffered docs, send them
|
|
1887
|
+
if self._buffer:
|
|
1888
|
+
logger.info(f"flushing buffer with {len(self._buffer)} docs")
|
|
1889
|
+
docs: list = []
|
|
1890
|
+
for (op, tbl), run in groupby(
|
|
1891
|
+
self._buffer,
|
|
1892
|
+
key=lambda payload: (payload.tg_op, payload.table),
|
|
1893
|
+
):
|
|
1894
|
+
batch: list = list(run)
|
|
1895
|
+
logger.info(f"bulk group op={op} tbl={tbl} size={len(batch)}")
|
|
1896
|
+
docs.extend(self._payloads(batch))
|
|
1897
|
+
|
|
1898
|
+
if docs:
|
|
1899
|
+
processed: int = len(self._buffer)
|
|
1900
|
+
logger.info(f"sending bulk of {len(docs)} docs")
|
|
1901
|
+
self.search_client.bulk(self.index, docs)
|
|
1902
|
+
self.count["xlog"] += processed
|
|
1903
|
+
logger.info(f"sent bulk of {len(docs)} docs")
|
|
1904
|
+
|
|
1905
|
+
# if caller didn't provide a flush_lsn, then fall back to last buffered row
|
|
1906
|
+
if flush_lsn is None:
|
|
1907
|
+
flush_lsn = self._buffer_last_lsn
|
|
1908
|
+
|
|
1909
|
+
# clear buffer after successful bulk
|
|
1910
|
+
self._buffer.clear()
|
|
1911
|
+
self._buffer_last_lsn = None
|
|
1912
|
+
|
|
1913
|
+
# Even if buffer was empty, we may want to ACK a COMMIT LSN
|
|
1914
|
+
if flush_lsn is not None and (force_ack or not self._buffer):
|
|
1915
|
+
cursor.send_feedback(flush_lsn=flush_lsn, force=True)
|
|
1916
|
+
logger.info(f"sent feedback flush_lsn=P{flush_lsn}")
|
|
1917
|
+
|
|
1918
|
+
def consume(self, message: t.Any) -> None:
|
|
1919
|
+
raw: t.Any = message.payload
|
|
1920
|
+
lsn: t.Optional[str] = message.data_start
|
|
1921
|
+
chunk_size: int = settings.LOGICAL_SLOT_CHUNK_SIZE
|
|
1922
|
+
|
|
1923
|
+
logger.debug(f"[LSN {lsn}] {raw}")
|
|
1924
|
+
|
|
1925
|
+
match = TX_BOUNDARY_RE.match(raw)
|
|
1926
|
+
if match:
|
|
1927
|
+
kind: str = match.group(1).upper()
|
|
1928
|
+
if kind == "COMMIT":
|
|
1929
|
+
# Flush any buffered docs, and ACK this COMMIT LSN
|
|
1930
|
+
self._flush_buffer(
|
|
1931
|
+
cursor=message.cursor,
|
|
1932
|
+
flush_lsn=lsn,
|
|
1933
|
+
force_ack=True, # ACK even if buffer empty
|
|
1934
|
+
)
|
|
1935
|
+
# BEGIN/COMMIT don't include rows by themselves
|
|
1936
|
+
return
|
|
1937
|
+
|
|
1938
|
+
# Not BEGIN/COMMIT -> row change
|
|
1939
|
+
try:
|
|
1940
|
+
payload: Payload = self.parse_logical_slot(raw)
|
|
1941
|
+
except Exception:
|
|
1942
|
+
logger.exception(f"Error parsing row: {raw}")
|
|
1943
|
+
raise
|
|
1944
|
+
|
|
1945
|
+
# Filter by schema
|
|
1946
|
+
if payload.schema not in self.tree.schemas:
|
|
1947
|
+
# we still saw this LSN; it will be ACKed at COMMIT
|
|
1948
|
+
return
|
|
1949
|
+
|
|
1950
|
+
# Buffer across transactions
|
|
1951
|
+
self._buffer.append(payload)
|
|
1952
|
+
self._buffer_last_lsn = lsn
|
|
1953
|
+
|
|
1954
|
+
# Flush when big enough
|
|
1955
|
+
if len(self._buffer) >= chunk_size:
|
|
1956
|
+
self._flush_buffer(message.cursor)
|
|
1957
|
+
|
|
1958
|
+
def wal_consumer(self) -> None:
|
|
1959
|
+
# open a replication‐mode connection
|
|
1960
|
+
conn = self.get_replication_connection(self.engine)
|
|
1961
|
+
cursor = conn.cursor()
|
|
1962
|
+
# start streaming; include XIDs so you see BEGIN/COMMIT markers
|
|
1963
|
+
cursor.start_replication(
|
|
1964
|
+
slot_name=self.__name,
|
|
1965
|
+
options={"include-xids": "1", "skip-empty-xacts": "1"},
|
|
1966
|
+
decode=True, # gets you str instead of bytes
|
|
1967
|
+
)
|
|
1968
|
+
logger.info("Starting logical replication stream (test_decoding)...")
|
|
1969
|
+
cursor.consume_stream(self.consume)
|
|
1970
|
+
|
|
1839
1971
|
@threaded
|
|
1840
1972
|
@exception
|
|
1841
1973
|
def truncate_slots(self) -> None:
|
|
@@ -1962,7 +2094,7 @@ class Sync(Base, metaclass=Singleton):
|
|
|
1962
2094
|
is_flag=True,
|
|
1963
2095
|
help="Run as a daemon (Incompatible with --polling)",
|
|
1964
2096
|
cls=MutuallyExclusiveOption,
|
|
1965
|
-
mutually_exclusive=["polling"],
|
|
2097
|
+
mutually_exclusive=["polling", "wal"],
|
|
1966
2098
|
)
|
|
1967
2099
|
@click.option(
|
|
1968
2100
|
"--producer",
|
|
@@ -1985,7 +2117,19 @@ class Sync(Base, metaclass=Singleton):
|
|
|
1985
2117
|
is_flag=True,
|
|
1986
2118
|
help="Polling mode (Incompatible with -d)",
|
|
1987
2119
|
cls=MutuallyExclusiveOption,
|
|
1988
|
-
mutually_exclusive=["daemon"],
|
|
2120
|
+
mutually_exclusive=["daemon", "wal"],
|
|
2121
|
+
)
|
|
2122
|
+
@click.option(
|
|
2123
|
+
"--wal",
|
|
2124
|
+
"-w",
|
|
2125
|
+
is_flag=True,
|
|
2126
|
+
default=False,
|
|
2127
|
+
help="Use WAL for replication",
|
|
2128
|
+
cls=MutuallyExclusiveOption,
|
|
2129
|
+
mutually_exclusive=[
|
|
2130
|
+
"daemon",
|
|
2131
|
+
"polling",
|
|
2132
|
+
],
|
|
1989
2133
|
)
|
|
1990
2134
|
@click.option("--host", "-h", help="PG_HOST override")
|
|
1991
2135
|
@click.option("--password", is_flag=True, help="Prompt for database password")
|
|
@@ -2066,6 +2210,7 @@ def main(
|
|
|
2066
2210
|
producer: bool,
|
|
2067
2211
|
consumer: bool,
|
|
2068
2212
|
bootstrap: bool,
|
|
2213
|
+
wal: bool,
|
|
2069
2214
|
) -> None:
|
|
2070
2215
|
"""Main application syncer."""
|
|
2071
2216
|
if version:
|
|
@@ -2129,17 +2274,25 @@ def main(
|
|
|
2129
2274
|
# In polling mode, the app can run without replication slots or triggers.
|
|
2130
2275
|
# However, this is not the preferred mode of operation.
|
|
2131
2276
|
# It should be considered a workaround for running on a read-only cluster.
|
|
2132
|
-
kwargs["polling"] = True
|
|
2133
2277
|
while True:
|
|
2134
2278
|
for doc in config_loader(
|
|
2135
2279
|
config=config,
|
|
2136
2280
|
schema_url=schema_url,
|
|
2137
2281
|
s3_schema_url=s3_schema_url,
|
|
2138
2282
|
):
|
|
2139
|
-
sync: Sync = Sync(
|
|
2283
|
+
sync: Sync = Sync(
|
|
2284
|
+
doc, verbose=verbose, polling=True, **kwargs
|
|
2285
|
+
)
|
|
2140
2286
|
sync.pull(polling=True)
|
|
2141
2287
|
time.sleep(settings.POLL_INTERVAL)
|
|
2142
|
-
|
|
2288
|
+
elif wal:
|
|
2289
|
+
for doc in config_loader(
|
|
2290
|
+
config=config,
|
|
2291
|
+
schema_url=schema_url,
|
|
2292
|
+
s3_schema_url=s3_schema_url,
|
|
2293
|
+
):
|
|
2294
|
+
sync: Sync = Sync(doc, verbose=verbose, wal=True, **kwargs)
|
|
2295
|
+
sync.wal_consumer()
|
|
2143
2296
|
else:
|
|
2144
2297
|
tasks: t.List[asyncio.Task] = []
|
|
2145
2298
|
for doc in config_loader(
|
|
@@ -2162,7 +2315,7 @@ def main(
|
|
|
2162
2315
|
tasks.extend(sync.tasks)
|
|
2163
2316
|
|
|
2164
2317
|
if settings.USE_ASYNC:
|
|
2165
|
-
event_loop = asyncio.get_event_loop()
|
|
2318
|
+
event_loop: asyncio.AbstractEventLoop = asyncio.get_event_loop()
|
|
2166
2319
|
event_loop.run_until_complete(asyncio.gather(*tasks))
|
|
2167
2320
|
event_loop.close()
|
|
2168
2321
|
|
|
@@ -280,7 +280,10 @@ def config_loader(
|
|
|
280
280
|
|
|
281
281
|
try:
|
|
282
282
|
with open(config_path, "r") as f:
|
|
283
|
-
|
|
283
|
+
try:
|
|
284
|
+
data = json.load(f)
|
|
285
|
+
except json.JSONDecodeError as e:
|
|
286
|
+
raise ValueError(f"{config_path} is not valid JSON: {e}") from e
|
|
284
287
|
for doc in data:
|
|
285
288
|
for key, value in doc.items():
|
|
286
289
|
try:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version: 6.0.0
|
|
3
|
+
Version: 6.1.0
|
|
4
4
|
Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
@@ -33,9 +33,9 @@ License-File: LICENSE
|
|
|
33
33
|
License-File: AUTHORS.rst
|
|
34
34
|
Requires-Dist: async-timeout==5.0.1
|
|
35
35
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
36
|
-
Requires-Dist: boto3==1.
|
|
37
|
-
Requires-Dist: botocore==1.
|
|
38
|
-
Requires-Dist: certifi==2025.
|
|
36
|
+
Requires-Dist: boto3==1.41.2
|
|
37
|
+
Requires-Dist: botocore==1.41.2
|
|
38
|
+
Requires-Dist: certifi==2025.11.12
|
|
39
39
|
Requires-Dist: charset-normalizer==3.4.4
|
|
40
40
|
Requires-Dist: click==8.1.8
|
|
41
41
|
Requires-Dist: elastic-transport==9.1.0
|
|
@@ -46,7 +46,7 @@ Requires-Dist: events==0.5
|
|
|
46
46
|
Requires-Dist: idna==3.11
|
|
47
47
|
Requires-Dist: jmespath==1.0.1
|
|
48
48
|
Requires-Dist: marshmallow==4.0.1
|
|
49
|
-
Requires-Dist: mysql-replication==1.0.
|
|
49
|
+
Requires-Dist: mysql-replication==1.0.12
|
|
50
50
|
Requires-Dist: opensearch-dsl==2.1.0
|
|
51
51
|
Requires-Dist: opensearch-py==3.0.0
|
|
52
52
|
Requires-Dist: packaging==25.0
|
|
@@ -57,7 +57,7 @@ Requires-Dist: python-dotenv==1.2.1
|
|
|
57
57
|
Requires-Dist: redis==7.0.1
|
|
58
58
|
Requires-Dist: requests==2.32.5
|
|
59
59
|
Requires-Dist: requests-aws4auth==1.3.1
|
|
60
|
-
Requires-Dist: s3transfer==0.
|
|
60
|
+
Requires-Dist: s3transfer==0.15.0
|
|
61
61
|
Requires-Dist: six==1.17.0
|
|
62
62
|
Requires-Dist: sqlalchemy==2.0.44
|
|
63
63
|
Requires-Dist: sqlparse==0.5.3
|
|
@@ -79,7 +79,7 @@ Dynamic: requires-dist
|
|
|
79
79
|
Dynamic: requires-python
|
|
80
80
|
Dynamic: summary
|
|
81
81
|
|
|
82
|
-
# PostgreSQL to Elasticsearch/OpenSearch sync
|
|
82
|
+
# PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
83
83
|
|
|
84
84
|
|
|
85
85
|
- [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) or [OpenSearch](https://opensearch.org/).
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
async-timeout==5.0.1
|
|
2
2
|
backports-datetime-fromisoformat==2.0.3
|
|
3
|
-
boto3==1.
|
|
4
|
-
botocore==1.
|
|
5
|
-
certifi==2025.
|
|
3
|
+
boto3==1.41.2
|
|
4
|
+
botocore==1.41.2
|
|
5
|
+
certifi==2025.11.12
|
|
6
6
|
charset-normalizer==3.4.4
|
|
7
7
|
click==8.1.8
|
|
8
8
|
elastic-transport==9.1.0
|
|
@@ -13,7 +13,7 @@ events==0.5
|
|
|
13
13
|
idna==3.11
|
|
14
14
|
jmespath==1.0.1
|
|
15
15
|
marshmallow==4.0.1
|
|
16
|
-
mysql-replication==1.0.
|
|
16
|
+
mysql-replication==1.0.12
|
|
17
17
|
opensearch-dsl==2.1.0
|
|
18
18
|
opensearch-py==3.0.0
|
|
19
19
|
packaging==25.0
|
|
@@ -24,7 +24,7 @@ python-dotenv==1.2.1
|
|
|
24
24
|
redis==7.0.1
|
|
25
25
|
requests==2.32.5
|
|
26
26
|
requests-aws4auth==1.3.1
|
|
27
|
-
s3transfer==0.
|
|
27
|
+
s3transfer==0.15.0
|
|
28
28
|
six==1.17.0
|
|
29
29
|
sqlalchemy==2.0.44
|
|
30
30
|
sqlparse==0.5.3
|
|
@@ -937,7 +937,7 @@ class TestSync(object):
|
|
|
937
937
|
mock_teardown.assert_called_once_with(drop_view=False)
|
|
938
938
|
|
|
939
939
|
@patch("pgsync.redisqueue.RedisQueue.delete")
|
|
940
|
-
def test_teardown(self,
|
|
940
|
+
def test_teardown(self, mock_redis_delete, sync):
|
|
941
941
|
with override_env_var(JOIN_QUERIES="False"):
|
|
942
942
|
importlib.reload(settings)
|
|
943
943
|
|
|
@@ -960,16 +960,19 @@ class TestSync(object):
|
|
|
960
960
|
)
|
|
961
961
|
mock_drop_view.assert_called_once_with("public")
|
|
962
962
|
mock_drop_function.assert_called_once_with("public")
|
|
963
|
-
|
|
963
|
+
mock_redis_delete.assert_not_called()
|
|
964
964
|
assert os.path.exists(sync.checkpoint_file) is False
|
|
965
965
|
|
|
966
966
|
with patch("pgsync.sync.logger") as mock_logger:
|
|
967
967
|
with patch("pgsync.sync.Base.drop_replication_slot"):
|
|
968
968
|
self.checkpoint_file = "foo"
|
|
969
969
|
sync.teardown()
|
|
970
|
-
mock_logger.warning.
|
|
971
|
-
"Checkpoint file not found: ./.testdb_testdb"
|
|
972
|
-
|
|
970
|
+
assert mock_logger.warning.call_args_list == [
|
|
971
|
+
call("Checkpoint file not found: ./.testdb_testdb"),
|
|
972
|
+
call(
|
|
973
|
+
"Could not clear Redis checkpoint queue: Redis is not configured."
|
|
974
|
+
),
|
|
975
|
+
]
|
|
973
976
|
|
|
974
977
|
def test_root(self, sync):
|
|
975
978
|
root = sync.tree.root
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|