pgsync 7.0.5__tar.gz → 7.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgsync-7.0.5 → pgsync-7.1.0}/PKG-INFO +30 -28
- {pgsync-7.0.5 → pgsync-7.1.0}/README.md +108 -1
- {pgsync-7.0.5 → pgsync-7.1.0}/README.rst +1 -1
- {pgsync-7.0.5 → pgsync-7.1.0}/bin/bootstrap +1 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/bin/parallel_sync +1 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/bin/pgsync +1 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/__init__.py +1 -1
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/base.py +121 -93
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/node.py +5 -4
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/querybuilder.py +19 -2
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/redisqueue.py +23 -7
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/settings.py +12 -6
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/sync.py +69 -5
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/transform.py +84 -40
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/PKG-INFO +30 -28
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/SOURCES.txt +3 -0
- pgsync-7.1.0/pgsync.egg-info/requires.txt +36 -0
- pgsync-7.1.0/pyproject.toml +3 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/requirements/base.txt +40 -27
- {pgsync-7.0.5 → pgsync-7.1.0}/requirements/dev.txt +66 -53
- {pgsync-7.0.5 → pgsync-7.1.0}/setup.py +2 -2
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/conftest.py +31 -0
- pgsync-7.1.0/tests/test_base.py +1599 -0
- pgsync-7.1.0/tests/test_bug_regressions.py +136 -0
- pgsync-7.1.0/tests/test_exc.py +322 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_node.py +359 -0
- pgsync-7.1.0/tests/test_plugin.py +437 -0
- pgsync-7.1.0/tests/test_query_builder.py +2552 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_redisqueue.py +58 -0
- pgsync-7.1.0/tests/test_search_client.py +648 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_settings.py +4 -1
- pgsync-7.1.0/tests/test_sync.py +3210 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_transform.py +474 -1
- pgsync-7.1.0/tests/test_utils.py +775 -0
- pgsync-7.1.0/tests/test_view.py +662 -0
- pgsync-7.0.5/pgsync.egg-info/requires.txt +0 -33
- pgsync-7.0.5/pyproject.toml +0 -3
- pgsync-7.0.5/tests/test_base.py +0 -676
- pgsync-7.0.5/tests/test_query_builder.py +0 -102
- pgsync-7.0.5/tests/test_search_client.py +0 -115
- pgsync-7.0.5/tests/test_sync.py +0 -1450
- pgsync-7.0.5/tests/test_utils.py +0 -229
- pgsync-7.0.5/tests/test_view.py +0 -302
- {pgsync-7.0.5 → pgsync-7.1.0}/AUTHORS.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/CONTRIBUTING.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/HISTORY.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/LICENSE +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/MANIFEST.in +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/Makefile +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/authors.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/changelog.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/conf.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/contributing.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/history.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/index.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/installation.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/logo.png +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/make.bat +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/readme.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/docs/usage.rst +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/constants.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/exc.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/helper.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/plugin.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/search_client.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/singleton.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/trigger.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/urls.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/utils.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/view.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/setup.cfg +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/__init__.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/fixtures/schema.json +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_constants.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_env_vars.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_helper.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_log_handlers.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_sync_nested_children.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_sync_root.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_sync_single_child_fk_on_child.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_sync_single_child_fk_on_parent.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_trigger.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_unique_behaviour.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_urls.py +0 -0
- {pgsync-7.0.5 → pgsync-7.1.0}/tests/testing_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version: 7.0
|
|
3
|
+
Version: 7.1.0
|
|
4
4
|
Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
@@ -17,7 +17,6 @@ Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres,mysql,mar
|
|
|
17
17
|
Classifier: Development Status :: 5 - Production/Stable
|
|
18
18
|
Classifier: Intended Audience :: Developers
|
|
19
19
|
Classifier: Natural Language :: English
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
21
20
|
Classifier: Programming Language :: Python :: 3.10
|
|
22
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
23
22
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -27,43 +26,46 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
|
27
26
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
28
27
|
Classifier: License :: OSI Approved :: MIT License
|
|
29
28
|
Classifier: Operating System :: OS Independent
|
|
30
|
-
Requires-Python: >=3.
|
|
29
|
+
Requires-Python: >=3.10.0
|
|
31
30
|
Description-Content-Type: text/x-rst
|
|
32
31
|
License-File: LICENSE
|
|
33
32
|
License-File: AUTHORS.rst
|
|
34
33
|
Requires-Dist: async-timeout==5.0.1
|
|
35
34
|
Requires-Dist: backports-datetime-fromisoformat==2.0.3
|
|
36
|
-
Requires-Dist: boto3==1.
|
|
37
|
-
Requires-Dist: botocore==1.
|
|
38
|
-
Requires-Dist: certifi==2026.
|
|
39
|
-
Requires-Dist: charset-normalizer==3.4.
|
|
40
|
-
Requires-Dist: click==8.
|
|
41
|
-
Requires-Dist: elastic-transport==9.
|
|
42
|
-
Requires-Dist: elasticsearch==7.17.
|
|
35
|
+
Requires-Dist: boto3==1.43.36
|
|
36
|
+
Requires-Dist: botocore==1.43.36
|
|
37
|
+
Requires-Dist: certifi==2026.6.17
|
|
38
|
+
Requires-Dist: charset-normalizer==3.4.7
|
|
39
|
+
Requires-Dist: click==8.4.2
|
|
40
|
+
Requires-Dist: elastic-transport==9.4.2
|
|
41
|
+
Requires-Dist: elasticsearch==7.17.13
|
|
43
42
|
Requires-Dist: elasticsearch-dsl==7.4.1
|
|
44
|
-
Requires-Dist: environs==
|
|
43
|
+
Requires-Dist: environs==15.0.1
|
|
45
44
|
Requires-Dist: events==0.5
|
|
46
|
-
Requires-Dist:
|
|
47
|
-
Requires-Dist: idna==3.
|
|
48
|
-
Requires-Dist: jmespath==1.0
|
|
49
|
-
Requires-Dist: marshmallow==4.0
|
|
50
|
-
Requires-Dist: mysql-replication==1.0.
|
|
45
|
+
Requires-Dist: grpcio==1.81.1
|
|
46
|
+
Requires-Dist: idna==3.18
|
|
47
|
+
Requires-Dist: jmespath==1.1.0
|
|
48
|
+
Requires-Dist: marshmallow==4.3.0
|
|
49
|
+
Requires-Dist: mysql-replication==1.0.15
|
|
51
50
|
Requires-Dist: opensearch-dsl==2.1.0
|
|
52
|
-
Requires-Dist: opensearch-
|
|
53
|
-
Requires-Dist:
|
|
54
|
-
Requires-Dist:
|
|
55
|
-
Requires-Dist:
|
|
51
|
+
Requires-Dist: opensearch-protobufs==1.2.0
|
|
52
|
+
Requires-Dist: opensearch-py==3.2.0
|
|
53
|
+
Requires-Dist: packaging==26.2
|
|
54
|
+
Requires-Dist: protobuf==7.35.1
|
|
55
|
+
Requires-Dist: psycopg2-binary==2.9.12
|
|
56
|
+
Requires-Dist: pymysql==1.2.0
|
|
56
57
|
Requires-Dist: python-dateutil==2.9.0.post0
|
|
57
|
-
Requires-Dist: python-dotenv==1.2.
|
|
58
|
-
Requires-Dist: redis==
|
|
59
|
-
Requires-Dist: requests==2.
|
|
60
|
-
Requires-Dist: requests-aws4auth==1.3.
|
|
61
|
-
Requires-Dist: s3transfer==0.
|
|
58
|
+
Requires-Dist: python-dotenv==1.2.2
|
|
59
|
+
Requires-Dist: redis==8.0.1
|
|
60
|
+
Requires-Dist: requests==2.34.2
|
|
61
|
+
Requires-Dist: requests-aws4auth==1.3.2
|
|
62
|
+
Requires-Dist: s3transfer==0.19.0
|
|
62
63
|
Requires-Dist: six==1.17.0
|
|
63
|
-
Requires-Dist:
|
|
64
|
+
Requires-Dist: sniffio==1.3.1
|
|
65
|
+
Requires-Dist: sqlalchemy==2.0.51
|
|
64
66
|
Requires-Dist: sqlparse==0.5.5
|
|
65
67
|
Requires-Dist: typing-extensions==4.15.0
|
|
66
|
-
Requires-Dist: urllib3==
|
|
68
|
+
Requires-Dist: urllib3==2.7.0
|
|
67
69
|
Dynamic: author
|
|
68
70
|
Dynamic: author-email
|
|
69
71
|
Dynamic: classifier
|
|
@@ -121,7 +123,7 @@ Key Features
|
|
|
121
123
|
Requirements
|
|
122
124
|
------------
|
|
123
125
|
|
|
124
|
-
- `Python <https://www.python.org>`_ 3.
|
|
126
|
+
- `Python <https://www.python.org>`_ 3.10+
|
|
125
127
|
- `PostgreSQL <https://www.postgresql.org>`_ 9.6+ or `MySQL <https://www.mysql.com>`_ 8.0.0+ or `MariaDB <https://mariadb.org>`_ 12.0.0+
|
|
126
128
|
- `Redis <https://redis.io>`_ 3.1.0+ or `Valkey <https://valkey.io>`_ 7.2.0+ (optional in WAL mode)
|
|
127
129
|
- `Elasticsearch <https://www.elastic.co/products/elastic-stack>`_ 6.3.1+ or `OpenSearch <https://opensearch.org>`_ 1.3.7+
|
|
@@ -27,6 +27,7 @@
|
|
|
27
27
|
PGSync is a change data capture tool that syncs data from **PostgreSQL**, **MySQL**, or **MariaDB** to **Elasticsearch** or **OpenSearch** in real-time. Define your document structure in JSON, and PGSync handles the rest — no custom code required.
|
|
28
28
|
|
|
29
29
|
```mermaid
|
|
30
|
+
%%{init: {'look': 'handDrawn', 'theme': 'neutral'}}%%
|
|
30
31
|
flowchart LR
|
|
31
32
|
subgraph Source["🗄️ Source Database"]
|
|
32
33
|
DB[(PostgreSQL<br/>MySQL<br/>MariaDB)]
|
|
@@ -86,12 +87,28 @@ pgsync --config schema.json -d
|
|
|
86
87
|
|
|
87
88
|
### Using Docker Compose
|
|
88
89
|
|
|
90
|
+
**Default (Elasticsearch + Kibana):**
|
|
89
91
|
```bash
|
|
90
92
|
git clone https://github.com/toluaina/pgsync
|
|
91
93
|
cd pgsync
|
|
92
94
|
docker-compose up
|
|
93
95
|
```
|
|
94
96
|
|
|
97
|
+
This starts PostgreSQL, Redis, Elasticsearch, Kibana, and PGSync configured for Elasticsearch.
|
|
98
|
+
|
|
99
|
+
**For OpenSearch:**
|
|
100
|
+
```bash
|
|
101
|
+
docker-compose --profile opensearch up
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
This starts PostgreSQL, Redis, OpenSearch, and PGSync configured for OpenSearch.
|
|
105
|
+
|
|
106
|
+
**Ports:**
|
|
107
|
+
- PostgreSQL: `15432`
|
|
108
|
+
- Elasticsearch: `9201` (default)
|
|
109
|
+
- Kibana: `5601` (default)
|
|
110
|
+
- OpenSearch: `9400` (OpenSearch profile)
|
|
111
|
+
|
|
95
112
|
---
|
|
96
113
|
|
|
97
114
|
## How It Works
|
|
@@ -137,7 +154,7 @@ Changes to any related table automatically update the document in Elasticsearch/
|
|
|
137
154
|
|
|
138
155
|
| Component | Version |
|
|
139
156
|
|-----------|---------|
|
|
140
|
-
|  | 3.10+ |
|
|
141
158
|
|  | 9.6+ (or MySQL 5.7.22+ / MariaDB 10.5+) |
|
|
142
159
|
|  | 6.3.1+ (or OpenSearch 1.3.7+) |
|
|
143
160
|
|  | 3.1+ (or Valkey 7.2+) — optional in WAL mode |
|
|
@@ -225,6 +242,96 @@ PGSync transforms this into search-ready documents:
|
|
|
225
242
|
|
|
226
243
|
---
|
|
227
244
|
|
|
245
|
+
## Transforms
|
|
246
|
+
|
|
247
|
+
PGSync supports built-in transforms to modify field values before indexing. Transforms are applied in order: `replace` → `rename` → `concat`.
|
|
248
|
+
|
|
249
|
+
### Replace
|
|
250
|
+
|
|
251
|
+
Find and replace substrings within field values:
|
|
252
|
+
|
|
253
|
+
```json
|
|
254
|
+
{
|
|
255
|
+
"table": "product",
|
|
256
|
+
"columns": ["code", "name"],
|
|
257
|
+
"transform": {
|
|
258
|
+
"replace": {
|
|
259
|
+
"code": {
|
|
260
|
+
"-": "/",
|
|
261
|
+
"_": " "
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
| Before | After |
|
|
269
|
+
|--------|-------|
|
|
270
|
+
| `ABC-DEF_GHI` | `ABC/DEF GHI` |
|
|
271
|
+
|
|
272
|
+
### Rename
|
|
273
|
+
|
|
274
|
+
Rename fields in the output document:
|
|
275
|
+
|
|
276
|
+
```json
|
|
277
|
+
{
|
|
278
|
+
"table": "book",
|
|
279
|
+
"columns": ["id", "title"],
|
|
280
|
+
"transform": {
|
|
281
|
+
"rename": {
|
|
282
|
+
"id": "book_id",
|
|
283
|
+
"title": "book_title"
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
### Concat
|
|
290
|
+
|
|
291
|
+
Combine multiple fields into a new field:
|
|
292
|
+
|
|
293
|
+
```json
|
|
294
|
+
{
|
|
295
|
+
"table": "user",
|
|
296
|
+
"columns": ["first_name", "last_name"],
|
|
297
|
+
"transform": {
|
|
298
|
+
"concat": {
|
|
299
|
+
"columns": ["first_name", "last_name"],
|
|
300
|
+
"destination": "full_name",
|
|
301
|
+
"delimiter": " "
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
### Combined Example
|
|
308
|
+
|
|
309
|
+
Transforms can be combined and applied to nested children:
|
|
310
|
+
|
|
311
|
+
```json
|
|
312
|
+
{
|
|
313
|
+
"table": "book",
|
|
314
|
+
"columns": ["isbn", "title"],
|
|
315
|
+
"children": [{
|
|
316
|
+
"table": "publisher",
|
|
317
|
+
"columns": ["code", "name"],
|
|
318
|
+
"transform": {
|
|
319
|
+
"replace": { "code": { "-": "." } },
|
|
320
|
+
"rename": { "name": "publisher_name" }
|
|
321
|
+
}
|
|
322
|
+
}],
|
|
323
|
+
"transform": {
|
|
324
|
+
"concat": {
|
|
325
|
+
"columns": ["isbn", "title"],
|
|
326
|
+
"destination": "search_text",
|
|
327
|
+
"delimiter": " - "
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
---
|
|
334
|
+
|
|
228
335
|
## Why PGSync?
|
|
229
336
|
|
|
230
337
|
| Challenge | PGSync Solution |
|
|
@@ -39,7 +39,7 @@ Key Features
|
|
|
39
39
|
Requirements
|
|
40
40
|
------------
|
|
41
41
|
|
|
42
|
-
- `Python <https://www.python.org>`_ 3.
|
|
42
|
+
- `Python <https://www.python.org>`_ 3.10+
|
|
43
43
|
- `PostgreSQL <https://www.postgresql.org>`_ 9.6+ or `MySQL <https://www.mysql.com>`_ 8.0.0+ or `MariaDB <https://mariadb.org>`_ 12.0.0+
|
|
44
44
|
- `Redis <https://redis.io>`_ 3.1.0+ or `Valkey <https://valkey.io>`_ 7.2.0+ (optional in WAL mode)
|
|
45
45
|
- `Elasticsearch <https://www.elastic.co/products/elastic-stack>`_ 6.3.1+ or `OpenSearch <https://opensearch.org>`_ 1.3.7+
|
|
@@ -43,6 +43,7 @@ from .settings import (
|
|
|
43
43
|
PG_SSLROOTCERT,
|
|
44
44
|
PG_URL_RO,
|
|
45
45
|
PG_USER_RO,
|
|
46
|
+
PG_WORK_MEM,
|
|
46
47
|
QUERY_CHUNK_SIZE,
|
|
47
48
|
SQLALCHEMY_MAX_OVERFLOW,
|
|
48
49
|
SQLALCHEMY_POOL_PRE_PING,
|
|
@@ -242,6 +243,10 @@ class Base(object):
|
|
|
242
243
|
self.verbose: bool = verbose
|
|
243
244
|
self._conn = None
|
|
244
245
|
self._session = None
|
|
246
|
+
# Per-thread set of advisory lock keys currently held, to make
|
|
247
|
+
# advisory_lock() re-entrant without requiring the nested call to
|
|
248
|
+
# acquire a second backend connection.
|
|
249
|
+
self._advisory_locks_held = threading.local()
|
|
245
250
|
|
|
246
251
|
def connect(self) -> None:
|
|
247
252
|
"""Connect to database."""
|
|
@@ -567,50 +572,6 @@ class Base(object):
|
|
|
567
572
|
)
|
|
568
573
|
return row[0]
|
|
569
574
|
|
|
570
|
-
def pg_try_advisory_lock(
|
|
571
|
-
self, key: t.Union[int, str], timeout: int = 0
|
|
572
|
-
) -> bool:
|
|
573
|
-
"""
|
|
574
|
-
Attempts to acquire an dvisory/named lock based on a hashed slot name without blocking.
|
|
575
|
-
|
|
576
|
-
PostgreSQL: integer key -> PG_TRY_ADVISORY_LOCK(key) -> bool
|
|
577
|
-
MySQL/MariaDB: string name -> GET_LOCK(name, timeout) -> 1 on success
|
|
578
|
-
(timeout defaults to 0 = non-blocking)
|
|
579
|
-
|
|
580
|
-
Returns:
|
|
581
|
-
bool: True if the lock was acquired, False otherwise.
|
|
582
|
-
"""
|
|
583
|
-
if self.is_mysql_compat:
|
|
584
|
-
row = self.fetchone(
|
|
585
|
-
sa.text("SELECT GET_LOCK(:name, :timeout)").bindparams(
|
|
586
|
-
name=str(key), timeout=int(timeout)
|
|
587
|
-
)
|
|
588
|
-
)
|
|
589
|
-
return bool(row and row[0] == 1)
|
|
590
|
-
|
|
591
|
-
row = self.fetchone(
|
|
592
|
-
sa.text("SELECT PG_TRY_ADVISORY_LOCK(:key)").bindparams(key=key)
|
|
593
|
-
)
|
|
594
|
-
return bool(row and row[0])
|
|
595
|
-
|
|
596
|
-
def pg_advisory_unlock(self, key: t.Union[int, str]) -> bool:
|
|
597
|
-
"""
|
|
598
|
-
Releases an advisory lock associated with the hashed slot name.
|
|
599
|
-
|
|
600
|
-
Returns:
|
|
601
|
-
bool: True if the lock was released, False if it was not held.
|
|
602
|
-
"""
|
|
603
|
-
if self.is_mysql_compat:
|
|
604
|
-
row = self.fetchone(
|
|
605
|
-
sa.text("SELECT RELEASE_LOCK(:name)").bindparams(name=str(key))
|
|
606
|
-
)
|
|
607
|
-
return bool(row and row[0] == 1)
|
|
608
|
-
|
|
609
|
-
row = self.fetchone(
|
|
610
|
-
sa.text("SELECT PG_ADVISORY_UNLOCK(:key)").bindparams(key=key)
|
|
611
|
-
)
|
|
612
|
-
return bool(row and row[0])
|
|
613
|
-
|
|
614
575
|
@contextmanager
|
|
615
576
|
def advisory_lock(
|
|
616
577
|
self,
|
|
@@ -626,58 +587,111 @@ class Base(object):
|
|
|
626
587
|
Context manager to acquire a PostgreSQL advisory lock with optional retries.
|
|
627
588
|
Acquire a PostgreSQL advisory lock with retries, backoff, and jitter.
|
|
628
589
|
Jitter reduces lock-step contention so callers don't starve.
|
|
590
|
+
|
|
591
|
+
A single connection is held for the entire lifetime of the context
|
|
592
|
+
so that lock and unlock always run on the same PostgreSQL backend,
|
|
593
|
+
preventing advisory lock leaks. Re-entrant within the same thread:
|
|
594
|
+
nested calls for the same slot are no-ops.
|
|
629
595
|
"""
|
|
630
596
|
key: int = self.advisory_key(slot_name)
|
|
597
|
+
|
|
598
|
+
held: set = getattr(self._advisory_locks_held, "keys", None)
|
|
599
|
+
if held is None:
|
|
600
|
+
held = set()
|
|
601
|
+
self._advisory_locks_held.keys = held
|
|
602
|
+
|
|
603
|
+
if key in held:
|
|
604
|
+
# Already held in this thread; nested acquires are a no-op.
|
|
605
|
+
yield
|
|
606
|
+
return
|
|
607
|
+
|
|
631
608
|
attempt: int = 0
|
|
632
609
|
|
|
633
610
|
base_delay: float = float(retry_interval)
|
|
634
611
|
# current backoff window (seconds)
|
|
635
612
|
delay: float = base_delay
|
|
636
613
|
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
614
|
+
with self.engine.connect() as conn:
|
|
615
|
+
while True:
|
|
616
|
+
if self._try_lock_on(conn, key):
|
|
617
|
+
break
|
|
640
618
|
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
619
|
+
if (max_retries is not None) and (attempt >= max_retries):
|
|
620
|
+
raise RuntimeError(
|
|
621
|
+
f"Failed to acquire advisory lock for '{slot_name}' after {max_retries} retries."
|
|
622
|
+
)
|
|
645
623
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
else:
|
|
652
|
-
# For other modes, sleep is derived from current delay.
|
|
653
|
-
if jitter == "full":
|
|
654
|
-
sleep_for = random.uniform(0.0, delay)
|
|
655
|
-
elif jitter == "equal":
|
|
656
|
-
sleep_for = (delay / 2.0) + random.uniform(
|
|
657
|
-
0.0, delay / 2.0
|
|
624
|
+
# Compute sleep using jitter strategy
|
|
625
|
+
if jitter == "decorrelated":
|
|
626
|
+
# Decorrelated jitter chooses the *next* delay first.
|
|
627
|
+
delay = min(
|
|
628
|
+
max_delay, random.uniform(base_delay, delay * 3)
|
|
658
629
|
)
|
|
659
|
-
elif jitter == "none":
|
|
660
630
|
sleep_for = delay
|
|
661
631
|
else:
|
|
662
|
-
#
|
|
663
|
-
|
|
632
|
+
# For other modes, sleep is derived from current delay.
|
|
633
|
+
if jitter == "full":
|
|
634
|
+
sleep_for = random.uniform(0.0, delay)
|
|
635
|
+
elif jitter == "equal":
|
|
636
|
+
sleep_for = (delay / 2.0) + random.uniform(
|
|
637
|
+
0.0, delay / 2.0
|
|
638
|
+
)
|
|
639
|
+
elif jitter == "none":
|
|
640
|
+
sleep_for = delay
|
|
641
|
+
else:
|
|
642
|
+
# Fallback to full jitter if an unknown option is passed
|
|
643
|
+
sleep_for = random.uniform(0.0, delay)
|
|
664
644
|
|
|
665
|
-
|
|
645
|
+
time.sleep(max(0.0, sleep_for))
|
|
666
646
|
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
647
|
+
# Increase delay for next attempt (except decorrelated which already advanced)
|
|
648
|
+
if backoff_type == "exponential" and jitter != "decorrelated":
|
|
649
|
+
delay = min(max_delay, delay * backoff_factor)
|
|
650
|
+
# For fixed backoff, 'delay' stays at base_delay unless decorrelated changed it.
|
|
671
651
|
|
|
672
|
-
|
|
652
|
+
attempt += 1
|
|
673
653
|
|
|
674
|
-
|
|
675
|
-
yield
|
|
676
|
-
finally:
|
|
654
|
+
held.add(key)
|
|
677
655
|
try:
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
656
|
+
yield
|
|
657
|
+
finally:
|
|
658
|
+
held.discard(key)
|
|
659
|
+
try:
|
|
660
|
+
self._unlock_on(conn, key)
|
|
661
|
+
except Exception:
|
|
662
|
+
pass
|
|
663
|
+
|
|
664
|
+
# ------------------------------------------------------------------
|
|
665
|
+
# Internal helpers that run lock/unlock on an explicit connection
|
|
666
|
+
# ------------------------------------------------------------------
|
|
667
|
+
|
|
668
|
+
def _try_lock_on(
|
|
669
|
+
self, conn: sa.engine.Connection, key: t.Union[int, str]
|
|
670
|
+
) -> bool:
|
|
671
|
+
"""Acquire an advisory lock on *conn* without blocking."""
|
|
672
|
+
if self.is_mysql_compat:
|
|
673
|
+
row = conn.execute(
|
|
674
|
+
sa.text("SELECT GET_LOCK(:name, 0)").bindparams(name=str(key))
|
|
675
|
+
).fetchone()
|
|
676
|
+
return bool(row and row[0] == 1)
|
|
677
|
+
row = conn.execute(
|
|
678
|
+
sa.text("SELECT PG_TRY_ADVISORY_LOCK(:key)").bindparams(key=key)
|
|
679
|
+
).fetchone()
|
|
680
|
+
return bool(row and row[0])
|
|
681
|
+
|
|
682
|
+
def _unlock_on(
|
|
683
|
+
self, conn: sa.engine.Connection, key: t.Union[int, str]
|
|
684
|
+
) -> bool:
|
|
685
|
+
"""Release an advisory lock on *conn*."""
|
|
686
|
+
if self.is_mysql_compat:
|
|
687
|
+
row = conn.execute(
|
|
688
|
+
sa.text("SELECT RELEASE_LOCK(:name)").bindparams(name=str(key))
|
|
689
|
+
).fetchone()
|
|
690
|
+
return bool(row and row[0] == 1)
|
|
691
|
+
row = conn.execute(
|
|
692
|
+
sa.text("SELECT PG_ADVISORY_UNLOCK(:key)").bindparams(key=key)
|
|
693
|
+
).fetchone()
|
|
694
|
+
return bool(row and row[0])
|
|
681
695
|
|
|
682
696
|
def _logical_slot_changes(
|
|
683
697
|
self,
|
|
@@ -1050,15 +1064,13 @@ class Base(object):
|
|
|
1050
1064
|
if not xid8s:
|
|
1051
1065
|
return {}
|
|
1052
1066
|
# TODO: use the SQLAlchemy ORM to handle this query
|
|
1053
|
-
statement = sa.text(
|
|
1054
|
-
"""
|
|
1067
|
+
statement = sa.text("""
|
|
1055
1068
|
SELECT xid AS xid8,
|
|
1056
1069
|
PG_VISIBLE_IN_SNAPSHOT(xid::xid8, PG_CURRENT_SNAPSHOT()) AS visible
|
|
1057
1070
|
FROM UNNEST(CAST(:xid8s AS text[]))
|
|
1058
1071
|
WITH ORDINALITY AS t(xid, ord)
|
|
1059
1072
|
ORDER BY t.ord
|
|
1060
|
-
"""
|
|
1061
|
-
)
|
|
1073
|
+
""")
|
|
1062
1074
|
if self.verbose:
|
|
1063
1075
|
compiled_query(
|
|
1064
1076
|
statement,
|
|
@@ -1397,23 +1409,39 @@ def _pg_engine(
|
|
|
1397
1409
|
if SQLALCHEMY_USE_NULLPOOL:
|
|
1398
1410
|
from sqlalchemy.pool import NullPool
|
|
1399
1411
|
|
|
1400
|
-
|
|
1412
|
+
engine = sa.create_engine(
|
|
1401
1413
|
url,
|
|
1402
1414
|
echo=echo,
|
|
1403
1415
|
connect_args=connect_args,
|
|
1404
1416
|
poolclass=NullPool,
|
|
1405
1417
|
)
|
|
1418
|
+
else:
|
|
1419
|
+
engine = sa.create_engine(
|
|
1420
|
+
url,
|
|
1421
|
+
echo=echo,
|
|
1422
|
+
connect_args=connect_args,
|
|
1423
|
+
pool_size=SQLALCHEMY_POOL_SIZE,
|
|
1424
|
+
max_overflow=SQLALCHEMY_MAX_OVERFLOW,
|
|
1425
|
+
pool_pre_ping=SQLALCHEMY_POOL_PRE_PING,
|
|
1426
|
+
pool_recycle=SQLALCHEMY_POOL_RECYCLE,
|
|
1427
|
+
pool_timeout=SQLALCHEMY_POOL_TIMEOUT,
|
|
1428
|
+
)
|
|
1406
1429
|
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1430
|
+
# Set work_mem on each connection if configured.
|
|
1431
|
+
# This prevents temp file creation during complex queries with
|
|
1432
|
+
# LATERAL JOINs and JSON aggregation (typically needs 12-16MB).
|
|
1433
|
+
if PG_WORK_MEM and not IS_MYSQL_COMPAT:
|
|
1434
|
+
from sqlalchemy import event
|
|
1435
|
+
|
|
1436
|
+
@event.listens_for(engine, "connect")
|
|
1437
|
+
def set_work_mem(dbapi_conn, connection_record):
|
|
1438
|
+
cursor = dbapi_conn.cursor()
|
|
1439
|
+
cursor.execute(f"SET work_mem = '{PG_WORK_MEM}'")
|
|
1440
|
+
cursor.close()
|
|
1441
|
+
|
|
1442
|
+
logger.debug(f"Configured work_mem={PG_WORK_MEM} for new connections")
|
|
1443
|
+
|
|
1444
|
+
return engine
|
|
1417
1445
|
|
|
1418
1446
|
|
|
1419
1447
|
def pg_logical_repl_conn(
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import re
|
|
6
6
|
import threading
|
|
7
7
|
import typing as t
|
|
8
|
+
from collections import deque
|
|
8
9
|
from dataclasses import dataclass
|
|
9
10
|
|
|
10
11
|
import sqlalchemy as sa
|
|
@@ -342,12 +343,12 @@ class Node(object):
|
|
|
342
343
|
child.display(prefix, leaf)
|
|
343
344
|
|
|
344
345
|
def traverse_breadth_first(self) -> t.Generator:
|
|
345
|
-
|
|
346
|
-
while
|
|
347
|
-
node: Node =
|
|
346
|
+
queue: deque = deque([self])
|
|
347
|
+
while queue:
|
|
348
|
+
node: Node = queue.popleft()
|
|
348
349
|
yield node
|
|
349
350
|
for child in node.children:
|
|
350
|
-
|
|
351
|
+
queue.append(child)
|
|
351
352
|
|
|
352
353
|
def traverse_post_order(self) -> t.Generator:
|
|
353
354
|
for child in self.children:
|
|
@@ -210,7 +210,8 @@ class QueryBuilder(threading.local):
|
|
|
210
210
|
getattr(getattr(node, "model", None), "original", None)
|
|
211
211
|
)
|
|
212
212
|
|
|
213
|
-
# merge relationship provided hints (if any)
|
|
213
|
+
# merge relationship provided hints (if any)
|
|
214
|
+
has_explicit_fk = False
|
|
214
215
|
for node in (node_a, node_b):
|
|
215
216
|
rel_fk = getattr(
|
|
216
217
|
getattr(node, "relationship", None), "foreign_key", None
|
|
@@ -218,6 +219,14 @@ class QueryBuilder(threading.local):
|
|
|
218
219
|
if not rel_fk:
|
|
219
220
|
continue
|
|
220
221
|
|
|
222
|
+
# node.relationship.foreign_key describes the FK between node and
|
|
223
|
+
# node.parent (in the tree). Only honor it when the other node IS
|
|
224
|
+
# node's tree parent; otherwise it can leak a middle node's FK to
|
|
225
|
+
# its grandparent into a grandchild join's resolution dict.
|
|
226
|
+
other = node_b if node is node_a else node_a
|
|
227
|
+
if getattr(node, "parent", None) is not other:
|
|
228
|
+
continue
|
|
229
|
+
|
|
221
230
|
parent_tbl_key = node_table_key(node, prefer_parent=True)
|
|
222
231
|
child_tbl_key = node_table_key(node, prefer_parent=False)
|
|
223
232
|
|
|
@@ -241,7 +250,15 @@ class QueryBuilder(threading.local):
|
|
|
241
250
|
# child table cols
|
|
242
251
|
merge_side(getattr(rel_fk, "child", None), child_tbl_key)
|
|
243
252
|
|
|
253
|
+
if getattr(rel_fk, "parent", None) and getattr(
|
|
254
|
+
rel_fk, "child", None
|
|
255
|
+
):
|
|
256
|
+
has_explicit_fk = True
|
|
257
|
+
|
|
244
258
|
# SQLAlchemy introspection in both directions (A -> B and B -> A)
|
|
259
|
+
# Skip when an explicit foreign_key was provided in the schema,
|
|
260
|
+
# otherwise auto discovery adds ALL FKs between the tables which
|
|
261
|
+
# causes mismatches when a child has multiple FKs to the parent.
|
|
245
262
|
A = getattr(getattr(node_a, "model", None), "original", None)
|
|
246
263
|
B = getattr(getattr(node_b, "model", None), "original", None)
|
|
247
264
|
|
|
@@ -252,7 +269,7 @@ class QueryBuilder(threading.local):
|
|
|
252
269
|
def same_table(t1: t.Any, t2: t.Any) -> bool:
|
|
253
270
|
return qname(t1) is not None and qname(t1) == qname(t2)
|
|
254
271
|
|
|
255
|
-
if A is not None and B is not None:
|
|
272
|
+
if not has_explicit_fk and A is not None and B is not None:
|
|
256
273
|
for fk in getattr(A, "foreign_keys", []):
|
|
257
274
|
# does A have an FK pointing to B?
|
|
258
275
|
if same_table(getattr(fk, "column", None).table, B):
|