pgsync 7.0.5__tar.gz → 7.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {pgsync-7.0.5 → pgsync-7.1.0}/PKG-INFO +30 -28
  2. {pgsync-7.0.5 → pgsync-7.1.0}/README.md +108 -1
  3. {pgsync-7.0.5 → pgsync-7.1.0}/README.rst +1 -1
  4. {pgsync-7.0.5 → pgsync-7.1.0}/bin/bootstrap +1 -0
  5. {pgsync-7.0.5 → pgsync-7.1.0}/bin/parallel_sync +1 -0
  6. {pgsync-7.0.5 → pgsync-7.1.0}/bin/pgsync +1 -0
  7. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/__init__.py +1 -1
  8. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/base.py +121 -93
  9. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/node.py +5 -4
  10. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/querybuilder.py +19 -2
  11. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/redisqueue.py +23 -7
  12. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/settings.py +12 -6
  13. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/sync.py +69 -5
  14. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/transform.py +84 -40
  15. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/PKG-INFO +30 -28
  16. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/SOURCES.txt +3 -0
  17. pgsync-7.1.0/pgsync.egg-info/requires.txt +36 -0
  18. pgsync-7.1.0/pyproject.toml +3 -0
  19. {pgsync-7.0.5 → pgsync-7.1.0}/requirements/base.txt +40 -27
  20. {pgsync-7.0.5 → pgsync-7.1.0}/requirements/dev.txt +66 -53
  21. {pgsync-7.0.5 → pgsync-7.1.0}/setup.py +2 -2
  22. {pgsync-7.0.5 → pgsync-7.1.0}/tests/conftest.py +31 -0
  23. pgsync-7.1.0/tests/test_base.py +1599 -0
  24. pgsync-7.1.0/tests/test_bug_regressions.py +136 -0
  25. pgsync-7.1.0/tests/test_exc.py +322 -0
  26. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_node.py +359 -0
  27. pgsync-7.1.0/tests/test_plugin.py +437 -0
  28. pgsync-7.1.0/tests/test_query_builder.py +2552 -0
  29. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_redisqueue.py +58 -0
  30. pgsync-7.1.0/tests/test_search_client.py +648 -0
  31. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_settings.py +4 -1
  32. pgsync-7.1.0/tests/test_sync.py +3210 -0
  33. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_transform.py +474 -1
  34. pgsync-7.1.0/tests/test_utils.py +775 -0
  35. pgsync-7.1.0/tests/test_view.py +662 -0
  36. pgsync-7.0.5/pgsync.egg-info/requires.txt +0 -33
  37. pgsync-7.0.5/pyproject.toml +0 -3
  38. pgsync-7.0.5/tests/test_base.py +0 -676
  39. pgsync-7.0.5/tests/test_query_builder.py +0 -102
  40. pgsync-7.0.5/tests/test_search_client.py +0 -115
  41. pgsync-7.0.5/tests/test_sync.py +0 -1450
  42. pgsync-7.0.5/tests/test_utils.py +0 -229
  43. pgsync-7.0.5/tests/test_view.py +0 -302
  44. {pgsync-7.0.5 → pgsync-7.1.0}/AUTHORS.rst +0 -0
  45. {pgsync-7.0.5 → pgsync-7.1.0}/CONTRIBUTING.rst +0 -0
  46. {pgsync-7.0.5 → pgsync-7.1.0}/HISTORY.rst +0 -0
  47. {pgsync-7.0.5 → pgsync-7.1.0}/LICENSE +0 -0
  48. {pgsync-7.0.5 → pgsync-7.1.0}/MANIFEST.in +0 -0
  49. {pgsync-7.0.5 → pgsync-7.1.0}/docs/Makefile +0 -0
  50. {pgsync-7.0.5 → pgsync-7.1.0}/docs/authors.rst +0 -0
  51. {pgsync-7.0.5 → pgsync-7.1.0}/docs/changelog.rst +0 -0
  52. {pgsync-7.0.5 → pgsync-7.1.0}/docs/conf.py +0 -0
  53. {pgsync-7.0.5 → pgsync-7.1.0}/docs/contributing.rst +0 -0
  54. {pgsync-7.0.5 → pgsync-7.1.0}/docs/history.rst +0 -0
  55. {pgsync-7.0.5 → pgsync-7.1.0}/docs/index.rst +0 -0
  56. {pgsync-7.0.5 → pgsync-7.1.0}/docs/installation.rst +0 -0
  57. {pgsync-7.0.5 → pgsync-7.1.0}/docs/logo.png +0 -0
  58. {pgsync-7.0.5 → pgsync-7.1.0}/docs/make.bat +0 -0
  59. {pgsync-7.0.5 → pgsync-7.1.0}/docs/readme.rst +0 -0
  60. {pgsync-7.0.5 → pgsync-7.1.0}/docs/usage.rst +0 -0
  61. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/constants.py +0 -0
  62. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/exc.py +0 -0
  63. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/helper.py +0 -0
  64. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/plugin.py +0 -0
  65. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/search_client.py +0 -0
  66. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/singleton.py +0 -0
  67. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/trigger.py +0 -0
  68. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/urls.py +0 -0
  69. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/utils.py +0 -0
  70. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync/view.py +0 -0
  71. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/dependency_links.txt +0 -0
  72. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/not-zip-safe +0 -0
  73. {pgsync-7.0.5 → pgsync-7.1.0}/pgsync.egg-info/top_level.txt +0 -0
  74. {pgsync-7.0.5 → pgsync-7.1.0}/setup.cfg +0 -0
  75. {pgsync-7.0.5 → pgsync-7.1.0}/tests/__init__.py +0 -0
  76. {pgsync-7.0.5 → pgsync-7.1.0}/tests/fixtures/schema.json +0 -0
  77. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_constants.py +0 -0
  78. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_env_vars.py +0 -0
  79. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_helper.py +0 -0
  80. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_log_handlers.py +0 -0
  81. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_sync_nested_children.py +0 -0
  82. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_sync_root.py +0 -0
  83. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_sync_single_child_fk_on_child.py +0 -0
  84. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_sync_single_child_fk_on_parent.py +0 -0
  85. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_trigger.py +0 -0
  86. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_unique_behaviour.py +0 -0
  87. {pgsync-7.0.5 → pgsync-7.1.0}/tests/test_urls.py +0 -0
  88. {pgsync-7.0.5 → pgsync-7.1.0}/tests/testing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pgsync
3
- Version: 7.0.5
3
+ Version: 7.1.0
4
4
  Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
5
5
  Home-page: https://github.com/toluaina/pgsync
6
6
  Author: Tolu Aina
@@ -17,7 +17,6 @@ Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres,mysql,mar
17
17
  Classifier: Development Status :: 5 - Production/Stable
18
18
  Classifier: Intended Audience :: Developers
19
19
  Classifier: Natural Language :: English
20
- Classifier: Programming Language :: Python :: 3.9
21
20
  Classifier: Programming Language :: Python :: 3.10
22
21
  Classifier: Programming Language :: Python :: 3.11
23
22
  Classifier: Programming Language :: Python :: 3.12
@@ -27,43 +26,46 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
27
26
  Classifier: Programming Language :: Python :: Implementation :: PyPy
28
27
  Classifier: License :: OSI Approved :: MIT License
29
28
  Classifier: Operating System :: OS Independent
30
- Requires-Python: >=3.9.0
29
+ Requires-Python: >=3.10.0
31
30
  Description-Content-Type: text/x-rst
32
31
  License-File: LICENSE
33
32
  License-File: AUTHORS.rst
34
33
  Requires-Dist: async-timeout==5.0.1
35
34
  Requires-Dist: backports-datetime-fromisoformat==2.0.3
36
- Requires-Dist: boto3==1.42.21
37
- Requires-Dist: botocore==1.42.21
38
- Requires-Dist: certifi==2026.1.4
39
- Requires-Dist: charset-normalizer==3.4.4
40
- Requires-Dist: click==8.1.8
41
- Requires-Dist: elastic-transport==9.1.0
42
- Requires-Dist: elasticsearch==7.17.12
35
+ Requires-Dist: boto3==1.43.36
36
+ Requires-Dist: botocore==1.43.36
37
+ Requires-Dist: certifi==2026.6.17
38
+ Requires-Dist: charset-normalizer==3.4.7
39
+ Requires-Dist: click==8.4.2
40
+ Requires-Dist: elastic-transport==9.4.2
41
+ Requires-Dist: elasticsearch==7.17.13
43
42
  Requires-Dist: elasticsearch-dsl==7.4.1
44
- Requires-Dist: environs==14.4.0
43
+ Requires-Dist: environs==15.0.1
45
44
  Requires-Dist: events==0.5
46
- Requires-Dist: greenlet==3.2.4
47
- Requires-Dist: idna==3.11
48
- Requires-Dist: jmespath==1.0.1
49
- Requires-Dist: marshmallow==4.0.1
50
- Requires-Dist: mysql-replication==1.0.12
45
+ Requires-Dist: grpcio==1.81.1
46
+ Requires-Dist: idna==3.18
47
+ Requires-Dist: jmespath==1.1.0
48
+ Requires-Dist: marshmallow==4.3.0
49
+ Requires-Dist: mysql-replication==1.0.15
51
50
  Requires-Dist: opensearch-dsl==2.1.0
52
- Requires-Dist: opensearch-py==3.0.0
53
- Requires-Dist: packaging==25.0
54
- Requires-Dist: psycopg2-binary==2.9.11
55
- Requires-Dist: pymysql==1.1.2
51
+ Requires-Dist: opensearch-protobufs==1.2.0
52
+ Requires-Dist: opensearch-py==3.2.0
53
+ Requires-Dist: packaging==26.2
54
+ Requires-Dist: protobuf==7.35.1
55
+ Requires-Dist: psycopg2-binary==2.9.12
56
+ Requires-Dist: pymysql==1.2.0
56
57
  Requires-Dist: python-dateutil==2.9.0.post0
57
- Requires-Dist: python-dotenv==1.2.1
58
- Requires-Dist: redis==7.0.1
59
- Requires-Dist: requests==2.32.5
60
- Requires-Dist: requests-aws4auth==1.3.1
61
- Requires-Dist: s3transfer==0.16.0
58
+ Requires-Dist: python-dotenv==1.2.2
59
+ Requires-Dist: redis==8.0.1
60
+ Requires-Dist: requests==2.34.2
61
+ Requires-Dist: requests-aws4auth==1.3.2
62
+ Requires-Dist: s3transfer==0.19.0
62
63
  Requires-Dist: six==1.17.0
63
- Requires-Dist: sqlalchemy==2.0.45
64
+ Requires-Dist: sniffio==1.3.1
65
+ Requires-Dist: sqlalchemy==2.0.51
64
66
  Requires-Dist: sqlparse==0.5.5
65
67
  Requires-Dist: typing-extensions==4.15.0
66
- Requires-Dist: urllib3==1.26.20
68
+ Requires-Dist: urllib3==2.7.0
67
69
  Dynamic: author
68
70
  Dynamic: author-email
69
71
  Dynamic: classifier
@@ -121,7 +123,7 @@ Key Features
121
123
  Requirements
122
124
  ------------
123
125
 
124
- - `Python <https://www.python.org>`_ 3.9+
126
+ - `Python <https://www.python.org>`_ 3.10+
125
127
  - `PostgreSQL <https://www.postgresql.org>`_ 9.6+ or `MySQL <https://www.mysql.com>`_ 8.0.0+ or `MariaDB <https://mariadb.org>`_ 12.0.0+
126
128
  - `Redis <https://redis.io>`_ 3.1.0+ or `Valkey <https://valkey.io>`_ 7.2.0+ (optional in WAL mode)
127
129
  - `Elasticsearch <https://www.elastic.co/products/elastic-stack>`_ 6.3.1+ or `OpenSearch <https://opensearch.org>`_ 1.3.7+
@@ -27,6 +27,7 @@
27
27
  PGSync is a change data capture tool that syncs data from **PostgreSQL**, **MySQL**, or **MariaDB** to **Elasticsearch** or **OpenSearch** in real-time. Define your document structure in JSON, and PGSync handles the rest — no custom code required.
28
28
 
29
29
  ```mermaid
30
+ %%{init: {'look': 'handDrawn', 'theme': 'neutral'}}%%
30
31
  flowchart LR
31
32
  subgraph Source["🗄️ Source Database"]
32
33
  DB[(PostgreSQL<br/>MySQL<br/>MariaDB)]
@@ -86,12 +87,28 @@ pgsync --config schema.json -d
86
87
 
87
88
  ### Using Docker Compose
88
89
 
90
+ **Default (Elasticsearch + Kibana):**
89
91
  ```bash
90
92
  git clone https://github.com/toluaina/pgsync
91
93
  cd pgsync
92
94
  docker-compose up
93
95
  ```
94
96
 
97
+ This starts PostgreSQL, Redis, Elasticsearch, Kibana, and PGSync configured for Elasticsearch.
98
+
99
+ **For OpenSearch:**
100
+ ```bash
101
+ docker-compose --profile opensearch up
102
+ ```
103
+
104
+ This starts PostgreSQL, Redis, OpenSearch, and PGSync configured for OpenSearch.
105
+
106
+ **Ports:**
107
+ - PostgreSQL: `15432`
108
+ - Elasticsearch: `9201` (default)
109
+ - Kibana: `5601` (default)
110
+ - OpenSearch: `9400` (OpenSearch profile)
111
+
95
112
  ---
96
113
 
97
114
  ## How It Works
@@ -137,7 +154,7 @@ Changes to any related table automatically update the document in Elasticsearch/
137
154
 
138
155
  | Component | Version |
139
156
  |-----------|---------|
140
- | ![Python](https://img.shields.io/badge/Python-3.9+-3776AB?logo=python&logoColor=white) | 3.9+ |
157
+ | ![Python](https://img.shields.io/badge/Python-3.10+-3776AB?logo=python&logoColor=white) | 3.10+ |
141
158
  | ![PostgreSQL](https://img.shields.io/badge/PostgreSQL-9.6+-4169E1?logo=postgresql&logoColor=white) | 9.6+ (or MySQL 5.7.22+ / MariaDB 10.5+) |
142
159
  | ![Elasticsearch](https://img.shields.io/badge/Elasticsearch-6.3+-005571?logo=elasticsearch&logoColor=white) | 6.3.1+ (or OpenSearch 1.3.7+) |
143
160
  | ![Redis](https://img.shields.io/badge/Redis-3.1+-DC382D?logo=redis&logoColor=white) | 3.1+ (or Valkey 7.2+) — optional in WAL mode |
@@ -225,6 +242,96 @@ PGSync transforms this into search-ready documents:
225
242
 
226
243
  ---
227
244
 
245
+ ## Transforms
246
+
247
+ PGSync supports built-in transforms to modify field values before indexing. Transforms are applied in order: `replace` → `rename` → `concat`.
248
+
249
+ ### Replace
250
+
251
+ Find and replace substrings within field values:
252
+
253
+ ```json
254
+ {
255
+ "table": "product",
256
+ "columns": ["code", "name"],
257
+ "transform": {
258
+ "replace": {
259
+ "code": {
260
+ "-": "/",
261
+ "_": " "
262
+ }
263
+ }
264
+ }
265
+ }
266
+ ```
267
+
268
+ | Before | After |
269
+ |--------|-------|
270
+ | `ABC-DEF_GHI` | `ABC/DEF GHI` |
271
+
272
+ ### Rename
273
+
274
+ Rename fields in the output document:
275
+
276
+ ```json
277
+ {
278
+ "table": "book",
279
+ "columns": ["id", "title"],
280
+ "transform": {
281
+ "rename": {
282
+ "id": "book_id",
283
+ "title": "book_title"
284
+ }
285
+ }
286
+ }
287
+ ```
288
+
289
+ ### Concat
290
+
291
+ Combine multiple fields into a new field:
292
+
293
+ ```json
294
+ {
295
+ "table": "user",
296
+ "columns": ["first_name", "last_name"],
297
+ "transform": {
298
+ "concat": {
299
+ "columns": ["first_name", "last_name"],
300
+ "destination": "full_name",
301
+ "delimiter": " "
302
+ }
303
+ }
304
+ }
305
+ ```
306
+
307
+ ### Combined Example
308
+
309
+ Transforms can be combined and applied to nested children:
310
+
311
+ ```json
312
+ {
313
+ "table": "book",
314
+ "columns": ["isbn", "title"],
315
+ "children": [{
316
+ "table": "publisher",
317
+ "columns": ["code", "name"],
318
+ "transform": {
319
+ "replace": { "code": { "-": "." } },
320
+ "rename": { "name": "publisher_name" }
321
+ }
322
+ }],
323
+ "transform": {
324
+ "concat": {
325
+ "columns": ["isbn", "title"],
326
+ "destination": "search_text",
327
+ "delimiter": " - "
328
+ }
329
+ }
330
+ }
331
+ ```
332
+
333
+ ---
334
+
228
335
  ## Why PGSync?
229
336
 
230
337
  | Challenge | PGSync Solution |
@@ -39,7 +39,7 @@ Key Features
39
39
  Requirements
40
40
  ------------
41
41
 
42
- - `Python <https://www.python.org>`_ 3.9+
42
+ - `Python <https://www.python.org>`_ 3.10+
43
43
  - `PostgreSQL <https://www.postgresql.org>`_ 9.6+ or `MySQL <https://www.mysql.com>`_ 8.0.0+ or `MariaDB <https://mariadb.org>`_ 12.0.0+
44
44
  - `Redis <https://redis.io>`_ 3.1.0+ or `Valkey <https://valkey.io>`_ 7.2.0+ (optional in WAL mode)
45
45
  - `Elasticsearch <https://www.elastic.co/products/elastic-stack>`_ 6.3.1+ or `OpenSearch <https://opensearch.org>`_ 1.3.7+
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
 
3
3
  """PGSync bootstrap."""
4
+
4
5
  import logging
5
6
 
6
7
  import click
@@ -31,6 +31,7 @@ The sync process works as follows:
31
31
  This parallel approach significantly improves synchronization performance,
32
32
  especially in high-latency network environments.
33
33
  """
34
+
34
35
  import asyncio
35
36
  import multiprocessing
36
37
  import os
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
 
3
3
  """PGSync runtime."""
4
+
4
5
  from pgsync import sync
5
6
 
6
7
  if __name__ == "__main__":
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = "Tolu Aina"
4
4
  __email__ = "tolu@pgsync.com"
5
- __version__ = "7.0.5"
5
+ __version__ = "7.1.0"
@@ -43,6 +43,7 @@ from .settings import (
43
43
  PG_SSLROOTCERT,
44
44
  PG_URL_RO,
45
45
  PG_USER_RO,
46
+ PG_WORK_MEM,
46
47
  QUERY_CHUNK_SIZE,
47
48
  SQLALCHEMY_MAX_OVERFLOW,
48
49
  SQLALCHEMY_POOL_PRE_PING,
@@ -242,6 +243,10 @@ class Base(object):
242
243
  self.verbose: bool = verbose
243
244
  self._conn = None
244
245
  self._session = None
246
+ # Per-thread set of advisory lock keys currently held, to make
247
+ # advisory_lock() re-entrant without requiring the nested call to
248
+ # acquire a second backend connection.
249
+ self._advisory_locks_held = threading.local()
245
250
 
246
251
  def connect(self) -> None:
247
252
  """Connect to database."""
@@ -567,50 +572,6 @@ class Base(object):
567
572
  )
568
573
  return row[0]
569
574
 
570
- def pg_try_advisory_lock(
571
- self, key: t.Union[int, str], timeout: int = 0
572
- ) -> bool:
573
- """
574
- Attempts to acquire an advisory/named lock based on a hashed slot name without blocking.
575
-
576
- PostgreSQL: integer key -> PG_TRY_ADVISORY_LOCK(key) -> bool
577
- MySQL/MariaDB: string name -> GET_LOCK(name, timeout) -> 1 on success
578
- (timeout defaults to 0 = non-blocking)
579
-
580
- Returns:
581
- bool: True if the lock was acquired, False otherwise.
582
- """
583
- if self.is_mysql_compat:
584
- row = self.fetchone(
585
- sa.text("SELECT GET_LOCK(:name, :timeout)").bindparams(
586
- name=str(key), timeout=int(timeout)
587
- )
588
- )
589
- return bool(row and row[0] == 1)
590
-
591
- row = self.fetchone(
592
- sa.text("SELECT PG_TRY_ADVISORY_LOCK(:key)").bindparams(key=key)
593
- )
594
- return bool(row and row[0])
595
-
596
- def pg_advisory_unlock(self, key: t.Union[int, str]) -> bool:
597
- """
598
- Releases an advisory lock associated with the hashed slot name.
599
-
600
- Returns:
601
- bool: True if the lock was released, False if it was not held.
602
- """
603
- if self.is_mysql_compat:
604
- row = self.fetchone(
605
- sa.text("SELECT RELEASE_LOCK(:name)").bindparams(name=str(key))
606
- )
607
- return bool(row and row[0] == 1)
608
-
609
- row = self.fetchone(
610
- sa.text("SELECT PG_ADVISORY_UNLOCK(:key)").bindparams(key=key)
611
- )
612
- return bool(row and row[0])
613
-
614
575
  @contextmanager
615
576
  def advisory_lock(
616
577
  self,
@@ -626,58 +587,111 @@ class Base(object):
626
587
  Context manager to acquire a PostgreSQL advisory lock with optional retries.
627
588
  Acquire a PostgreSQL advisory lock with retries, backoff, and jitter.
628
589
  Jitter reduces lock-step contention so callers don't starve.
590
+
591
+ A single connection is held for the entire lifetime of the context
592
+ so that lock and unlock always run on the same PostgreSQL backend,
593
+ preventing advisory lock leaks. Re-entrant within the same thread:
594
+ nested calls for the same slot are no-ops.
629
595
  """
630
596
  key: int = self.advisory_key(slot_name)
597
+
598
+ held: set = getattr(self._advisory_locks_held, "keys", None)
599
+ if held is None:
600
+ held = set()
601
+ self._advisory_locks_held.keys = held
602
+
603
+ if key in held:
604
+ # Already held in this thread; nested acquires are a no-op.
605
+ yield
606
+ return
607
+
631
608
  attempt: int = 0
632
609
 
633
610
  base_delay: float = float(retry_interval)
634
611
  # current backoff window (seconds)
635
612
  delay: float = base_delay
636
613
 
637
- while True:
638
- if self.pg_try_advisory_lock(key):
639
- break
614
+ with self.engine.connect() as conn:
615
+ while True:
616
+ if self._try_lock_on(conn, key):
617
+ break
640
618
 
641
- if (max_retries is not None) and (attempt >= max_retries):
642
- raise RuntimeError(
643
- f"Failed to acquire advisory lock for '{slot_name}' after {max_retries} retries."
644
- )
619
+ if (max_retries is not None) and (attempt >= max_retries):
620
+ raise RuntimeError(
621
+ f"Failed to acquire advisory lock for '{slot_name}' after {max_retries} retries."
622
+ )
645
623
 
646
- # Compute sleep using jitter strategy
647
- if jitter == "decorrelated":
648
- # Decorrelated jitter chooses the *next* delay first.
649
- delay = min(max_delay, random.uniform(base_delay, delay * 3))
650
- sleep_for = delay
651
- else:
652
- # For other modes, sleep is derived from current delay.
653
- if jitter == "full":
654
- sleep_for = random.uniform(0.0, delay)
655
- elif jitter == "equal":
656
- sleep_for = (delay / 2.0) + random.uniform(
657
- 0.0, delay / 2.0
624
+ # Compute sleep using jitter strategy
625
+ if jitter == "decorrelated":
626
+ # Decorrelated jitter chooses the *next* delay first.
627
+ delay = min(
628
+ max_delay, random.uniform(base_delay, delay * 3)
658
629
  )
659
- elif jitter == "none":
660
630
  sleep_for = delay
661
631
  else:
662
- # Fallback to full jitter if an unknown option is passed
663
- sleep_for = random.uniform(0.0, delay)
632
+ # For other modes, sleep is derived from current delay.
633
+ if jitter == "full":
634
+ sleep_for = random.uniform(0.0, delay)
635
+ elif jitter == "equal":
636
+ sleep_for = (delay / 2.0) + random.uniform(
637
+ 0.0, delay / 2.0
638
+ )
639
+ elif jitter == "none":
640
+ sleep_for = delay
641
+ else:
642
+ # Fallback to full jitter if an unknown option is passed
643
+ sleep_for = random.uniform(0.0, delay)
664
644
 
665
- time.sleep(max(0.0, sleep_for))
645
+ time.sleep(max(0.0, sleep_for))
666
646
 
667
- # Increase delay for next attempt (except decorrelated which already advanced)
668
- if backoff_type == "exponential" and jitter != "decorrelated":
669
- delay = min(max_delay, delay * backoff_factor)
670
- # For fixed backoff, 'delay' stays at base_delay unless decorrelated changed it.
647
+ # Increase delay for next attempt (except decorrelated which already advanced)
648
+ if backoff_type == "exponential" and jitter != "decorrelated":
649
+ delay = min(max_delay, delay * backoff_factor)
650
+ # For fixed backoff, 'delay' stays at base_delay unless decorrelated changed it.
671
651
 
672
- attempt += 1
652
+ attempt += 1
673
653
 
674
- try:
675
- yield
676
- finally:
654
+ held.add(key)
677
655
  try:
678
- self.pg_advisory_unlock(key)
679
- except Exception:
680
- pass
656
+ yield
657
+ finally:
658
+ held.discard(key)
659
+ try:
660
+ self._unlock_on(conn, key)
661
+ except Exception:
662
+ pass
663
+
664
+ # ------------------------------------------------------------------
665
+ # Internal helpers that run lock/unlock on an explicit connection
666
+ # ------------------------------------------------------------------
667
+
668
+ def _try_lock_on(
669
+ self, conn: sa.engine.Connection, key: t.Union[int, str]
670
+ ) -> bool:
671
+ """Acquire an advisory lock on *conn* without blocking."""
672
+ if self.is_mysql_compat:
673
+ row = conn.execute(
674
+ sa.text("SELECT GET_LOCK(:name, 0)").bindparams(name=str(key))
675
+ ).fetchone()
676
+ return bool(row and row[0] == 1)
677
+ row = conn.execute(
678
+ sa.text("SELECT PG_TRY_ADVISORY_LOCK(:key)").bindparams(key=key)
679
+ ).fetchone()
680
+ return bool(row and row[0])
681
+
682
+ def _unlock_on(
683
+ self, conn: sa.engine.Connection, key: t.Union[int, str]
684
+ ) -> bool:
685
+ """Release an advisory lock on *conn*."""
686
+ if self.is_mysql_compat:
687
+ row = conn.execute(
688
+ sa.text("SELECT RELEASE_LOCK(:name)").bindparams(name=str(key))
689
+ ).fetchone()
690
+ return bool(row and row[0] == 1)
691
+ row = conn.execute(
692
+ sa.text("SELECT PG_ADVISORY_UNLOCK(:key)").bindparams(key=key)
693
+ ).fetchone()
694
+ return bool(row and row[0])
681
695
 
682
696
  def _logical_slot_changes(
683
697
  self,
@@ -1050,15 +1064,13 @@ class Base(object):
1050
1064
  if not xid8s:
1051
1065
  return {}
1052
1066
  # TODO: use the SQLAlchemy ORM to handle this query
1053
- statement = sa.text(
1054
- """
1067
+ statement = sa.text("""
1055
1068
  SELECT xid AS xid8,
1056
1069
  PG_VISIBLE_IN_SNAPSHOT(xid::xid8, PG_CURRENT_SNAPSHOT()) AS visible
1057
1070
  FROM UNNEST(CAST(:xid8s AS text[]))
1058
1071
  WITH ORDINALITY AS t(xid, ord)
1059
1072
  ORDER BY t.ord
1060
- """
1061
- )
1073
+ """)
1062
1074
  if self.verbose:
1063
1075
  compiled_query(
1064
1076
  statement,
@@ -1397,23 +1409,39 @@ def _pg_engine(
1397
1409
  if SQLALCHEMY_USE_NULLPOOL:
1398
1410
  from sqlalchemy.pool import NullPool
1399
1411
 
1400
- return sa.create_engine(
1412
+ engine = sa.create_engine(
1401
1413
  url,
1402
1414
  echo=echo,
1403
1415
  connect_args=connect_args,
1404
1416
  poolclass=NullPool,
1405
1417
  )
1418
+ else:
1419
+ engine = sa.create_engine(
1420
+ url,
1421
+ echo=echo,
1422
+ connect_args=connect_args,
1423
+ pool_size=SQLALCHEMY_POOL_SIZE,
1424
+ max_overflow=SQLALCHEMY_MAX_OVERFLOW,
1425
+ pool_pre_ping=SQLALCHEMY_POOL_PRE_PING,
1426
+ pool_recycle=SQLALCHEMY_POOL_RECYCLE,
1427
+ pool_timeout=SQLALCHEMY_POOL_TIMEOUT,
1428
+ )
1406
1429
 
1407
- return sa.create_engine(
1408
- url,
1409
- echo=echo,
1410
- connect_args=connect_args,
1411
- pool_size=SQLALCHEMY_POOL_SIZE,
1412
- max_overflow=SQLALCHEMY_MAX_OVERFLOW,
1413
- pool_pre_ping=SQLALCHEMY_POOL_PRE_PING,
1414
- pool_recycle=SQLALCHEMY_POOL_RECYCLE,
1415
- pool_timeout=SQLALCHEMY_POOL_TIMEOUT,
1416
- )
1430
+ # Set work_mem on each connection if configured.
1431
+ # This prevents temp file creation during complex queries with
1432
+ # LATERAL JOINs and JSON aggregation (typically needs 12-16MB).
1433
+ if PG_WORK_MEM and not IS_MYSQL_COMPAT:
1434
+ from sqlalchemy import event
1435
+
1436
+ @event.listens_for(engine, "connect")
1437
+ def set_work_mem(dbapi_conn, connection_record):
1438
+ cursor = dbapi_conn.cursor()
1439
+ cursor.execute(f"SET work_mem = '{PG_WORK_MEM}'")
1440
+ cursor.close()
1441
+
1442
+ logger.debug(f"Configured work_mem={PG_WORK_MEM} for new connections")
1443
+
1444
+ return engine
1417
1445
 
1418
1446
 
1419
1447
  def pg_logical_repl_conn(
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import re
6
6
  import threading
7
7
  import typing as t
8
+ from collections import deque
8
9
  from dataclasses import dataclass
9
10
 
10
11
  import sqlalchemy as sa
@@ -342,12 +343,12 @@ class Node(object):
342
343
  child.display(prefix, leaf)
343
344
 
344
345
  def traverse_breadth_first(self) -> t.Generator:
345
- stack: t.List[Node] = [self]
346
- while stack:
347
- node: Node = stack.pop(0)
346
+ queue: deque = deque([self])
347
+ while queue:
348
+ node: Node = queue.popleft()
348
349
  yield node
349
350
  for child in node.children:
350
- stack.append(child)
351
+ queue.append(child)
351
352
 
352
353
  def traverse_post_order(self) -> t.Generator:
353
354
  for child in self.children:
@@ -210,7 +210,8 @@ class QueryBuilder(threading.local):
210
210
  getattr(getattr(node, "model", None), "original", None)
211
211
  )
212
212
 
213
- # merge relationship provided hints (if any); do NOT short-circuit
213
+ # merge relationship provided hints (if any)
214
+ has_explicit_fk = False
214
215
  for node in (node_a, node_b):
215
216
  rel_fk = getattr(
216
217
  getattr(node, "relationship", None), "foreign_key", None
@@ -218,6 +219,14 @@ class QueryBuilder(threading.local):
218
219
  if not rel_fk:
219
220
  continue
220
221
 
222
+ # node.relationship.foreign_key describes the FK between node and
223
+ # node.parent (in the tree). Only honor it when the other node IS
224
+ # node's tree parent; otherwise it can leak a middle node's FK to
225
+ # its grandparent into a grandchild join's resolution dict.
226
+ other = node_b if node is node_a else node_a
227
+ if getattr(node, "parent", None) is not other:
228
+ continue
229
+
221
230
  parent_tbl_key = node_table_key(node, prefer_parent=True)
222
231
  child_tbl_key = node_table_key(node, prefer_parent=False)
223
232
 
@@ -241,7 +250,15 @@ class QueryBuilder(threading.local):
241
250
  # child table cols
242
251
  merge_side(getattr(rel_fk, "child", None), child_tbl_key)
243
252
 
253
+ if getattr(rel_fk, "parent", None) and getattr(
254
+ rel_fk, "child", None
255
+ ):
256
+ has_explicit_fk = True
257
+
244
258
  # SQLAlchemy introspection in both directions (A -> B and B -> A)
259
+ # Skip when an explicit foreign_key was provided in the schema,
260
+ # otherwise auto discovery adds ALL FKs between the tables which
261
+ # causes mismatches when a child has multiple FKs to the parent.
245
262
  A = getattr(getattr(node_a, "model", None), "original", None)
246
263
  B = getattr(getattr(node_b, "model", None), "original", None)
247
264
 
@@ -252,7 +269,7 @@ class QueryBuilder(threading.local):
252
269
  def same_table(t1: t.Any, t2: t.Any) -> bool:
253
270
  return qname(t1) is not None and qname(t1) == qname(t2)
254
271
 
255
- if A is not None and B is not None:
272
+ if not has_explicit_fk and A is not None and B is not None:
256
273
  for fk in getattr(A, "foreign_keys", []):
257
274
  # does A have an FK pointing to B?
258
275
  if same_table(getattr(fk, "column", None).table, B):