pgsync 5.0.0__tar.gz → 6.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {pgsync-5.0.0 → pgsync-6.1.0}/PKG-INFO +13 -12
  2. {pgsync-5.0.0 → pgsync-6.1.0}/README.md +58 -29
  3. {pgsync-5.0.0 → pgsync-6.1.0}/README.rst +1 -1
  4. {pgsync-5.0.0 → pgsync-6.1.0}/bin/bootstrap +26 -5
  5. {pgsync-5.0.0 → pgsync-6.1.0}/bin/parallel_sync +33 -7
  6. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/__init__.py +1 -1
  7. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/base.py +79 -18
  8. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/constants.py +2 -0
  9. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/helper.py +10 -3
  10. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/node.py +5 -3
  11. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/plugin.py +2 -2
  12. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/redisqueue.py +8 -3
  13. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/settings.py +18 -0
  14. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/singleton.py +1 -1
  15. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/sync.py +358 -149
  16. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/trigger.py +24 -4
  17. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/urls.py +14 -2
  18. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/utils.py +70 -13
  19. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/view.py +65 -11
  20. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/PKG-INFO +13 -12
  21. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/requires.txt +10 -10
  22. {pgsync-5.0.0 → pgsync-6.1.0}/setup.py +1 -0
  23. {pgsync-5.0.0 → pgsync-6.1.0}/tests/conftest.py +46 -14
  24. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_constants.py +1 -0
  25. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync.py +12 -5
  26. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync_nested_children.py +35 -3
  27. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync_root.py +19 -2
  28. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync_single_child_fk_on_child.py +24 -2
  29. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_sync_single_child_fk_on_parent.py +24 -2
  30. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_trigger.py +24 -4
  31. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_unique_behaviour.py +1 -2
  32. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_urls.py +2 -2
  33. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_utils.py +13 -1
  34. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_view.py +3 -2
  35. {pgsync-5.0.0 → pgsync-6.1.0}/AUTHORS.rst +0 -0
  36. {pgsync-5.0.0 → pgsync-6.1.0}/CONTRIBUTING.rst +0 -0
  37. {pgsync-5.0.0 → pgsync-6.1.0}/HISTORY.rst +0 -0
  38. {pgsync-5.0.0 → pgsync-6.1.0}/LICENSE +0 -0
  39. {pgsync-5.0.0 → pgsync-6.1.0}/MANIFEST.in +0 -0
  40. {pgsync-5.0.0 → pgsync-6.1.0}/bin/pgsync +0 -0
  41. {pgsync-5.0.0 → pgsync-6.1.0}/docs/Makefile +0 -0
  42. {pgsync-5.0.0 → pgsync-6.1.0}/docs/authors.rst +0 -0
  43. {pgsync-5.0.0 → pgsync-6.1.0}/docs/changelog.rst +0 -0
  44. {pgsync-5.0.0 → pgsync-6.1.0}/docs/conf.py +0 -0
  45. {pgsync-5.0.0 → pgsync-6.1.0}/docs/contributing.rst +0 -0
  46. {pgsync-5.0.0 → pgsync-6.1.0}/docs/history.rst +0 -0
  47. {pgsync-5.0.0 → pgsync-6.1.0}/docs/index.rst +0 -0
  48. {pgsync-5.0.0 → pgsync-6.1.0}/docs/installation.rst +0 -0
  49. {pgsync-5.0.0 → pgsync-6.1.0}/docs/logo.png +0 -0
  50. {pgsync-5.0.0 → pgsync-6.1.0}/docs/make.bat +0 -0
  51. {pgsync-5.0.0 → pgsync-6.1.0}/docs/readme.rst +0 -0
  52. {pgsync-5.0.0 → pgsync-6.1.0}/docs/usage.rst +0 -0
  53. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/exc.py +0 -0
  54. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/querybuilder.py +0 -0
  55. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/search_client.py +0 -0
  56. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync/transform.py +0 -0
  57. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/SOURCES.txt +0 -0
  58. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/dependency_links.txt +0 -0
  59. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/not-zip-safe +0 -0
  60. {pgsync-5.0.0 → pgsync-6.1.0}/pgsync.egg-info/top_level.txt +0 -0
  61. {pgsync-5.0.0 → pgsync-6.1.0}/pyproject.toml +0 -0
  62. {pgsync-5.0.0 → pgsync-6.1.0}/setup.cfg +0 -0
  63. {pgsync-5.0.0 → pgsync-6.1.0}/tests/__init__.py +0 -0
  64. {pgsync-5.0.0 → pgsync-6.1.0}/tests/fixtures/schema.json +0 -0
  65. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_base.py +0 -0
  66. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_env_vars.py +0 -0
  67. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_helper.py +0 -0
  68. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_log_handlers.py +0 -0
  69. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_node.py +0 -0
  70. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_query_builder.py +0 -0
  71. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_redisqueue.py +0 -0
  72. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_search_client.py +0 -0
  73. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_settings.py +0 -0
  74. {pgsync-5.0.0 → pgsync-6.1.0}/tests/test_transform.py +0 -0
  75. {pgsync-5.0.0 → pgsync-6.1.0}/tests/testing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pgsync
3
- Version: 5.0.0
3
+ Version: 6.1.0
4
4
  Summary: Postgres/MySQL/MariaDB to Elasticsearch/OpenSearch sync
5
5
  Home-page: https://github.com/toluaina/pgsync
6
6
  Author: Tolu Aina
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
24
  Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
25
26
  Classifier: Programming Language :: Python :: Implementation :: CPython
26
27
  Classifier: Programming Language :: Python :: Implementation :: PyPy
27
28
  Classifier: License :: OSI Approved :: MIT License
@@ -32,31 +33,31 @@ License-File: LICENSE
32
33
  License-File: AUTHORS.rst
33
34
  Requires-Dist: async-timeout==5.0.1
34
35
  Requires-Dist: backports-datetime-fromisoformat==2.0.3
35
- Requires-Dist: boto3==1.40.50
36
- Requires-Dist: botocore==1.40.50
37
- Requires-Dist: certifi==2025.10.5
38
- Requires-Dist: charset-normalizer==3.4.3
36
+ Requires-Dist: boto3==1.41.2
37
+ Requires-Dist: botocore==1.41.2
38
+ Requires-Dist: certifi==2025.11.12
39
+ Requires-Dist: charset-normalizer==3.4.4
39
40
  Requires-Dist: click==8.1.8
40
41
  Requires-Dist: elastic-transport==9.1.0
41
42
  Requires-Dist: elasticsearch==7.17.12
42
43
  Requires-Dist: elasticsearch-dsl==7.4.1
43
- Requires-Dist: environs==14.3.0
44
+ Requires-Dist: environs==14.4.0
44
45
  Requires-Dist: events==0.5
45
- Requires-Dist: idna==3.10
46
+ Requires-Dist: idna==3.11
46
47
  Requires-Dist: jmespath==1.0.1
47
48
  Requires-Dist: marshmallow==4.0.1
48
- Requires-Dist: mysql-replication==1.0.9
49
+ Requires-Dist: mysql-replication==1.0.12
49
50
  Requires-Dist: opensearch-dsl==2.1.0
50
51
  Requires-Dist: opensearch-py==3.0.0
51
52
  Requires-Dist: packaging==25.0
52
53
  Requires-Dist: psycopg2-binary==2.9.11
53
54
  Requires-Dist: pymysql==1.1.2
54
55
  Requires-Dist: python-dateutil==2.9.0.post0
55
- Requires-Dist: python-dotenv==1.1.1
56
- Requires-Dist: redis==6.4.0
56
+ Requires-Dist: python-dotenv==1.2.1
57
+ Requires-Dist: redis==7.0.1
57
58
  Requires-Dist: requests==2.32.5
58
59
  Requires-Dist: requests-aws4auth==1.3.1
59
- Requires-Dist: s3transfer==0.14.0
60
+ Requires-Dist: s3transfer==0.15.0
60
61
  Requires-Dist: six==1.17.0
61
62
  Requires-Dist: sqlalchemy==2.0.44
62
63
  Requires-Dist: sqlparse==0.5.3
@@ -78,7 +79,7 @@ Dynamic: requires-dist
78
79
  Dynamic: requires-python
79
80
  Dynamic: summary
80
81
 
81
- # PostgreSQL to Elasticsearch/OpenSearch sync
82
+ # PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
82
83
 
83
84
 
84
85
  - [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
@@ -5,6 +5,7 @@
5
5
  [![Python versions](https://img.shields.io/pypi/pyversions/pgsync)](https://pypi.org/project/pgsync)
6
6
  [![Downloads](https://img.shields.io/pypi/dm/pgsync)](https://pypi.org/project/pgsync)
7
7
  [![codecov](https://codecov.io/gh/toluaina/pgsync/branch/main/graph/badge.svg?token=cvQzYkz6CV)](https://codecov.io/gh/toluaina/pgsync)
8
+ [![Sponsored by DigitalOcean](https://img.shields.io/badge/Sponsored%20by-DigitalOcean-0080FF?logo=digitalocean&logoColor=white)](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync)
8
9
 
9
10
 
10
11
  ## PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
@@ -43,6 +44,23 @@ Other benefits of PGSync include:
43
44
  - Scale on-demand (multiple consumers)
44
45
  - Easily join multiple nested tables
45
46
 
47
+ ## Sponsors
48
+
49
+ [PGSync](https://pgsync.com) is made possible with support from [DigitalOcean](https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync).
50
+
51
+ <p>
52
+ <a href="https://www.digitalocean.com/?utm_medium=opensource&utm_source=pgsync" rel="sponsored noopener noreferrer">
53
+ <img
54
+ src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg"
55
+ alt="DigitalOcean"
56
+ width="210"
57
+ loading="lazy"
58
+ decoding="async"
59
+ >
60
+ </a>
61
+ </p>
62
+
63
+
46
64
  #### Why?
47
65
 
48
66
  At a high level, you have data in a PostgreSQL/MySQL/MariaDB database and you want to mirror it in Elasticsearch/OpenSearch.
@@ -55,7 +73,7 @@ Of course, if your data never changed, then you could just take a snapshot in ti
55
73
  PGSync is appropriate for you if:
56
74
  - [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) is your read/write source of truth whilst [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) is your
57
75
  read-only search layer.
58
- - You need to denormalize relational data into a NoSQL data source.
76
+ - You need to denormalize relational data into a NoSQL data source like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
59
77
  - Your data is constantly changing.
60
78
  - You have existing data in a relational database such as [Postgres](https://www.postgresql.org) or [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/) and you need a secondary NoSQL database like [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/) for text-based queries or autocomplete queries to mirror the existing data without having your application perform dual writes.
61
79
  - You want to keep your existing data untouched whilst taking advantage of
@@ -81,7 +99,8 @@ PGSync operates in an event-driven model by creating triggers for tables in your
81
99
 
82
100
  *This is the only time PGSync will ever make any changes to your database.*
83
101
 
84
- **NOTE**: **If you change the structure of your PGSync's schema config, you would need to rebuild your Elasticsearch/OpenSearch indices.**
102
+ >**NOTE**: **If you change the structure of your PGSync schema config, it's recommended and in most cases necessary to rebuild your Elasticsearch/OpenSearch indices.**
103
+
85
104
  There are plans to support zero-downtime migrations to streamline this process.
86
105
 
87
106
 
@@ -91,6 +110,14 @@ There are several ways of installing and trying PGSync
91
110
  - [Running in Docker](#running-in-docker) is the easiest way to get up and running.
92
111
  - [Manual configuration](#manual-configuration)
93
112
 
113
+ #### Book Demo Example (requires a DigitalOcean account)
114
+
115
+ [![Deploy to DO](https://www.deploytodo.com/do-btn-blue.svg)](https://cloud.digitalocean.com/apps/new?repo=https://github.com/toluaina/pgsync/tree/main)
116
+
117
+ Fill in the following during setup:
118
+ - `ELASTICSEARCH_URL` e.g. https://user:pass@os-host:443
119
+ - `REDIS_URL` e.g. rediss://default:pass@host:port/0
120
+
94
121
 
95
122
  ##### Running in Docker (Using Github Repository)
96
123
 
@@ -161,7 +188,6 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
161
188
  ### MySQL / MariaDB setup
162
189
 
163
190
  - Enable binary logging in your MySQL / MariaDB setting.
164
-
165
191
  - You also need to set up the following parameters in your MySQL / MariaDB config my.cnf, then restart the database server.
166
192
 
167
193
  ```server-id = 1``` # any non-zero unique ID
@@ -169,10 +195,8 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
169
195
  ```log_bin = mysql-bin```
170
196
 
171
197
  ```binlog_row_image = FULL``` # recommended; if not supported on older MariaDB, omit
172
-
173
198
  - optional housekeeping:
174
199
  ```binlog_expire_logs_seconds = 604800``` # 7 days
175
-
176
200
  - You need to create a replication user with REPLICATION SLAVE and REPLICATION CLIENT privileges
177
201
 
178
202
  ```sql
@@ -198,17 +222,19 @@ Environment variable placeholders - full list [here](https://pgsync.com/env-vars
198
222
 
199
223
  Key features of PGSync are:
200
224
 
201
- - Easily denormalize relational data.
202
- - Works with any PostgreSQL database (version 9.6 or later).
203
- - Negligible impact on database performance.
204
- - Transactionally consistent output in Elasticsearch/OpenSearch. This means: writes appear only when they are committed to the database, insert, update and delete operations appear in the same order as they were committed (as opposed to eventual consistency).
205
- - Fault-tolerant: does not lose data, even if processes crash or a network interruption occurs, etc. The process can be recovered from the last checkpoint.
206
- - Returns the data directly as Postgres/MySQL/MariaDB JSON from the database for speed.
207
- - Supports composite primary and foreign keys.
208
- - Supports Views and Materialized views.
209
- - Supports an arbitrary depth of nested entities i.e Tables having long chain of relationship dependencies.
210
- - Supports PostgreSQL/MySQL/MariaDB JSON data fields. This means: we can extract JSON fields in a database table as a separate field in the resulting document.
211
- - Customizable document structure.
225
+ - Easily denormalize relational data
226
+ - Works with any PostgreSQL database (9.6 or later)
227
+ - Negligible impact on database performance
228
+ - Transactionally consistent output in Elasticsearch/OpenSearch:
229
+ - Writes appear only after they’re committed
230
+ - Inserts, updates, and deletes appear in commit order (as opposed to eventual consistency)
231
+ - Fault-tolerant: no data loss even on crashes or network issues; processing resumes from the last checkpoint
232
+ - Returns data directly as PostgreSQL/MySQL/MariaDB JSON for speed
233
+ - Supports composite primary and foreign keys
234
+ - Supports views and materialized views
235
+ - Handles arbitrarily deep nesting of related tables
236
+ - Supports PostgreSQL/MySQL/MariaDB JSON fields, allowing JSON properties to be extracted as separate document fields
237
+ - Customizable document structure
212
238
 
213
239
 
214
240
  #### Requirements
@@ -334,23 +360,26 @@ e.g
334
360
  }
335
361
  ```
336
362
 
337
- PGSync addresses the following challenges:
338
- - What if we update the author's name in the database?
339
- - What if we wanted to add another author for an existing book?
340
- - What if we have lots of documents already with the same author we wanted to change the author name?
341
- - What if we delete or update an author?
342
- - What if we truncate an entire table?
363
+ PGSync addresses common data consistency challenges, such as:
364
+
365
+ - Updating an author's name in the database
366
+ - Adding an additional author to an existing book
367
+ - Changing an author's name across many existing documents
368
+ - Deleting or updating an author record
369
+ - Truncating an entire table and keeping indexes in sync
343
370
 
344
371
 
345
372
  #### Benefits
346
373
 
347
- - PGSync is a simple to use out of the box solution for Change data capture.
348
- - PGSync handles data deletions.
349
- - PGSync requires little development effort. You simply define a schema config describing your data.
350
- - PGSync generates advanced queries matching your schema directly.
351
- - PGSync allows you to easily rebuild your indexes in case of a schema change.
352
- - You can expose only the data you require in Elasticsearch/OpenSearch.
353
- - Supports multiple Postgres/MySQL/MariaDB schemas for multi-tennant applications.
374
+ PGSync is a simple, out-of-the-box solution for change data capture, designed to minimize development effort and keep your search indexes in sync.
375
+
376
+ - Handles data deletions automatically.
377
+ - Requires minimal setup. Just define a schema config that describes your data.
378
+ - Generates advanced queries directly from your schema.
379
+ - Makes it easy to rebuild indexes after schema changes.
380
+ - Lets you expose only the data you need in Elasticsearch/OpenSearch.
381
+ - Supports multiple Postgres/MySQL/MariaDB schemas for multi-tenant applications.
382
+
354
383
 
355
384
 
356
385
  #### Contributing
@@ -1,4 +1,4 @@
1
- # PostgreSQL to Elasticsearch/OpenSearch sync
1
+ # PostgreSQL/MySQL/MariaDB to Elasticsearch/OpenSearch sync
2
2
 
3
3
 
4
4
  - [PGSync](https://pgsync.com) is a middleware for syncing data from [Postgres](https://www.postgresql.org) to [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/).
@@ -26,7 +26,16 @@ logger = logging.getLogger(__name__)
26
26
  default=settings.SCHEMA,
27
27
  show_default=True,
28
28
  cls=MutuallyExclusiveOption,
29
- mutually_exclusive=["s3_schema_url"],
29
+ mutually_exclusive=["s3_schema_url", "schema_url"],
30
+ )
31
+ @click.option(
32
+ "--schema_url",
33
+ help="URL for schema config",
34
+ type=click.STRING,
35
+ default=settings.SCHEMA_URL,
36
+ show_default=True,
37
+ cls=MutuallyExclusiveOption,
38
+ mutually_exclusive=["config", "s3_schema_url"],
30
39
  )
31
40
  @click.option(
32
41
  "--s3_schema_url",
@@ -35,7 +44,7 @@ logger = logging.getLogger(__name__)
35
44
  default=settings.S3_SCHEMA_URL,
36
45
  show_default=True,
37
46
  cls=MutuallyExclusiveOption,
38
- mutually_exclusive=["config"],
47
+ mutually_exclusive=["config", "schema_url"],
39
48
  )
40
49
  @click.option("--host", "-h", help="PG_HOST override")
41
50
  @click.option("--password", is_flag=True, help="Prompt for database password")
@@ -67,6 +76,7 @@ logger = logging.getLogger(__name__)
67
76
  def main(
68
77
  teardown: bool,
69
78
  config: str,
79
+ schema_url: str,
70
80
  s3_schema_url: str,
71
81
  user: str,
72
82
  password: bool,
@@ -89,13 +99,24 @@ def main(
89
99
  )
90
100
  kwargs = {key: value for key, value in kwargs.items() if value is not None}
91
101
 
92
- validate_config(config=config, s3_schema_url=s3_schema_url)
102
+ validate_config(
103
+ config=config,
104
+ schema_url=schema_url,
105
+ s3_schema_url=s3_schema_url,
106
+ )
93
107
 
94
- show_settings(config=config, s3_schema_url=s3_schema_url)
108
+ show_settings(
109
+ config=config,
110
+ schema_url=schema_url,
111
+ s3_schema_url=s3_schema_url,
112
+ **kwargs,
113
+ )
95
114
 
96
115
  validate: bool = False if teardown else True
97
116
 
98
- for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
117
+ for doc in config_loader(
118
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
119
+ ):
99
120
  sync: Sync = Sync(
100
121
  doc,
101
122
  verbose=verbose,
@@ -53,7 +53,13 @@ from threading import Thread
53
53
  import click
54
54
  import sqlalchemy as sa
55
55
 
56
- from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH, S3_SCHEMA_URL, SCHEMA
56
+ from pgsync.settings import (
57
+ BLOCK_SIZE,
58
+ CHECKPOINT_PATH,
59
+ S3_SCHEMA_URL,
60
+ SCHEMA,
61
+ SCHEMA_URL,
62
+ )
57
63
  from pgsync.sync import Sync
58
64
  from pgsync.utils import (
59
65
  config_loader,
@@ -386,7 +392,16 @@ def run_task(
386
392
  default=SCHEMA,
387
393
  show_default=True,
388
394
  cls=MutuallyExclusiveOption,
389
- mutually_exclusive=["s3_schema_url"],
395
+ mutually_exclusive=["s3_schema_url", "schema_url"],
396
+ )
397
+ @click.option(
398
+ "--schema_url",
399
+ help="URL for schema config",
400
+ type=click.STRING,
401
+ default=SCHEMA_URL,
402
+ show_default=True,
403
+ cls=MutuallyExclusiveOption,
404
+ mutually_exclusive=["config", "s3_schema_url"],
390
405
  )
391
406
  @click.option(
392
407
  "--s3_schema_url",
@@ -395,7 +410,7 @@ def run_task(
395
410
  default=S3_SCHEMA_URL,
396
411
  show_default=True,
397
412
  cls=MutuallyExclusiveOption,
398
- mutually_exclusive=["config"],
413
+ mutually_exclusive=["config", "schema_url"],
399
414
  )
400
415
  @click.option(
401
416
  "--verbose",
@@ -428,7 +443,12 @@ def run_task(
428
443
  default="multiprocess_async",
429
444
  )
430
445
  def main(
431
- config: str, s3_schema_url: str, nprocs: int, mode: str, verbose: bool
446
+ config: str,
447
+ schema_url: str,
448
+ s3_schema_url: str,
449
+ nprocs: int,
450
+ mode: str,
451
+ verbose: bool,
432
452
  ) -> None:
433
453
  """
434
454
  TODO:
@@ -436,11 +456,17 @@ def main(
436
456
  - Handle KeyboardInterrupt Exception
437
457
  """
438
458
 
439
- validate_config(config=config, s3_schema_url=s3_schema_url)
459
+ validate_config(
460
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
461
+ )
440
462
 
441
- show_settings(config=config, s3_schema_url=s3_schema_url)
463
+ show_settings(
464
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
465
+ )
442
466
 
443
- for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
467
+ for doc in config_loader(
468
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
469
+ ):
444
470
  tasks: t.Generator = fetch_tasks(doc)
445
471
  if mode == "synchronous":
446
472
  synchronous(tasks, doc, verbose=verbose)
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = "Tolu Aina"
4
4
  __email__ = "tolu@pgsync.com"
5
- __version__ = "5.0.0"
5
+ __version__ = "6.1.0"
@@ -8,8 +8,11 @@ import time
8
8
  import typing as t
9
9
  from contextlib import contextmanager
10
10
 
11
+ import psycopg2
11
12
  import sqlalchemy as sa
13
+ from psycopg2.extras import LogicalReplicationConnection
12
14
  from sqlalchemy.dialects import postgresql # noqa
15
+ from sqlalchemy.engine.url import make_url
13
16
  from sqlalchemy.orm import sessionmaker
14
17
 
15
18
  from .constants import (
@@ -31,6 +34,8 @@ from .exc import (
31
34
  )
32
35
  from .settings import (
33
36
  IS_MYSQL_COMPAT,
37
+ MYSQL_DATABASE,
38
+ PG_DATABASE,
34
39
  PG_HOST_RO,
35
40
  PG_PASSWORD_RO,
36
41
  PG_PORT_RO,
@@ -39,6 +44,12 @@ from .settings import (
39
44
  PG_URL_RO,
40
45
  PG_USER_RO,
41
46
  QUERY_CHUNK_SIZE,
47
+ SQLALCHEMY_MAX_OVERFLOW,
48
+ SQLALCHEMY_POOL_PRE_PING,
49
+ SQLALCHEMY_POOL_RECYCLE,
50
+ SQLALCHEMY_POOL_SIZE,
51
+ SQLALCHEMY_POOL_TIMEOUT,
52
+ SQLALCHEMY_USE_NULLPOOL,
42
53
  STREAM_RESULTS,
43
54
  )
44
55
  from .trigger import CREATE_TRIGGER_TEMPLATE
@@ -230,6 +241,7 @@ class Base(object):
230
241
  self.__columns: dict = {}
231
242
  self.verbose: bool = verbose
232
243
  self._conn = None
244
+ self._session = None
233
245
 
234
246
  def connect(self) -> None:
235
247
  """Connect to database."""
@@ -338,8 +350,19 @@ class Base(object):
338
350
 
339
351
  @property
340
352
  def session(self) -> sessionmaker:
341
- Session = sessionmaker(bind=self.engine.connect(), autoflush=True)
342
- return Session()
353
+ if self._session is None:
354
+ Session = sessionmaker(bind=self.engine, autoflush=True)
355
+ self._session = Session()
356
+ return self._session
357
+
358
+ def close_session(self) -> None:
359
+ """Close the cached session and reset it."""
360
+ if self._session is not None:
361
+ try:
362
+ self._session.close()
363
+ except Exception:
364
+ pass
365
+ self._session = None
343
366
 
344
367
  @property
345
368
  def engine(self) -> sa.engine.Engine:
@@ -726,6 +749,21 @@ class Base(object):
726
749
  )
727
750
  )[0]
728
751
 
752
+ def get_replication_connection(
753
+ self, engine: sa.engine.Engine
754
+ ) -> psycopg2.extensions.connection:
755
+ url: sa.engine.URL = make_url(str(engine.url))
756
+ # Build a libpq-style connection by keyword args
757
+ conn: psycopg2.extensions.connection = psycopg2.connect(
758
+ host=url.host,
759
+ port=url.port or 5432,
760
+ user=url.username,
761
+ password=url.password,
762
+ dbname=url.database,
763
+ connection_factory=LogicalReplicationConnection,
764
+ )
765
+ return conn
766
+
729
767
  def logical_slot_get_changes(
730
768
  self,
731
769
  slot_name: str,
@@ -822,6 +860,7 @@ class Base(object):
822
860
  schema: str,
823
861
  tables: t.Set,
824
862
  user_defined_fkey_tables: dict,
863
+ node_columns: dict,
825
864
  ) -> None:
826
865
  create_view(
827
866
  self.engine,
@@ -832,6 +871,7 @@ class Base(object):
832
871
  tables,
833
872
  user_defined_fkey_tables,
834
873
  self._materialized_views(schema),
874
+ node_columns,
835
875
  )
836
876
 
837
877
  def drop_view(self, schema: str) -> None:
@@ -946,9 +986,9 @@ class Base(object):
946
986
  """Check if the trigger function exists."""
947
987
  return self.exists(
948
988
  sa.text(
949
- f"SELECT 1 FROM pg_proc WHERE proname = :name "
950
- f"AND pronamespace = (SELECT oid FROM pg_namespace "
951
- f"WHERE nspname = :schema)"
989
+ "SELECT 1 FROM pg_proc WHERE proname = :name "
990
+ "AND pronamespace = (SELECT oid FROM pg_namespace "
991
+ "WHERE nspname = :schema)"
952
992
  ).bindparams(name=TRIGGER_FUNC, schema=schema),
953
993
  )
954
994
 
@@ -989,7 +1029,7 @@ class Base(object):
989
1029
  self.disable_trigger(schema, table)
990
1030
  logger.debug(f"Disabled trigger on table: {schema}.{table}")
991
1031
 
992
- def enable_trigger(self, schema: str, table, str) -> None:
1032
+ def enable_trigger(self, schema: str, table: str) -> None:
993
1033
  """Enable a pgsync defined trigger."""
994
1034
  for name in ("notify", "truncate"):
995
1035
  self.execute(
@@ -1107,20 +1147,21 @@ class Base(object):
1107
1147
  # including trailing space below is deliberate
1108
1148
  suffix: str = f"{row[span[1]:]} "
1109
1149
 
1110
- if "old-key" and "new-tuple" in suffix:
1150
+ if "old-key" in suffix and "new-tuple" in suffix:
1111
1151
  # this can only be an UPDATE operation
1112
1152
  if payload.tg_op != UPDATE:
1113
1153
  msg = f"Unknown {payload.tg_op} operation for row: {row}"
1114
1154
  raise LogicalSlotParseError(msg)
1115
1155
 
1116
- i: int = suffix.index("old-key:")
1156
+ i: int = suffix.find("old-key:")
1117
1157
  if i > -1:
1118
- j: int = suffix.index("new-tuple:")
1119
- s: str = suffix[i + len("old-key:") : j]
1120
- for key, value in _parse_logical_slot(s):
1121
- payload.old[key] = value
1158
+ j: int = suffix.find("new-tuple:")
1159
+ if j > -1:
1160
+ s: str = suffix[i + len("old-key:") : j]
1161
+ for key, value in _parse_logical_slot(s):
1162
+ payload.old[key] = value
1122
1163
 
1123
- i = suffix.index("new-tuple:")
1164
+ i = suffix.find("new-tuple:")
1124
1165
  if i > -1:
1125
1166
  s = suffix[i + len("new-tuple:") :]
1126
1167
  for key, value in _parse_logical_slot(s):
@@ -1336,7 +1377,28 @@ def _pg_engine(
1336
1377
  password=password,
1337
1378
  port=port,
1338
1379
  )
1339
- return sa.create_engine(url, echo=echo, connect_args=connect_args)
1380
+
1381
+ # Use NullPool for testing to avoid connection exhaustion
1382
+ if SQLALCHEMY_USE_NULLPOOL:
1383
+ from sqlalchemy.pool import NullPool
1384
+
1385
+ return sa.create_engine(
1386
+ url,
1387
+ echo=echo,
1388
+ connect_args=connect_args,
1389
+ poolclass=NullPool,
1390
+ )
1391
+
1392
+ return sa.create_engine(
1393
+ url,
1394
+ echo=echo,
1395
+ connect_args=connect_args,
1396
+ pool_size=SQLALCHEMY_POOL_SIZE,
1397
+ max_overflow=SQLALCHEMY_MAX_OVERFLOW,
1398
+ pool_pre_ping=SQLALCHEMY_POOL_PRE_PING,
1399
+ pool_recycle=SQLALCHEMY_POOL_RECYCLE,
1400
+ pool_timeout=SQLALCHEMY_POOL_TIMEOUT,
1401
+ )
1340
1402
 
1341
1403
 
1342
1404
  def pg_execute(
@@ -1367,7 +1429,7 @@ def create_database(database: str, echo: bool = False) -> None:
1367
1429
  """Create a database."""
1368
1430
  logger.debug(f"Creating database: {database}")
1369
1431
  with pg_engine(
1370
- "information_schema" if IS_MYSQL_COMPAT else "postgres",
1432
+ MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE,
1371
1433
  echo=echo,
1372
1434
  ) as engine:
1373
1435
  pg_execute(
@@ -1382,8 +1444,7 @@ def drop_database(database: str, echo: bool = False) -> None:
1382
1444
  """Drop a database."""
1383
1445
  logger.debug(f"Dropping database: {database}")
1384
1446
  with pg_engine(
1385
- "information_schema" if IS_MYSQL_COMPAT else "postgres",
1386
- echo=echo,
1447
+ MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE, echo=echo
1387
1448
  ) as engine:
1388
1449
  pg_execute(
1389
1450
  engine,
@@ -1396,7 +1457,7 @@ def drop_database(database: str, echo: bool = False) -> None:
1396
1457
  def database_exists(database: str, echo: bool = False) -> bool:
1397
1458
  """Check if database is present."""
1398
1459
  with pg_engine(
1399
- "information_schema" if IS_MYSQL_COMPAT else "postgres",
1460
+ MYSQL_DATABASE if IS_MYSQL_COMPAT else PG_DATABASE,
1400
1461
  echo=echo,
1401
1462
  ) as engine:
1402
1463
  with engine.connect() as conn:
@@ -135,6 +135,7 @@ ELASTICSEARCH_MAPPING_PARAMETERS = [
135
135
  "boost",
136
136
  "coerce",
137
137
  "copy_to",
138
+ "dimension",
138
139
  "doc_values",
139
140
  "dynamic",
140
141
  "eager_global_ordinals",
@@ -198,6 +199,7 @@ MATERIALIZED_VIEW_COLUMNS = [
198
199
  "indices",
199
200
  "primary_keys",
200
201
  "table_name",
202
+ "columns",
201
203
  ]
202
204
 
203
205
  # Primary key delimiter
@@ -20,6 +20,7 @@ def teardown(
20
20
  drop_index: bool = True,
21
21
  delete_checkpoint: bool = True,
22
22
  config: t.Optional[str] = None,
23
+ schema_url: t.Optional[str] = None,
23
24
  s3_schema_url: t.Optional[str] = None,
24
25
  validate: bool = False,
25
26
  ) -> None:
@@ -33,11 +34,17 @@ def teardown(
33
34
  drop_index (bool, optional): Whether to drop the index. Defaults to True.
34
35
  delete_checkpoint (bool, optional): Whether to delete the checkpoint. Defaults to True.
35
36
  config (Optional[str], optional): The configuration file path. Defaults to None.
37
+ schema_url (Optional[str], optional): The schema URL. Defaults to None.
38
+ s3_schema_url (Optional[str], optional): The S3 schema URL. Defaults to None.
36
39
  validate (bool, optional): Whether to validate the configuration. Defaults to False.
37
40
  """
38
- validate_config(config=config, s3_schema_url=s3_schema_url)
41
+ validate_config(
42
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
43
+ )
39
44
 
40
- for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
45
+ for doc in config_loader(
46
+ config=config, schema_url=schema_url, s3_schema_url=s3_schema_url
47
+ ):
41
48
  if not database_exists(doc["database"]):
42
49
  logger.warning(f'Database {doc["database"]} does not exist')
43
50
  continue
@@ -56,7 +63,7 @@ def teardown(
56
63
  drop_database(sync.database)
57
64
  if drop_index:
58
65
  sync.search_client.teardown(sync.index)
59
- if delete_redis:
66
+ if delete_redis and sync.redis is not None:
60
67
  sync.redis.delete()
61
68
  if delete_checkpoint:
62
69
  try:
@@ -294,7 +294,7 @@ class Node(object):
294
294
  self.columns.append(self.model.c[column_name])
295
295
 
296
296
  @property
297
- def primary_keys(self):
297
+ def primary_keys(self) -> t.List[sa.sql.ColumnElement]:
298
298
  return [
299
299
  self.model.c[str(sa.text(primary_key))]
300
300
  for primary_key in self.model.primary_keys
@@ -311,7 +311,7 @@ class Node(object):
311
311
 
312
312
  def add_child(self, node: Node) -> None:
313
313
  """All nodes except the root node must have a relationship defined."""
314
- node.parent: Node = self
314
+ node.parent = self
315
315
  if not node.is_root and (
316
316
  not node.relationship.type or not node.relationship.variant
317
317
  ):
@@ -388,7 +388,9 @@ class Tree(threading.local):
388
388
  raise TableNotInNodeError(f"Table not specified in node: {nodes}")
389
389
 
390
390
  if not set(nodes.keys()).issubset(set(NODE_ATTRIBUTES)):
391
- attrs = set(nodes.keys()).difference(set(NODE_ATTRIBUTES))
391
+ attrs: t.Set[str] = set(nodes.keys()).difference(
392
+ set(NODE_ATTRIBUTES)
393
+ )
392
394
  raise NodeAttributeError(f"Unknown node attribute(s): {attrs}")
393
395
 
394
396
  node: Node = Node(