pgsync 4.0.0__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. {pgsync-4.0.0 → pgsync-4.2.0}/PKG-INFO +22 -21
  2. {pgsync-4.0.0 → pgsync-4.2.0}/README.md +42 -4
  3. {pgsync-4.0.0 → pgsync-4.2.0}/README.rst +1 -1
  4. {pgsync-4.0.0 → pgsync-4.2.0}/bin/bootstrap +39 -8
  5. {pgsync-4.0.0 → pgsync-4.2.0}/bin/parallel_sync +28 -7
  6. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/__init__.py +1 -1
  7. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/base.py +304 -90
  8. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/helper.py +4 -3
  9. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/redisqueue.py +44 -1
  10. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/settings.py +43 -3
  11. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/sync.py +409 -258
  12. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/urls.py +34 -15
  13. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/utils.py +305 -246
  14. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/view.py +6 -4
  15. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync.egg-info/PKG-INFO +22 -21
  16. pgsync-4.2.0/pgsync.egg-info/requires.txt +30 -0
  17. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_helper.py +19 -10
  18. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_redisqueue.py +63 -0
  19. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_sync.py +204 -74
  20. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_urls.py +16 -10
  21. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_utils.py +39 -28
  22. pgsync-4.0.0/pgsync.egg-info/requires.txt +0 -30
  23. {pgsync-4.0.0 → pgsync-4.2.0}/AUTHORS.rst +0 -0
  24. {pgsync-4.0.0 → pgsync-4.2.0}/CONTRIBUTING.rst +0 -0
  25. {pgsync-4.0.0 → pgsync-4.2.0}/HISTORY.rst +0 -0
  26. {pgsync-4.0.0 → pgsync-4.2.0}/LICENSE +0 -0
  27. {pgsync-4.0.0 → pgsync-4.2.0}/MANIFEST.in +0 -0
  28. {pgsync-4.0.0 → pgsync-4.2.0}/bin/pgsync +0 -0
  29. {pgsync-4.0.0 → pgsync-4.2.0}/docs/Makefile +0 -0
  30. {pgsync-4.0.0 → pgsync-4.2.0}/docs/authors.rst +0 -0
  31. {pgsync-4.0.0 → pgsync-4.2.0}/docs/changelog.rst +0 -0
  32. {pgsync-4.0.0 → pgsync-4.2.0}/docs/conf.py +0 -0
  33. {pgsync-4.0.0 → pgsync-4.2.0}/docs/contributing.rst +0 -0
  34. {pgsync-4.0.0 → pgsync-4.2.0}/docs/history.rst +0 -0
  35. {pgsync-4.0.0 → pgsync-4.2.0}/docs/index.rst +0 -0
  36. {pgsync-4.0.0 → pgsync-4.2.0}/docs/installation.rst +0 -0
  37. {pgsync-4.0.0 → pgsync-4.2.0}/docs/logo.png +0 -0
  38. {pgsync-4.0.0 → pgsync-4.2.0}/docs/make.bat +0 -0
  39. {pgsync-4.0.0 → pgsync-4.2.0}/docs/readme.rst +0 -0
  40. {pgsync-4.0.0 → pgsync-4.2.0}/docs/usage.rst +0 -0
  41. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/constants.py +0 -0
  42. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/exc.py +0 -0
  43. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/node.py +0 -0
  44. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/plugin.py +0 -0
  45. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/querybuilder.py +0 -0
  46. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/search_client.py +0 -0
  47. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/singleton.py +0 -0
  48. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/transform.py +0 -0
  49. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync/trigger.py +0 -0
  50. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync.egg-info/SOURCES.txt +0 -0
  51. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync.egg-info/dependency_links.txt +0 -0
  52. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync.egg-info/not-zip-safe +0 -0
  53. {pgsync-4.0.0 → pgsync-4.2.0}/pgsync.egg-info/top_level.txt +0 -0
  54. {pgsync-4.0.0 → pgsync-4.2.0}/pyproject.toml +0 -0
  55. {pgsync-4.0.0 → pgsync-4.2.0}/setup.cfg +0 -0
  56. {pgsync-4.0.0 → pgsync-4.2.0}/setup.py +0 -0
  57. {pgsync-4.0.0 → pgsync-4.2.0}/tests/__init__.py +0 -0
  58. {pgsync-4.0.0 → pgsync-4.2.0}/tests/conftest.py +0 -0
  59. {pgsync-4.0.0 → pgsync-4.2.0}/tests/fixtures/schema.json +0 -0
  60. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_base.py +0 -0
  61. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_constants.py +0 -0
  62. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_env_vars.py +0 -0
  63. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_log_handlers.py +0 -0
  64. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_node.py +0 -0
  65. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_query_builder.py +0 -0
  66. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_search_client.py +0 -0
  67. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_settings.py +0 -0
  68. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_sync_nested_children.py +0 -0
  69. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_sync_root.py +0 -0
  70. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_sync_single_child_fk_on_child.py +0 -0
  71. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_sync_single_child_fk_on_parent.py +0 -0
  72. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_transform.py +0 -0
  73. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_trigger.py +0 -0
  74. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_unique_behaviour.py +0 -0
  75. {pgsync-4.0.0 → pgsync-4.2.0}/tests/test_view.py +0 -0
  76. {pgsync-4.0.0 → pgsync-4.2.0}/tests/testing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: pgsync
3
- Version: 4.0.0
3
+ Version: 4.2.0
4
4
  Summary: Postgres to Elasticsearch/OpenSearch sync
5
5
  Home-page: https://github.com/toluaina/pgsync
6
6
  Author: Tolu Aina
@@ -31,34 +31,34 @@ Description-Content-Type: text/markdown
31
31
  License-File: LICENSE
32
32
  License-File: AUTHORS.rst
33
33
  Requires-Dist: async-timeout==5.0.1
34
- Requires-Dist: boto3==1.37.11
35
- Requires-Dist: botocore==1.37.11
36
- Requires-Dist: certifi==2025.1.31
37
- Requires-Dist: charset-normalizer==3.4.1
34
+ Requires-Dist: backports-datetime-fromisoformat==2.0.3
35
+ Requires-Dist: boto3==1.40.1
36
+ Requires-Dist: botocore==1.40.1
37
+ Requires-Dist: certifi==2025.8.3
38
+ Requires-Dist: charset-normalizer==3.4.2
38
39
  Requires-Dist: click==8.1.8
39
- Requires-Dist: elastic-transport==8.17.0
40
- Requires-Dist: elasticsearch==8.17.2
41
- Requires-Dist: elasticsearch-dsl==8.17.1
42
- Requires-Dist: environs==14.1.1
40
+ Requires-Dist: elastic-transport==8.17.1
41
+ Requires-Dist: elasticsearch==8.19.0
42
+ Requires-Dist: elasticsearch-dsl==8.15.4
43
+ Requires-Dist: environs==14.3.0
43
44
  Requires-Dist: events==0.5
44
- Requires-Dist: greenlet==3.1.1
45
+ Requires-Dist: greenlet==3.2.3
45
46
  Requires-Dist: idna==3.10
46
47
  Requires-Dist: jmespath==1.0.1
47
- Requires-Dist: marshmallow==3.26.1
48
+ Requires-Dist: marshmallow==4.0.0
48
49
  Requires-Dist: opensearch-dsl==2.1.0
49
- Requires-Dist: opensearch-py==2.8.0
50
- Requires-Dist: packaging==24.2
50
+ Requires-Dist: opensearch-py==3.0.0
51
51
  Requires-Dist: psycopg2-binary==2.9.10
52
52
  Requires-Dist: python-dateutil==2.9.0.post0
53
- Requires-Dist: python-dotenv==1.0.1
54
- Requires-Dist: redis==5.2.1
55
- Requires-Dist: requests==2.32.3
53
+ Requires-Dist: python-dotenv==1.1.1
54
+ Requires-Dist: redis==6.2.0
55
+ Requires-Dist: requests==2.32.4
56
56
  Requires-Dist: requests-aws4auth==1.3.1
57
- Requires-Dist: s3transfer==0.11.4
57
+ Requires-Dist: s3transfer==0.13.1
58
58
  Requires-Dist: six==1.17.0
59
- Requires-Dist: sqlalchemy==2.0.39
59
+ Requires-Dist: sqlalchemy==2.0.42
60
60
  Requires-Dist: sqlparse==0.5.3
61
- Requires-Dist: typing-extensions==4.12.2
61
+ Requires-Dist: typing-extensions==4.14.1
62
62
  Requires-Dist: urllib3==1.26.20
63
63
  Dynamic: author
64
64
  Dynamic: author-email
@@ -68,6 +68,7 @@ Dynamic: description-content-type
68
68
  Dynamic: home-page
69
69
  Dynamic: keywords
70
70
  Dynamic: license
71
+ Dynamic: license-file
71
72
  Dynamic: maintainer
72
73
  Dynamic: maintainer-email
73
74
  Dynamic: project-url
@@ -89,7 +90,7 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
89
90
  - [Postgres](https://www.postgresql.org) 9.6+
90
91
  - [Redis](https://redis.io) 3.1.0+
91
92
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
92
- - [SQlAlchemy](https://www.sqlalchemy.org) 1.3.4+
93
+ - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
93
94
 
94
95
  ### Postgres setup
95
96
 
@@ -40,7 +40,7 @@ of engineering and development.
40
40
  Other benefits of PGSync include:
41
41
  - Real-time analytics
42
42
  - Reliable primary datastore/source of truth
43
- - Scale on-demand
43
+ - Scale on-demand (multiple consumers)
44
44
  - Easily join multiple nested tables
45
45
 
46
46
  #### Why?
@@ -66,7 +66,7 @@ the search capabilities of [Elasticsearch](https://www.elastic.co/products/elast
66
66
 
67
67
  #### How it works
68
68
 
69
- PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQlAlchemy](https://www.sqlalchemy.org).
69
+ PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQLAlchemy](https://www.sqlalchemy.org).
70
70
 
71
71
  PGSync leverages the [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) feature of [Postgres](https://www.postgresql.org) (introduced in PostgreSQL 9.4) to capture a continuous stream of change events.
72
72
  This feature needs to be enabled in your [Postgres](https://www.postgresql.org) configuration file by setting in the postgresql.conf file:
@@ -93,9 +93,14 @@ There are several ways of installing and trying PGSync
93
93
  - [Manual configuration](#manual-configuration)
94
94
 
95
95
 
96
- ##### Running in Docker
96
+ ##### Running in Docker (Using GitHub Repository)
97
97
 
98
98
  To start up all services with Docker.
99
+
100
+ ```
101
+ $ git clone https://github.com/toluaina/pgsync
102
+ ```
103
+
99
104
  Run:
100
105
  ```
101
106
  $ docker-compose up
@@ -106,6 +111,39 @@ Show the content in Elasticsearch/OpenSearch
106
111
  $ curl -X GET http://[Elasticsearch/OpenSearch host]:9201/reservations/_search?pretty=true
107
112
  ```
108
113
 
114
+
115
+ ##### Running with Docker (Using Image Repository)
116
+
117
+ To start all services with Docker, follow these steps:
118
+
119
+ 1. Pull the Docker image:
120
+
121
+ ```
122
+ $ docker pull toluaina1/pgsync:latest
123
+ ```
124
+
125
+ 2. Run the container:
126
+
127
+ ```
128
+ $ docker run --rm -it \
129
+ -e REDIS_CHECKPOINT=true \
130
+ -e REDIS_HOST=<redis_host_address> \
131
+ -e PG_URL=postgres://<username>:<password>@<postgres_host>/<database> \
132
+ -e ELASTICSEARCH_URL=http://<elasticsearch_host>:9200 \
133
+ -v "$(pwd)/schema.json:/app/schema.json" \
134
+ toluaina1/pgsync:latest -c schema.json -d -b
135
+ ```
136
+
137
+ Environment variable placeholders - full list [here](https://pgsync.com/env-vars):
138
+
139
+ - redis_host_address — Address of the Redis server (e.g., host.docker.internal for a local Docker setup)
140
+ - username — PostgreSQL username
141
+ - password — PostgreSQL password
142
+ - postgres_host — Host address for PostgreSQL instance (e.g., host.docker.internal)
143
+ - database — Name of PostgreSQL database
144
+ - elasticsearch_host — Address of Elasticsearch/OpenSearch instance (e.g., host.docker.internal)
145
+
146
+
109
147
  ##### Manual configuration
110
148
 
111
149
  - Setup
@@ -156,7 +194,7 @@ Key features of PGSync are:
156
194
  - [Postgres](https://www.postgresql.org) 9.6+
157
195
  - [Redis](https://redis.io) 3.1.0+
158
196
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
159
- - [SQlAlchemy](https://www.sqlalchemy.org) 1.3.4+
197
+ - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
160
198
 
161
199
 
162
200
  #### Example
@@ -12,7 +12,7 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
12
12
  - [Postgres](https://www.postgresql.org) 9.6+
13
13
  - [Redis](https://redis.io) 3.1.0+
14
14
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
15
- - [SQlAlchemy](https://www.sqlalchemy.org) 1.3.4+
15
+ - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
16
16
 
17
17
  ### Postgres setup
18
18
 
@@ -5,8 +5,14 @@ import logging
5
5
 
6
6
  import click
7
7
 
8
+ from pgsync import settings
8
9
  from pgsync.sync import Sync
9
- from pgsync.utils import config_loader, get_config, show_settings
10
+ from pgsync.utils import (
11
+ config_loader,
12
+ MutuallyExclusiveOption,
13
+ show_settings,
14
+ validate_config,
15
+ )
10
16
 
11
17
  logger = logging.getLogger(__name__)
12
18
 
@@ -17,17 +23,40 @@ logger = logging.getLogger(__name__)
17
23
  "-c",
18
24
  help="Schema config",
19
25
  type=click.Path(exists=True),
26
+ default=settings.SCHEMA,
27
+ show_default=True,
28
+ cls=MutuallyExclusiveOption,
29
+ mutually_exclusive=["s3_schema_url"],
30
+ )
31
+ @click.option(
32
+ "--s3_schema_url",
33
+ help="S3 URL for schema config",
34
+ type=click.STRING,
35
+ default=settings.S3_SCHEMA_URL,
36
+ show_default=True,
37
+ cls=MutuallyExclusiveOption,
38
+ mutually_exclusive=["config"],
20
39
  )
21
- @click.option("--host", "-h", help="PG_HOST overide")
40
+ @click.option("--host", "-h", help="PG_HOST override")
22
41
  @click.option("--password", is_flag=True, help="Prompt for database password")
23
- @click.option("--port", "-p", help="PG_PORT overide", type=int)
42
+ @click.option("--port", "-p", help="PG_PORT override", type=int)
24
43
  @click.option(
25
44
  "--teardown",
26
45
  "-t",
27
46
  is_flag=True,
28
47
  help="Teardown database triggers and replication slots",
29
48
  )
30
- @click.option("--user", "-u", help="PG_USER overide")
49
+ @click.option(
50
+ "--no-create",
51
+ "-nc",
52
+ is_flag=True,
53
+ help=(
54
+ "Skip DDL statement for objects "
55
+ "(Functions, Views & Replication slots) creation"
56
+ ),
57
+ default=False,
58
+ )
59
+ @click.option("--user", "-u", help="PG_USER override")
31
60
  @click.option(
32
61
  "--verbose",
33
62
  "-v",
@@ -38,11 +67,13 @@ logger = logging.getLogger(__name__)
38
67
  def main(
39
68
  teardown: bool,
40
69
  config: str,
70
+ s3_schema_url: str,
41
71
  user: str,
42
72
  password: bool,
43
73
  host: str,
44
74
  port: int,
45
75
  verbose: bool,
76
+ no_create: bool = False,
46
77
  ) -> None:
47
78
  """Application onetime Bootstrap."""
48
79
  kwargs: dict = {
@@ -58,13 +89,13 @@ def main(
58
89
  )
59
90
  kwargs = {key: value for key, value in kwargs.items() if value is not None}
60
91
 
61
- config: str = get_config(config)
92
+ validate_config(config=config, s3_schema_url=s3_schema_url)
62
93
 
63
- show_settings(config)
94
+ show_settings(config=config, s3_schema_url=s3_schema_url)
64
95
 
65
96
  validate: bool = False if teardown else True
66
97
 
67
- for doc in config_loader(config):
98
+ for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
68
99
  sync: Sync = Sync(
69
100
  doc,
70
101
  verbose=verbose,
@@ -75,7 +106,7 @@ def main(
75
106
  if teardown:
76
107
  sync.teardown()
77
108
  continue
78
- sync.setup()
109
+ sync.setup(no_create=no_create)
79
110
  logger.info(f"Bootstrap: {sync.database}")
80
111
 
81
112
 
@@ -39,7 +39,6 @@ filtered based on the page number and row numbers.
39
39
  This systematic and parallel approach optimizes the synchronization process,
40
40
  especially in environments challenged by network latency.
41
41
  """
42
-
43
42
  import asyncio
44
43
  import multiprocessing
45
44
  import os
@@ -54,9 +53,15 @@ from threading import Thread
54
53
  import click
55
54
  import sqlalchemy as sa
56
55
 
57
- from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH
56
+ from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH, S3_SCHEMA_URL, SCHEMA
58
57
  from pgsync.sync import Sync
59
- from pgsync.utils import config_loader, get_config, show_settings, timeit
58
+ from pgsync.utils import (
59
+ config_loader,
60
+ MutuallyExclusiveOption,
61
+ show_settings,
62
+ timeit,
63
+ validate_config,
64
+ )
60
65
 
61
66
 
62
67
  def save_ctid(page: int, row: int, filename: str) -> None:
@@ -378,6 +383,19 @@ def run_task(
378
383
  "-c",
379
384
  help="Schema config",
380
385
  type=click.Path(exists=True),
386
+ default=SCHEMA,
387
+ show_default=True,
388
+ cls=MutuallyExclusiveOption,
389
+ mutually_exclusive=["s3_schema_url"],
390
+ )
391
+ @click.option(
392
+ "--s3_schema_url",
393
+ help="S3 URL for schema config",
394
+ type=click.STRING,
395
+ default=S3_SCHEMA_URL,
396
+ show_default=True,
397
+ cls=MutuallyExclusiveOption,
398
+ mutually_exclusive=["config"],
381
399
  )
382
400
  @click.option(
383
401
  "--verbose",
@@ -409,17 +427,20 @@ def run_task(
409
427
  ),
410
428
  default="multiprocess_async",
411
429
  )
412
- def main(config: str, nprocs: int, mode: str, verbose: bool) -> None:
430
+ def main(
431
+ config: str, s3_schema_url: str, nprocs: int, mode: str, verbose: bool
432
+ ) -> None:
413
433
  """
414
434
  TODO:
415
435
  - Track progress across cpus/threads
416
436
  - Handle KeyboardInterrupt Exception
417
437
  """
418
438
 
419
- show_settings()
420
- config: str = get_config(config)
439
+ validate_config(config=config, s3_schema_url=s3_schema_url)
440
+
441
+ show_settings(config=config, s3_schema_url=s3_schema_url)
421
442
 
422
- for doc in config_loader(config):
443
+ for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
423
444
  tasks: t.Generator = fetch_tasks(doc)
424
445
  if mode == "synchronous":
425
446
  synchronous(tasks, doc, verbose=verbose)
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = "Tolu Aina"
4
4
  __email__ = "tolu@pgsync.com"
5
- __version__ = "4.0.0"
5
+ __version__ = "4.2.0"