pgsync 4.1.0__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {pgsync-4.1.0 → pgsync-4.2.0}/PKG-INFO +11 -11
  2. {pgsync-4.1.0 → pgsync-4.2.0}/README.md +42 -4
  3. {pgsync-4.1.0 → pgsync-4.2.0}/README.rst +1 -1
  4. {pgsync-4.1.0 → pgsync-4.2.0}/bin/bootstrap +24 -4
  5. {pgsync-4.1.0 → pgsync-4.2.0}/bin/parallel_sync +28 -7
  6. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/__init__.py +1 -1
  7. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/base.py +70 -8
  8. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/helper.py +4 -3
  9. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/redisqueue.py +28 -0
  10. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/settings.py +19 -1
  11. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/sync.py +85 -11
  12. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/utils.py +76 -35
  13. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/PKG-INFO +11 -11
  14. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/requires.txt +9 -9
  15. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_helper.py +19 -10
  16. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_redisqueue.py +63 -0
  17. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_urls.py +16 -10
  18. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_utils.py +36 -11
  19. {pgsync-4.1.0 → pgsync-4.2.0}/AUTHORS.rst +0 -0
  20. {pgsync-4.1.0 → pgsync-4.2.0}/CONTRIBUTING.rst +0 -0
  21. {pgsync-4.1.0 → pgsync-4.2.0}/HISTORY.rst +0 -0
  22. {pgsync-4.1.0 → pgsync-4.2.0}/LICENSE +0 -0
  23. {pgsync-4.1.0 → pgsync-4.2.0}/MANIFEST.in +0 -0
  24. {pgsync-4.1.0 → pgsync-4.2.0}/bin/pgsync +0 -0
  25. {pgsync-4.1.0 → pgsync-4.2.0}/docs/Makefile +0 -0
  26. {pgsync-4.1.0 → pgsync-4.2.0}/docs/authors.rst +0 -0
  27. {pgsync-4.1.0 → pgsync-4.2.0}/docs/changelog.rst +0 -0
  28. {pgsync-4.1.0 → pgsync-4.2.0}/docs/conf.py +0 -0
  29. {pgsync-4.1.0 → pgsync-4.2.0}/docs/contributing.rst +0 -0
  30. {pgsync-4.1.0 → pgsync-4.2.0}/docs/history.rst +0 -0
  31. {pgsync-4.1.0 → pgsync-4.2.0}/docs/index.rst +0 -0
  32. {pgsync-4.1.0 → pgsync-4.2.0}/docs/installation.rst +0 -0
  33. {pgsync-4.1.0 → pgsync-4.2.0}/docs/logo.png +0 -0
  34. {pgsync-4.1.0 → pgsync-4.2.0}/docs/make.bat +0 -0
  35. {pgsync-4.1.0 → pgsync-4.2.0}/docs/readme.rst +0 -0
  36. {pgsync-4.1.0 → pgsync-4.2.0}/docs/usage.rst +0 -0
  37. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/constants.py +0 -0
  38. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/exc.py +0 -0
  39. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/node.py +0 -0
  40. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/plugin.py +0 -0
  41. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/querybuilder.py +0 -0
  42. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/search_client.py +0 -0
  43. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/singleton.py +0 -0
  44. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/transform.py +0 -0
  45. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/trigger.py +0 -0
  46. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/urls.py +0 -0
  47. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync/view.py +0 -0
  48. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/SOURCES.txt +0 -0
  49. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/dependency_links.txt +0 -0
  50. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/not-zip-safe +0 -0
  51. {pgsync-4.1.0 → pgsync-4.2.0}/pgsync.egg-info/top_level.txt +0 -0
  52. {pgsync-4.1.0 → pgsync-4.2.0}/pyproject.toml +0 -0
  53. {pgsync-4.1.0 → pgsync-4.2.0}/setup.cfg +0 -0
  54. {pgsync-4.1.0 → pgsync-4.2.0}/setup.py +0 -0
  55. {pgsync-4.1.0 → pgsync-4.2.0}/tests/__init__.py +0 -0
  56. {pgsync-4.1.0 → pgsync-4.2.0}/tests/conftest.py +0 -0
  57. {pgsync-4.1.0 → pgsync-4.2.0}/tests/fixtures/schema.json +0 -0
  58. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_base.py +0 -0
  59. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_constants.py +0 -0
  60. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_env_vars.py +0 -0
  61. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_log_handlers.py +0 -0
  62. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_node.py +0 -0
  63. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_query_builder.py +0 -0
  64. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_search_client.py +0 -0
  65. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_settings.py +0 -0
  66. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync.py +0 -0
  67. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync_nested_children.py +0 -0
  68. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync_root.py +0 -0
  69. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync_single_child_fk_on_child.py +0 -0
  70. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_sync_single_child_fk_on_parent.py +0 -0
  71. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_transform.py +0 -0
  72. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_trigger.py +0 -0
  73. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_unique_behaviour.py +0 -0
  74. {pgsync-4.1.0 → pgsync-4.2.0}/tests/test_view.py +0 -0
  75. {pgsync-4.1.0 → pgsync-4.2.0}/tests/testing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pgsync
3
- Version: 4.1.0
3
+ Version: 4.2.0
4
4
  Summary: Postgres to Elasticsearch/OpenSearch sync
5
5
  Home-page: https://github.com/toluaina/pgsync
6
6
  Author: Tolu Aina
@@ -32,15 +32,15 @@ License-File: LICENSE
32
32
  License-File: AUTHORS.rst
33
33
  Requires-Dist: async-timeout==5.0.1
34
34
  Requires-Dist: backports-datetime-fromisoformat==2.0.3
35
- Requires-Dist: boto3==1.38.44
36
- Requires-Dist: botocore==1.38.44
37
- Requires-Dist: certifi==2025.6.15
35
+ Requires-Dist: boto3==1.40.1
36
+ Requires-Dist: botocore==1.40.1
37
+ Requires-Dist: certifi==2025.8.3
38
38
  Requires-Dist: charset-normalizer==3.4.2
39
39
  Requires-Dist: click==8.1.8
40
40
  Requires-Dist: elastic-transport==8.17.1
41
- Requires-Dist: elasticsearch==8.18.1
42
- Requires-Dist: elasticsearch-dsl==8.18.0
43
- Requires-Dist: environs==14.2.0
41
+ Requires-Dist: elasticsearch==8.19.0
42
+ Requires-Dist: elasticsearch-dsl==8.15.4
43
+ Requires-Dist: environs==14.3.0
44
44
  Requires-Dist: events==0.5
45
45
  Requires-Dist: greenlet==3.2.3
46
46
  Requires-Dist: idna==3.10
@@ -54,11 +54,11 @@ Requires-Dist: python-dotenv==1.1.1
54
54
  Requires-Dist: redis==6.2.0
55
55
  Requires-Dist: requests==2.32.4
56
56
  Requires-Dist: requests-aws4auth==1.3.1
57
- Requires-Dist: s3transfer==0.13.0
57
+ Requires-Dist: s3transfer==0.13.1
58
58
  Requires-Dist: six==1.17.0
59
- Requires-Dist: sqlalchemy==2.0.41
59
+ Requires-Dist: sqlalchemy==2.0.42
60
60
  Requires-Dist: sqlparse==0.5.3
61
- Requires-Dist: typing-extensions==4.14.0
61
+ Requires-Dist: typing-extensions==4.14.1
62
62
  Requires-Dist: urllib3==1.26.20
63
63
  Dynamic: author
64
64
  Dynamic: author-email
@@ -90,7 +90,7 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
90
90
  - [Postgres](https://www.postgresql.org) 9.6+
91
91
  - [Redis](https://redis.io) 3.1.0+
92
92
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
93
- - [SQlAlchemy](https://www.sqlalchemy.org) 1.3.4+
93
+ - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
94
94
 
95
95
  ### Postgres setup
96
96
 
@@ -40,7 +40,7 @@ of engineering and development.
40
40
  Other benefits of PGSync include:
41
41
  - Real-time analytics
42
42
  - Reliable primary datastore/source of truth
43
- - Scale on-demand
43
+ - Scale on-demand (multiple consumers)
44
44
  - Easily join multiple nested tables
45
45
 
46
46
  #### Why?
@@ -66,7 +66,7 @@ the search capabilities of [Elasticsearch](https://www.elastic.co/products/elast
66
66
 
67
67
  #### How it works
68
68
 
69
- PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQlAlchemy](https://www.sqlalchemy.org).
69
+ PGSync is written in Python (supporting version 3.9 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQLAlchemy](https://www.sqlalchemy.org).
70
70
 
71
71
  PGSync leverages the [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) feature of [Postgres](https://www.postgresql.org) (introduced in PostgreSQL 9.4) to capture a continuous stream of change events.
72
72
  This feature needs to be enabled in your [Postgres](https://www.postgresql.org) configuration file by setting in the postgresql.conf file:
@@ -93,9 +93,14 @@ There are several ways of installing and trying PGSync
93
93
  - [Manual configuration](#manual-configuration)
94
94
 
95
95
 
96
- ##### Running in Docker
96
+ ##### Running in Docker (Using GitHub Repository)
97
97
 
98
98
  To start up all services with Docker.
99
+
100
+ ```
101
+ $ git clone https://github.com/toluaina/pgsync
102
+ ```
103
+
99
104
  Run:
100
105
  ```
101
106
  $ docker-compose up
@@ -106,6 +111,39 @@ Show the content in Elasticsearch/OpenSearch
106
111
  $ curl -X GET http://[Elasticsearch/OpenSearch host]:9201/reservations/_search?pretty=true
107
112
  ```
108
113
 
114
+
115
+ ##### Running with Docker (Using Image Repository)
116
+
117
+ To start all services with Docker, follow these steps:
118
+
119
+ 1. Pull the Docker image:
120
+
121
+ ```
122
+ $ docker pull toluaina1/pgsync:latest
123
+ ```
124
+
125
+ 2. Run the container:
126
+
127
+ ```
128
+ $ docker run --rm -it \
129
+ -e REDIS_CHECKPOINT=true \
130
+ -e REDIS_HOST=<redis_host_address> \
131
+ -e PG_URL=postgres://<username>:<password>@<postgres_host>/<database> \
132
+ -e ELASTICSEARCH_URL=http://<elasticsearch_host>:9200 \
133
+ -v "$(pwd)/schema.json:/app/schema.json" \
134
+ toluaina1/pgsync:latest -c schema.json -d -b
135
+ ```
136
+
137
+ Environment variable placeholders - full list [here](https://pgsync.com/env-vars):
138
+
139
+ - redis_host_address — Address of the Redis server (e.g., host.docker.internal for local Docker setup)
140
+ - username — PostgreSQL username
141
+ - password — PostgreSQL password
142
+ - postgres_host — Host address for PostgreSQL instance (e.g., host.docker.internal)
143
+ - database — Name of PostgreSQL database
144
+ - elasticsearch_host — Address of Elasticsearch/OpenSearch instance (e.g., host.docker.internal)
145
+
146
+
109
147
  ##### Manual configuration
110
148
 
111
149
  - Setup
@@ -156,7 +194,7 @@ Key features of PGSync are:
156
194
  - [Postgres](https://www.postgresql.org) 9.6+
157
195
  - [Redis](https://redis.io) 3.1.0+
158
196
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
159
- - [SQlAlchemy](https://www.sqlalchemy.org) 1.3.4+
197
+ - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
160
198
 
161
199
 
162
200
  #### Example
@@ -12,7 +12,7 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
12
12
  - [Postgres](https://www.postgresql.org) 9.6+
13
13
  - [Redis](https://redis.io) 3.1.0+
14
14
  - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
15
- - [SQlAlchemy](https://www.sqlalchemy.org) 1.3.4+
15
+ - [SQLAlchemy](https://www.sqlalchemy.org) 1.3.4+
16
16
 
17
17
  ### Postgres setup
18
18
 
@@ -5,8 +5,14 @@ import logging
5
5
 
6
6
  import click
7
7
 
8
+ from pgsync import settings
8
9
  from pgsync.sync import Sync
9
- from pgsync.utils import config_loader, get_config, show_settings
10
+ from pgsync.utils import (
11
+ config_loader,
12
+ MutuallyExclusiveOption,
13
+ show_settings,
14
+ validate_config,
15
+ )
10
16
 
11
17
  logger = logging.getLogger(__name__)
12
18
 
@@ -17,6 +23,19 @@ logger = logging.getLogger(__name__)
17
23
  "-c",
18
24
  help="Schema config",
19
25
  type=click.Path(exists=True),
26
+ default=settings.SCHEMA,
27
+ show_default=True,
28
+ cls=MutuallyExclusiveOption,
29
+ mutually_exclusive=["s3_schema_url"],
30
+ )
31
+ @click.option(
32
+ "--s3_schema_url",
33
+ help="S3 URL for schema config",
34
+ type=click.STRING,
35
+ default=settings.S3_SCHEMA_URL,
36
+ show_default=True,
37
+ cls=MutuallyExclusiveOption,
38
+ mutually_exclusive=["config"],
20
39
  )
21
40
  @click.option("--host", "-h", help="PG_HOST override")
22
41
  @click.option("--password", is_flag=True, help="Prompt for database password")
@@ -48,6 +67,7 @@ logger = logging.getLogger(__name__)
48
67
  def main(
49
68
  teardown: bool,
50
69
  config: str,
70
+ s3_schema_url: str,
51
71
  user: str,
52
72
  password: bool,
53
73
  host: str,
@@ -69,13 +89,13 @@ def main(
69
89
  )
70
90
  kwargs = {key: value for key, value in kwargs.items() if value is not None}
71
91
 
72
- config: str = get_config(config)
92
+ validate_config(config=config, s3_schema_url=s3_schema_url)
73
93
 
74
- show_settings(config)
94
+ show_settings(config=config, s3_schema_url=s3_schema_url)
75
95
 
76
96
  validate: bool = False if teardown else True
77
97
 
78
- for doc in config_loader(config):
98
+ for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
79
99
  sync: Sync = Sync(
80
100
  doc,
81
101
  verbose=verbose,
@@ -47,16 +47,21 @@ import sys
47
47
  import typing as t
48
48
  from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
49
49
  from dataclasses import dataclass
50
- from pathlib import Path
51
50
  from queue import Queue
52
51
  from threading import Thread
53
52
 
54
53
  import click
55
54
  import sqlalchemy as sa
56
55
 
57
- from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH
56
+ from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH, S3_SCHEMA_URL, SCHEMA
58
57
  from pgsync.sync import Sync
59
- from pgsync.utils import config_loader, get_config, show_settings, timeit
58
+ from pgsync.utils import (
59
+ config_loader,
60
+ MutuallyExclusiveOption,
61
+ show_settings,
62
+ timeit,
63
+ validate_config,
64
+ )
60
65
 
61
66
 
62
67
  def save_ctid(page: int, row: int, filename: str) -> None:
@@ -378,6 +383,19 @@ def run_task(
378
383
  "-c",
379
384
  help="Schema config",
380
385
  type=click.Path(exists=True),
386
+ default=SCHEMA,
387
+ show_default=True,
388
+ cls=MutuallyExclusiveOption,
389
+ mutually_exclusive=["s3_schema_url"],
390
+ )
391
+ @click.option(
392
+ "--s3_schema_url",
393
+ help="S3 URL for schema config",
394
+ type=click.STRING,
395
+ default=S3_SCHEMA_URL,
396
+ show_default=True,
397
+ cls=MutuallyExclusiveOption,
398
+ mutually_exclusive=["config"],
381
399
  )
382
400
  @click.option(
383
401
  "--verbose",
@@ -409,17 +427,20 @@ def run_task(
409
427
  ),
410
428
  default="multiprocess_async",
411
429
  )
412
- def main(config: str, nprocs: int, mode: str, verbose: bool) -> None:
430
+ def main(
431
+ config: str, s3_schema_url: str, nprocs: int, mode: str, verbose: bool
432
+ ) -> None:
413
433
  """
414
434
  TODO:
415
435
  - Track progress across cpus/threads
416
436
  - Handle KeyboardInterrupt Exception
417
437
  """
418
- config: str = get_config(config)
419
438
 
420
- show_settings(config)
439
+ validate_config(config=config, s3_schema_url=s3_schema_url)
440
+
441
+ show_settings(config=config, s3_schema_url=s3_schema_url)
421
442
 
422
- for doc in config_loader(config):
443
+ for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
423
444
  tasks: t.Generator = fetch_tasks(doc)
424
445
  if mode == "synchronous":
425
446
  synchronous(tasks, doc, verbose=verbose)
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = "Tolu Aina"
4
4
  __email__ = "tolu@pgsync.com"
5
- __version__ = "4.1.0"
5
+ __version__ = "4.2.0"
@@ -2,6 +2,7 @@
2
2
 
3
3
  import logging
4
4
  import os
5
+ import threading
5
6
  import time
6
7
  import typing as t
7
8
  from contextlib import contextmanager
@@ -28,8 +29,13 @@ from .exc import (
28
29
  TableNotFoundError,
29
30
  )
30
31
  from .settings import (
32
+ PG_HOST_RO,
33
+ PG_PASSWORD_RO,
34
+ PG_PORT_RO,
31
35
  PG_SSLMODE,
32
36
  PG_SSLROOTCERT,
37
+ PG_URL_RO,
38
+ PG_USER_RO,
33
39
  QUERY_CHUNK_SIZE,
34
40
  STREAM_RESULTS,
35
41
  )
@@ -48,7 +54,6 @@ try:
48
54
  except ImportError:
49
55
  pass
50
56
 
51
-
52
57
  logger = logging.getLogger(__name__)
53
58
 
54
59
  SSL_MODES = (
@@ -153,6 +158,8 @@ class TupleIdentifierType(sa.types.UserDefinedType):
153
158
 
154
159
 
155
160
  class Base(object):
161
+ _thread_local = threading.local()
162
+
156
163
  INT_TYPES = (
157
164
  "bigint",
158
165
  "bigserial",
@@ -190,6 +197,26 @@ class Base(object):
190
197
  self.__engine: sa.engine.Engine = _pg_engine(
191
198
  database, echo=False, **kwargs
192
199
  )
200
+ self.__engine_ro: t.Optional[sa.engine.Engine] = None
201
+ if (
202
+ PG_USER_RO
203
+ or PG_HOST_RO
204
+ or PG_PASSWORD_RO
205
+ or PG_PORT_RO
206
+ or PG_URL_RO
207
+ ):
208
+ kwargs.update(
209
+ {
210
+ "user": PG_USER_RO,
211
+ "host": PG_HOST_RO,
212
+ "password": PG_PASSWORD_RO,
213
+ "port": PG_PORT_RO,
214
+ "url": PG_URL_RO,
215
+ }
216
+ )
217
+ self.__engine_ro: sa.engine.Engine = _pg_engine(
218
+ database, echo=False, **kwargs
219
+ )
193
220
  self.__schemas: t.Optional[dict] = None
194
221
  # models is a dict of f'{schema}.{table}'
195
222
  self.__models: dict = {}
@@ -307,6 +334,8 @@ class Base(object):
307
334
  @property
308
335
  def engine(self) -> sa.engine.Engine:
309
336
  """Get the database engine."""
337
+ if getattr(self._thread_local, "read_only", False):
338
+ return self.__engine_ro
310
339
  return self.__engine
311
340
 
312
341
  @property
@@ -910,6 +939,37 @@ class Base(object):
910
939
  label="txid_current",
911
940
  )[0]
912
941
 
942
+ def pg_visible_in_snapshot(
943
+ self, literal_binds: bool = False
944
+ ) -> t.Callable[[t.List[int]], dict]:
945
+ def _pg_visible_in_snapshot(xid8s: t.List[int]) -> dict:
946
+ if not xid8s:
947
+ return {}
948
+ # TODO: use the SQLAlchemy ORM to handle this query
949
+ statement = sa.text(
950
+ """
951
+ SELECT xid AS xid8,
952
+ PG_VISIBLE_IN_SNAPSHOT(xid::xid8, PG_CURRENT_SNAPSHOT()) AS visible
953
+ FROM UNNEST(CAST(:xid8s AS text[]))
954
+ WITH ORDINALITY AS t(xid, ord)
955
+ ORDER BY t.ord
956
+ """
957
+ )
958
+ if self.verbose:
959
+ compiled_query(
960
+ statement,
961
+ label="xmin_visibility",
962
+ literal_binds=literal_binds,
963
+ )
964
+
965
+ # xid8s = list of xid8 strings
966
+ params: dict = {"xid8s": list(map(str, xid8s))}
967
+ with self.__engine_ro.connect() as conn:
968
+ result = conn.execute(statement, params)
969
+ return {int(row.xid8): row.visible for row in result}
970
+
971
+ return _pg_visible_in_snapshot
972
+
913
973
  def parse_value(self, type_: str, value: str) -> t.Optional[str]:
914
974
  """
915
975
  Parse datatypes from db.
@@ -1168,6 +1228,7 @@ def _pg_engine(
1168
1228
  echo: bool = False,
1169
1229
  sslmode: t.Optional[str] = None,
1170
1230
  sslrootcert: t.Optional[str] = None,
1231
+ url: t.Optional[str] = None,
1171
1232
  ) -> sa.engine.Engine:
1172
1233
  connect_args: dict = {}
1173
1234
  sslmode = sslmode or PG_SSLMODE
@@ -1187,13 +1248,14 @@ def _pg_engine(
1187
1248
  )
1188
1249
  connect_args["sslrootcert"] = sslrootcert
1189
1250
 
1190
- url: str = get_postgres_url(
1191
- database,
1192
- user=user,
1193
- host=host,
1194
- password=password,
1195
- port=port,
1196
- )
1251
+ if url is None:
1252
+ url: str = get_postgres_url(
1253
+ database,
1254
+ user=user,
1255
+ host=host,
1256
+ password=password,
1257
+ port=port,
1258
+ )
1197
1259
  return sa.create_engine(url, echo=echo, connect_args=connect_args)
1198
1260
 
1199
1261
 
@@ -8,7 +8,7 @@ import sqlalchemy as sa
8
8
 
9
9
  from .base import database_exists, drop_database
10
10
  from .sync import Sync
11
- from .utils import config_loader, get_config
11
+ from .utils import config_loader, validate_config
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
@@ -20,6 +20,7 @@ def teardown(
20
20
  drop_index: bool = True,
21
21
  delete_checkpoint: bool = True,
22
22
  config: t.Optional[str] = None,
23
+ s3_schema_url: t.Optional[str] = None,
23
24
  validate: bool = False,
24
25
  ) -> None:
25
26
  """
@@ -34,9 +35,9 @@ def teardown(
34
35
  config (Optional[str], optional): The configuration file path. Defaults to None.
35
36
  validate (bool, optional): Whether to validate the configuration. Defaults to False.
36
37
  """
37
- config: str = get_config(config)
38
+ validate_config(config=config, s3_schema_url=s3_schema_url)
38
39
 
39
- for doc in config_loader(config):
40
+ for doc in config_loader(config=config, s3_schema_url=s3_schema_url):
40
41
  if not database_exists(doc["database"]):
41
42
  logger.warning(f'Database {doc["database"]} does not exist')
42
43
  continue
@@ -52,6 +52,34 @@ class RedisQueue(object):
52
52
  logger.debug(f"pop size: {len(items[0])}")
53
53
  return list(map(lambda value: json.loads(value), items[0]))
54
54
 
55
+ def pop_visible_in_snapshot(
56
+ self,
57
+ pg_visible_in_snapshot: t.Callable[[t.List[int]], dict],
58
+ chunk_size: t.Optional[int] = None,
59
+ ) -> t.List[dict]:
60
+ """
61
+ Pop items in the queue that are visible in the current snapshot.
62
+ Uses the provided pg_visible_in_snapshot function to determine visibility.
63
+ This function is useful for read-only consumers that need to process items
64
+ that are visible in the current PostgreSQL snapshot.
65
+ """
66
+ chunk_size = chunk_size or REDIS_READ_CHUNK_SIZE
67
+ items: t.List = self.__db.lrange(self.key, 0, chunk_size - 1)
68
+ if not items:
69
+ return []
70
+ payloads = [json.loads(i) for i in items]
71
+ visible_map: dict = pg_visible_in_snapshot()(
72
+ [payload["xmin"] for payload in payloads]
73
+ )
74
+ visible: t.List[dict] = []
75
+ for item, payload in zip(items, payloads):
76
+ if visible_map.get(payload["xmin"]):
77
+ # Claim atomically
78
+ removed = self.__db.lrem(self.key, 1, item)
79
+ if removed:
80
+ visible.append(payload)
81
+ return visible
82
+
55
83
  def push(self, items: t.List) -> None:
56
84
  """Push multiple items onto the queue."""
57
85
  self.__db.rpush(self.key, *map(json.dumps, items))
@@ -43,6 +43,7 @@ REPLICATION_SLOT_CLEANUP_INTERVAL = env.float(
43
43
  )
44
44
  # path to the application schema config
45
45
  SCHEMA = env.str("SCHEMA", default=None)
46
+ S3_SCHEMA_URL = env.str("S3_SCHEMA_URL", default=None)
46
47
  USE_ASYNC = env.bool("USE_ASYNC", default=False)
47
48
  STREAM_RESULTS = env.bool("STREAM_RESULTS", default=True)
48
49
  # db polling interval
@@ -173,9 +174,26 @@ if PG_URL:
173
174
  PG_USER = env.str("PG_USER", default=None)
174
175
  else:
175
176
  # If PG_URL is not set, we need to use the other PG_* variables
176
- PG_URL = None
177
177
  PG_USER = env.str("PG_USER")
178
178
 
179
+ # Read-only Postgres:
180
+ # This is used for read-only consumers that do not require replication slots or triggers.
181
+ # full database url including user, password, host, port and dbname
182
+ PG_URL_RO = env.str("PG_URL_RO", default=None)
183
+ PG_HOST_RO = env.str("PG_HOST_RO", default=None)
184
+ PG_PASSWORD_RO = env.str("PG_PASSWORD_RO", default=None)
185
+ PG_PORT_RO = env.int("PG_PORT_RO", default=None)
186
+ PG_SSLMODE_RO = env.str("PG_SSLMODE_RO", default=None)
187
+ PG_SSLROOTCERT_RO = env.str("PG_SSLROOTCERT_RO", default=None)
188
+ PG_USER_RO = env.str("PG_USER_RO", default=None)
189
+ if PG_URL_RO:
190
+ # If PG_URL_RO is set, we don't need to use the other PG_*_RO variables
191
+ PG_HOST_RO = None
192
+ PG_PASSWORD_RO = None
193
+ PG_PORT_RO = None
194
+ PG_SSLMODE_RO = None
195
+ PG_SSLROOTCERT_RO = None
196
+
179
197
  # Redis:
180
198
  REDIS_AUTH = env.str("REDIS_AUTH", default=None)
181
199
  REDIS_USER = env.str("REDIS_USER", default=None)