pgsync 3.1.0__tar.gz → 3.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgsync-3.1.0 → pgsync-3.2.1}/PKG-INFO +24 -23
- {pgsync-3.1.0 → pgsync-3.2.1}/bin/bootstrap +9 -1
- {pgsync-3.1.0 → pgsync-3.2.1}/bin/parallel_sync +1 -1
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/__init__.py +1 -1
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/base.py +59 -43
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/constants.py +2 -1
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/helper.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/node.py +13 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/plugin.py +10 -1
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/querybuilder.py +23 -1
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/redisqueue.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/search_client.py +23 -19
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/settings.py +3 -1
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/sync.py +74 -39
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/transform.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/urls.py +8 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/utils.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/view.py +16 -9
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/PKG-INFO +24 -23
- pgsync-3.2.1/pgsync.egg-info/requires.txt +30 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/setup.py +5 -8
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/conftest.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_base.py +33 -3
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_constants.py +2 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_env_vars.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_helper.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_log_handlers.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_node.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_query_builder.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_search_client.py +2 -1
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_settings.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync.py +14 -8
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync_nested_children.py +23 -5
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync_root.py +206 -49
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync_single_child_fk_on_child.py +71 -62
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync_single_child_fk_on_parent.py +81 -60
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_transform.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_trigger.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_unique_behaviour.py +1 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_urls.py +19 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_utils.py +1 -0
- pgsync-3.1.0/pgsync.egg-info/requires.txt +0 -29
- {pgsync-3.1.0 → pgsync-3.2.1}/AUTHORS.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/CONTRIBUTING.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/HISTORY.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/LICENSE +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/MANIFEST.in +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/README.md +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/README.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/bin/pgsync +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/Makefile +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/authors.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/changelog.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/conf.py +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/contributing.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/history.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/index.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/installation.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/logo.png +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/make.bat +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/readme.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/docs/usage.rst +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/exc.py +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/singleton.py +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/trigger.py +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/SOURCES.txt +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/dependency_links.txt +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/not-zip-safe +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/top_level.txt +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/pyproject.toml +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/setup.cfg +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/__init__.py +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/fixtures/schema.json +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_redisqueue.py +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_view.py +0 -0
- {pgsync-3.1.0 → pgsync-3.2.1}/tests/testing_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pgsync
|
|
3
|
-
Version: 3.1
|
|
3
|
+
Version: 3.2.1
|
|
4
4
|
Summary: Postgres to Elasticsearch/OpenSearch sync
|
|
5
5
|
Home-page: https://github.com/toluaina/pgsync
|
|
6
6
|
Author: Tolu Aina
|
|
@@ -13,7 +13,7 @@ Project-URL: Funding, https://github.com/sponsors/toluaina
|
|
|
13
13
|
Project-URL: Source, https://github.com/toluaina/pgsync
|
|
14
14
|
Project-URL: Web, https://pgsync.com
|
|
15
15
|
Project-URL: Documentation, https://pgsync.com
|
|
16
|
-
Keywords:
|
|
16
|
+
Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres
|
|
17
17
|
Classifier: Development Status :: 5 - Production/Stable
|
|
18
18
|
Classifier: Intended Audience :: Developers
|
|
19
19
|
Classifier: Natural Language :: English
|
|
@@ -31,34 +31,35 @@ Description-Content-Type: text/markdown
|
|
|
31
31
|
License-File: LICENSE
|
|
32
32
|
License-File: AUTHORS.rst
|
|
33
33
|
Requires-Dist: async-timeout==4.0.3
|
|
34
|
-
Requires-Dist: boto3==1.
|
|
35
|
-
Requires-Dist: botocore==1.
|
|
36
|
-
Requires-Dist: certifi==
|
|
34
|
+
Requires-Dist: boto3==1.35.5
|
|
35
|
+
Requires-Dist: botocore==1.35.5
|
|
36
|
+
Requires-Dist: certifi==2024.7.4
|
|
37
37
|
Requires-Dist: charset-normalizer==3.3.2
|
|
38
38
|
Requires-Dist: click==8.1.7
|
|
39
|
-
Requires-Dist: elastic-transport==8.
|
|
40
|
-
Requires-Dist: elasticsearch==8.
|
|
41
|
-
Requires-Dist: elasticsearch-dsl==8.
|
|
42
|
-
Requires-Dist: environs==
|
|
39
|
+
Requires-Dist: elastic-transport==8.15.0
|
|
40
|
+
Requires-Dist: elasticsearch==8.15.0
|
|
41
|
+
Requires-Dist: elasticsearch-dsl==8.15.1
|
|
42
|
+
Requires-Dist: environs==11.0.0
|
|
43
|
+
Requires-Dist: events==0.5
|
|
43
44
|
Requires-Dist: greenlet==3.0.3
|
|
44
|
-
Requires-Dist: idna==3.
|
|
45
|
+
Requires-Dist: idna==3.8
|
|
45
46
|
Requires-Dist: jmespath==1.0.1
|
|
46
|
-
Requires-Dist: marshmallow==3.
|
|
47
|
+
Requires-Dist: marshmallow==3.22.0
|
|
47
48
|
Requires-Dist: opensearch-dsl==2.1.0
|
|
48
|
-
Requires-Dist: opensearch-py==2.
|
|
49
|
-
Requires-Dist: packaging==
|
|
49
|
+
Requires-Dist: opensearch-py==2.7.1
|
|
50
|
+
Requires-Dist: packaging==24.1
|
|
50
51
|
Requires-Dist: psycopg2-binary==2.9.9
|
|
51
|
-
Requires-Dist: python-dateutil==2.
|
|
52
|
-
Requires-Dist: python-dotenv==1.0.
|
|
53
|
-
Requires-Dist: redis==5.0.
|
|
54
|
-
Requires-Dist: requests==2.
|
|
55
|
-
Requires-Dist: requests-aws4auth==1.
|
|
56
|
-
Requires-Dist: s3transfer==0.10.
|
|
52
|
+
Requires-Dist: python-dateutil==2.9.0.post0
|
|
53
|
+
Requires-Dist: python-dotenv==1.0.1
|
|
54
|
+
Requires-Dist: redis==5.0.8
|
|
55
|
+
Requires-Dist: requests==2.32.3
|
|
56
|
+
Requires-Dist: requests-aws4auth==1.3.1
|
|
57
|
+
Requires-Dist: s3transfer==0.10.2
|
|
57
58
|
Requires-Dist: six==1.16.0
|
|
58
|
-
Requires-Dist: sqlalchemy==2.0.
|
|
59
|
-
Requires-Dist: sqlparse==0.
|
|
60
|
-
Requires-Dist: typing-extensions==4.
|
|
61
|
-
Requires-Dist: urllib3==1.26.
|
|
59
|
+
Requires-Dist: sqlalchemy==2.0.32
|
|
60
|
+
Requires-Dist: sqlparse==0.5.1
|
|
61
|
+
Requires-Dist: typing-extensions==4.12.2
|
|
62
|
+
Requires-Dist: urllib3==1.26.19
|
|
62
63
|
|
|
63
64
|
# PostgreSQL to Elasticsearch/OpenSearch sync
|
|
64
65
|
|
|
@@ -35,7 +35,15 @@ logger = logging.getLogger(__name__)
|
|
|
35
35
|
default=False,
|
|
36
36
|
help="Turn on verbosity",
|
|
37
37
|
)
|
|
38
|
-
def main(
|
|
38
|
+
def main(
|
|
39
|
+
teardown: bool,
|
|
40
|
+
config: str,
|
|
41
|
+
user: str,
|
|
42
|
+
password: bool,
|
|
43
|
+
host: str,
|
|
44
|
+
port: int,
|
|
45
|
+
verbose: bool,
|
|
46
|
+
) -> None:
|
|
39
47
|
"""Application onetime Bootstrap."""
|
|
40
48
|
kwargs: dict = {
|
|
41
49
|
"user": user,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""PGSync Base."""
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
4
5
|
import typing as t
|
|
@@ -48,6 +49,15 @@ except ImportError:
|
|
|
48
49
|
|
|
49
50
|
logger = logging.getLogger(__name__)
|
|
50
51
|
|
|
52
|
+
SSL_MODES = (
|
|
53
|
+
"allow",
|
|
54
|
+
"disable",
|
|
55
|
+
"prefer",
|
|
56
|
+
"require",
|
|
57
|
+
"verify-ca",
|
|
58
|
+
"verify-full",
|
|
59
|
+
)
|
|
60
|
+
|
|
51
61
|
|
|
52
62
|
class Payload(object):
|
|
53
63
|
"""
|
|
@@ -141,6 +151,36 @@ class TupleIdentifierType(sa.types.UserDefinedType):
|
|
|
141
151
|
|
|
142
152
|
|
|
143
153
|
class Base(object):
|
|
154
|
+
INT_TYPES = (
|
|
155
|
+
"bigint",
|
|
156
|
+
"bigserial",
|
|
157
|
+
"int",
|
|
158
|
+
"int2",
|
|
159
|
+
"int4",
|
|
160
|
+
"int8",
|
|
161
|
+
"integer",
|
|
162
|
+
"serial",
|
|
163
|
+
"serial2",
|
|
164
|
+
"serial4",
|
|
165
|
+
"serial8",
|
|
166
|
+
"smallint",
|
|
167
|
+
"smallserial",
|
|
168
|
+
)
|
|
169
|
+
FLOAT_TYPES = (
|
|
170
|
+
"double precision",
|
|
171
|
+
"float4",
|
|
172
|
+
"float8",
|
|
173
|
+
"real",
|
|
174
|
+
)
|
|
175
|
+
CHAR_TYPES = (
|
|
176
|
+
"char",
|
|
177
|
+
"character",
|
|
178
|
+
"character varying",
|
|
179
|
+
"text",
|
|
180
|
+
"uuid",
|
|
181
|
+
"varchar",
|
|
182
|
+
)
|
|
183
|
+
|
|
144
184
|
def __init__(
|
|
145
185
|
self, database: str, verbose: bool = False, *args, **kwargs
|
|
146
186
|
) -> None:
|
|
@@ -433,7 +473,7 @@ class Base(object):
|
|
|
433
473
|
func: sa.sql.functions._FunctionGenerator,
|
|
434
474
|
txmin: t.Optional[int] = None,
|
|
435
475
|
txmax: t.Optional[int] = None,
|
|
436
|
-
upto_lsn: t.Optional[
|
|
476
|
+
upto_lsn: t.Optional[str] = None,
|
|
437
477
|
upto_nchanges: t.Optional[int] = None,
|
|
438
478
|
limit: t.Optional[int] = None,
|
|
439
479
|
offset: t.Optional[int] = None,
|
|
@@ -446,7 +486,7 @@ class Base(object):
|
|
|
446
486
|
func (sa.sql.functions._FunctionGenerator): The function to use to read from the slot.
|
|
447
487
|
txmin (Optional[int], optional): The minimum transaction ID to read from. Defaults to None.
|
|
448
488
|
txmax (Optional[int], optional): The maximum transaction ID to read from. Defaults to None.
|
|
449
|
-
upto_lsn (Optional[
|
|
489
|
+
upto_lsn (Optional[str], optional): The maximum LSN to read up to. Defaults to None.
|
|
450
490
|
upto_nchanges (Optional[int], optional): The maximum number of changes to read. Defaults to None.
|
|
451
491
|
limit (Optional[int], optional): The maximum number of rows to return. Defaults to None.
|
|
452
492
|
offset (Optional[int], optional): The number of rows to skip before returning. Defaults to None.
|
|
@@ -489,12 +529,20 @@ class Base(object):
|
|
|
489
529
|
statement = statement.offset(offset)
|
|
490
530
|
return statement
|
|
491
531
|
|
|
532
|
+
@property
|
|
533
|
+
def current_wal_lsn(self) -> str:
|
|
534
|
+
return self.fetchone(
|
|
535
|
+
sa.select(sa.func.MAX(sa.text("pg_current_wal_lsn"))).select_from(
|
|
536
|
+
sa.func.PG_CURRENT_WAL_LSN()
|
|
537
|
+
)
|
|
538
|
+
)[0]
|
|
539
|
+
|
|
492
540
|
def logical_slot_get_changes(
|
|
493
541
|
self,
|
|
494
542
|
slot_name: str,
|
|
495
543
|
txmin: t.Optional[int] = None,
|
|
496
544
|
txmax: t.Optional[int] = None,
|
|
497
|
-
upto_lsn: t.Optional[
|
|
545
|
+
upto_lsn: t.Optional[str] = None,
|
|
498
546
|
upto_nchanges: t.Optional[int] = None,
|
|
499
547
|
limit: t.Optional[int] = None,
|
|
500
548
|
offset: t.Optional[int] = None,
|
|
@@ -524,7 +572,7 @@ class Base(object):
|
|
|
524
572
|
slot_name: str,
|
|
525
573
|
txmin: t.Optional[int] = None,
|
|
526
574
|
txmax: t.Optional[int] = None,
|
|
527
|
-
upto_lsn: t.Optional[
|
|
575
|
+
upto_lsn: t.Optional[str] = None,
|
|
528
576
|
upto_nchanges: t.Optional[int] = None,
|
|
529
577
|
limit: t.Optional[int] = None,
|
|
530
578
|
offset: t.Optional[int] = None,
|
|
@@ -550,7 +598,7 @@ class Base(object):
|
|
|
550
598
|
slot_name: str,
|
|
551
599
|
txmin: t.Optional[int] = None,
|
|
552
600
|
txmax: t.Optional[int] = None,
|
|
553
|
-
upto_lsn: t.Optional[
|
|
601
|
+
upto_lsn: t.Optional[str] = None,
|
|
554
602
|
upto_nchanges: t.Optional[int] = None,
|
|
555
603
|
) -> int:
|
|
556
604
|
statement: sa.sql.Select = self._logical_slot_changes(
|
|
@@ -730,45 +778,20 @@ class Base(object):
|
|
|
730
778
|
|
|
731
779
|
NB: All integers are long in python3 and call to convert is just int
|
|
732
780
|
"""
|
|
781
|
+
if self.verbose:
|
|
782
|
+
logger.debug(f"type: {type_} value: {value}")
|
|
733
783
|
if value.lower() == "null":
|
|
734
784
|
return None
|
|
735
|
-
|
|
736
|
-
if type_.lower() in (
|
|
737
|
-
"bigint",
|
|
738
|
-
"bigserial",
|
|
739
|
-
"int",
|
|
740
|
-
"int2",
|
|
741
|
-
"int4",
|
|
742
|
-
"int8",
|
|
743
|
-
"integer",
|
|
744
|
-
"serial",
|
|
745
|
-
"serial2",
|
|
746
|
-
"serial4",
|
|
747
|
-
"serial8",
|
|
748
|
-
"smallint",
|
|
749
|
-
"smallserial",
|
|
750
|
-
):
|
|
785
|
+
if type_.lower() in self.INT_TYPES:
|
|
751
786
|
try:
|
|
752
787
|
value = int(value)
|
|
753
788
|
except ValueError:
|
|
754
789
|
raise
|
|
755
|
-
if type_.lower() in
|
|
756
|
-
"char",
|
|
757
|
-
"character",
|
|
758
|
-
"character varying",
|
|
759
|
-
"text",
|
|
760
|
-
"uuid",
|
|
761
|
-
"varchar",
|
|
762
|
-
):
|
|
790
|
+
if type_.lower() in self.CHAR_TYPES:
|
|
763
791
|
value = value.lstrip("'").rstrip("'")
|
|
764
792
|
if type_.lower() == "boolean":
|
|
765
793
|
value = bool(value)
|
|
766
|
-
if type_.lower() in
|
|
767
|
-
"double precision",
|
|
768
|
-
"float4",
|
|
769
|
-
"float8",
|
|
770
|
-
"real",
|
|
771
|
-
):
|
|
794
|
+
if type_.lower() in self.FLOAT_TYPES:
|
|
772
795
|
try:
|
|
773
796
|
value = float(value)
|
|
774
797
|
except ValueError:
|
|
@@ -999,14 +1022,7 @@ def _pg_engine(
|
|
|
999
1022
|
sslrootcert = sslrootcert or PG_SSLROOTCERT
|
|
1000
1023
|
|
|
1001
1024
|
if sslmode:
|
|
1002
|
-
if sslmode not in
|
|
1003
|
-
"allow",
|
|
1004
|
-
"disable",
|
|
1005
|
-
"prefer",
|
|
1006
|
-
"require",
|
|
1007
|
-
"verify-ca",
|
|
1008
|
-
"verify-full",
|
|
1009
|
-
):
|
|
1025
|
+
if sslmode not in SSL_MODES:
|
|
1010
1026
|
raise ValueError(f'Invalid sslmode: "{sslmode}"')
|
|
1011
1027
|
connect_args["sslmode"] = sslmode
|
|
1012
1028
|
|
|
@@ -89,6 +89,7 @@ ELASTICSEARCH_TYPES = [
|
|
|
89
89
|
"constant_keyword",
|
|
90
90
|
"date",
|
|
91
91
|
"date_range",
|
|
92
|
+
"dense_vector",
|
|
92
93
|
"double",
|
|
93
94
|
"double_range",
|
|
94
95
|
"flattened",
|
|
@@ -207,5 +208,5 @@ LOGICAL_SLOT_PREFIX = re.compile(
|
|
|
207
208
|
r"table\s\"?(?P<schema>[\w-]+)\"?.\"?(?P<table>[\w-]+)\"?:\s(?P<tg_op>[A-Z]+):" # noqa E501
|
|
208
209
|
)
|
|
209
210
|
LOGICAL_SLOT_SUFFIX = re.compile(
|
|
210
|
-
'\s(?P<key>"?\w+"?)\[(?P<type>[\w\s]+)\]:(?P<value>[
|
|
211
|
+
r'\s(?P<key>"?\w+"?)\[(?P<type>[\w\s]+)\]:(?P<value>(?:"[^"]*"|\'[^\']*\'|null|\d+e[+-]?\d+|\w+))'
|
|
211
212
|
)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""PGSync Node class representation."""
|
|
2
|
+
|
|
2
3
|
from __future__ import annotations
|
|
3
4
|
|
|
4
5
|
import re
|
|
@@ -68,6 +69,9 @@ class ForeignKey:
|
|
|
68
69
|
def __str__(self):
|
|
69
70
|
return f"foreign_key: {self.parent}:{self.child}"
|
|
70
71
|
|
|
72
|
+
def __repr__(self):
|
|
73
|
+
return self.__str__()
|
|
74
|
+
|
|
71
75
|
|
|
72
76
|
@dataclass
|
|
73
77
|
class Relationship:
|
|
@@ -113,6 +117,9 @@ class Relationship:
|
|
|
113
117
|
def __str__(self):
|
|
114
118
|
return f"relationship: {self.variant}.{self.type}:{self.tables}"
|
|
115
119
|
|
|
120
|
+
def __repr__(self):
|
|
121
|
+
return self.__str__()
|
|
122
|
+
|
|
116
123
|
|
|
117
124
|
@dataclass
|
|
118
125
|
class Node(object):
|
|
@@ -277,6 +284,7 @@ class Tree(threading.local):
|
|
|
277
284
|
def __post_init__(self):
|
|
278
285
|
self.tables: t.Set[str] = set()
|
|
279
286
|
self.__nodes: t.Dict[Node] = {}
|
|
287
|
+
self.__schemas: t.Set[str] = set()
|
|
280
288
|
self.root: t.Optional[Node] = None
|
|
281
289
|
self.build(self.nodes)
|
|
282
290
|
|
|
@@ -327,6 +335,7 @@ class Tree(threading.local):
|
|
|
327
335
|
node.add_child(self.build(child))
|
|
328
336
|
|
|
329
337
|
self.__nodes[key] = node
|
|
338
|
+
self.__schemas.add(schema)
|
|
330
339
|
return node
|
|
331
340
|
|
|
332
341
|
def get_node(self, table: str, schema: str) -> Node:
|
|
@@ -345,3 +354,7 @@ class Tree(threading.local):
|
|
|
345
354
|
else:
|
|
346
355
|
raise RuntimeError(f"Node for {schema}.{table} not found")
|
|
347
356
|
return self.__nodes[key]
|
|
357
|
+
|
|
358
|
+
@property
|
|
359
|
+
def schemas(self) -> t.Set[str]:
|
|
360
|
+
return self.__schemas
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""PGSync Plugin."""
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
5
|
+
import sys
|
|
4
6
|
import typing as t
|
|
5
7
|
from abc import ABC, abstractmethod
|
|
6
8
|
from importlib import import_module
|
|
@@ -42,7 +44,14 @@ class Plugins(object):
|
|
|
42
44
|
self.plugins: list = []
|
|
43
45
|
self._paths: list = []
|
|
44
46
|
logger.debug(f"Reloading plugins from package: {self.package}")
|
|
45
|
-
|
|
47
|
+
# skip in test
|
|
48
|
+
if "test" not in sys.argv[0]:
|
|
49
|
+
self.walk(self.package)
|
|
50
|
+
|
|
51
|
+
# main plugin ordering
|
|
52
|
+
self.plugins = sorted(
|
|
53
|
+
self.plugins, key=lambda x: self.names.index(x.name)
|
|
54
|
+
)
|
|
46
55
|
|
|
47
56
|
def walk(self, package: str) -> None:
|
|
48
57
|
"""Recursively walk the supplied package and fetch all plugins."""
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""PGSync QueryBuilder."""
|
|
2
|
+
|
|
2
3
|
import threading
|
|
3
4
|
import typing as t
|
|
4
5
|
from collections import defaultdict
|
|
@@ -20,6 +21,23 @@ class QueryBuilder(threading.local):
|
|
|
20
21
|
self.isouter: bool = True
|
|
21
22
|
self._cache: dict = {}
|
|
22
23
|
|
|
24
|
+
def _eval_expression(
|
|
25
|
+
self, expression: sa.sql.elements.BinaryExpression
|
|
26
|
+
) -> sa.sql.elements.BinaryExpression:
|
|
27
|
+
if isinstance(
|
|
28
|
+
expression.left.type, sa.dialects.postgresql.UUID
|
|
29
|
+
) or isinstance(expression.right.type, sa.dialects.postgresql.UUID):
|
|
30
|
+
if not isinstance(
|
|
31
|
+
expression.left.type, sa.dialects.postgresql.UUID
|
|
32
|
+
) or not isinstance(
|
|
33
|
+
expression.right.type, sa.dialects.postgresql.UUID
|
|
34
|
+
):
|
|
35
|
+
# handle UUID typed expressions:
|
|
36
|
+
# psycopg2.errors.UndefinedFunction: operator does not exist: uuid = integer
|
|
37
|
+
return expression.left is None
|
|
38
|
+
|
|
39
|
+
return expression
|
|
40
|
+
|
|
23
41
|
def _build_filters(
|
|
24
42
|
self, filters: t.Dict[str, t.List[dict]], node: Node
|
|
25
43
|
) -> t.Optional[sa.sql.elements.BooleanClauseList]:
|
|
@@ -45,7 +63,11 @@ class QueryBuilder(threading.local):
|
|
|
45
63
|
for values in filters.get(node.table):
|
|
46
64
|
where: t.List = []
|
|
47
65
|
for column, value in values.items():
|
|
48
|
-
where.append(
|
|
66
|
+
where.append(
|
|
67
|
+
self._eval_expression(
|
|
68
|
+
node.model.c[column] == value
|
|
69
|
+
)
|
|
70
|
+
)
|
|
49
71
|
# and clause is applied for composite primary keys
|
|
50
72
|
clause.append(sa.and_(*where))
|
|
51
73
|
return sa.or_(*clause)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""PGSync SearchClient helper."""
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
import typing as t
|
|
4
5
|
from collections import defaultdict
|
|
@@ -173,7 +174,7 @@ class SearchClient(object):
|
|
|
173
174
|
):
|
|
174
175
|
"""Bulk index, update, delete docs to Elasticsearch/OpenSearch."""
|
|
175
176
|
if settings.ELASTICSEARCH_STREAMING_BULK:
|
|
176
|
-
for ok,
|
|
177
|
+
for ok, info in self.streaming_bulk(
|
|
177
178
|
self.__client,
|
|
178
179
|
actions,
|
|
179
180
|
index=index,
|
|
@@ -188,10 +189,12 @@ class SearchClient(object):
|
|
|
188
189
|
):
|
|
189
190
|
if ok:
|
|
190
191
|
self.doc_count += 1
|
|
192
|
+
else:
|
|
193
|
+
logger.error(f"Document failed to index: {info}")
|
|
191
194
|
else:
|
|
192
195
|
# parallel bulk consumes more memory and is also more likely
|
|
193
196
|
# to result in 429 errors.
|
|
194
|
-
for ok,
|
|
197
|
+
for ok, info in self.parallel_bulk(
|
|
195
198
|
self.__client,
|
|
196
199
|
actions,
|
|
197
200
|
thread_count=thread_count,
|
|
@@ -205,6 +208,8 @@ class SearchClient(object):
|
|
|
205
208
|
):
|
|
206
209
|
if ok:
|
|
207
210
|
self.doc_count += 1
|
|
211
|
+
else:
|
|
212
|
+
logger.error(f"Document failed to index: {info}")
|
|
208
213
|
|
|
209
214
|
def refresh(self, indices: t.List[str]) -> None:
|
|
210
215
|
"""Refresh the Elasticsearch/OpenSearch index."""
|
|
@@ -245,7 +250,7 @@ class SearchClient(object):
|
|
|
245
250
|
if "is out of range for a long" not in str(e):
|
|
246
251
|
raise
|
|
247
252
|
|
|
248
|
-
def search(self, index: str, body: dict):
|
|
253
|
+
def search(self, index: str, body: dict) -> t.Any:
|
|
249
254
|
"""
|
|
250
255
|
Search in Elasticsearch/OpenSearch.
|
|
251
256
|
|
|
@@ -259,6 +264,7 @@ class SearchClient(object):
|
|
|
259
264
|
tree: Tree,
|
|
260
265
|
setting: t.Optional[dict] = None,
|
|
261
266
|
mapping: t.Optional[dict] = None,
|
|
267
|
+
mappings: t.Optional[dict] = None,
|
|
262
268
|
routing: t.Optional[str] = None,
|
|
263
269
|
) -> None:
|
|
264
270
|
"""Create Elasticsearch/OpenSearch setting and mapping if required."""
|
|
@@ -267,7 +273,8 @@ class SearchClient(object):
|
|
|
267
273
|
if not self.__client.indices.exists(index=index):
|
|
268
274
|
if setting:
|
|
269
275
|
body.update(**{"settings": {"index": setting}})
|
|
270
|
-
|
|
276
|
+
if mappings:
|
|
277
|
+
body.update(**{"mappings": {"index": mappings}})
|
|
271
278
|
if mapping:
|
|
272
279
|
if "dynamic_templates" in mapping:
|
|
273
280
|
body.update(**{"mappings": mapping})
|
|
@@ -381,9 +388,9 @@ def get_search_client(
|
|
|
381
388
|
service,
|
|
382
389
|
session_token=credentials.token,
|
|
383
390
|
),
|
|
384
|
-
use_ssl=True,
|
|
385
391
|
verify_certs=True,
|
|
386
392
|
connection_class=connection_class,
|
|
393
|
+
timeout=settings.ELASTICSEARCH_TIMEOUT,
|
|
387
394
|
)
|
|
388
395
|
elif settings.ELASTICSEARCH:
|
|
389
396
|
return client(
|
|
@@ -395,18 +402,18 @@ def get_search_client(
|
|
|
395
402
|
service,
|
|
396
403
|
session_token=credentials.token,
|
|
397
404
|
),
|
|
398
|
-
use_ssl=True,
|
|
399
405
|
verify_certs=True,
|
|
400
406
|
node_class=node_class,
|
|
407
|
+
timeout=settings.ELASTICSEARCH_TIMEOUT,
|
|
401
408
|
)
|
|
402
409
|
else:
|
|
403
410
|
hosts: t.List[str] = [url]
|
|
404
411
|
# API
|
|
405
412
|
cloud_id: t.Optional[str] = settings.ELASTICSEARCH_CLOUD_ID
|
|
406
413
|
api_key: t.Optional[t.Union[str, t.Tuple[str, str]]] = None
|
|
407
|
-
http_auth: t.Optional[
|
|
408
|
-
|
|
409
|
-
|
|
414
|
+
http_auth: t.Optional[t.Union[str, t.Tuple[str, str]]] = (
|
|
415
|
+
settings.ELASTICSEARCH_HTTP_AUTH
|
|
416
|
+
)
|
|
410
417
|
if (
|
|
411
418
|
settings.ELASTICSEARCH_API_KEY_ID
|
|
412
419
|
and settings.ELASTICSEARCH_API_KEY
|
|
@@ -424,17 +431,15 @@ def get_search_client(
|
|
|
424
431
|
ca_certs: t.Optional[str] = settings.ELASTICSEARCH_CA_CERTS
|
|
425
432
|
client_cert: t.Optional[str] = settings.ELASTICSEARCH_CLIENT_CERT
|
|
426
433
|
client_key: t.Optional[str] = settings.ELASTICSEARCH_CLIENT_KEY
|
|
427
|
-
ssl_assert_hostname: t.Optional[
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
ssl_assert_fingerprint: t.Optional[
|
|
431
|
-
|
|
432
|
-
|
|
434
|
+
ssl_assert_hostname: t.Optional[str] = (
|
|
435
|
+
settings.ELASTICSEARCH_SSL_ASSERT_HOSTNAME
|
|
436
|
+
)
|
|
437
|
+
ssl_assert_fingerprint: t.Optional[str] = (
|
|
438
|
+
settings.ELASTICSEARCH_SSL_ASSERT_FINGERPRINT
|
|
439
|
+
)
|
|
433
440
|
ssl_version: t.Optional[int] = settings.ELASTICSEARCH_SSL_VERSION
|
|
434
441
|
ssl_context: t.Optional[t.Any] = settings.ELASTICSEARCH_SSL_CONTEXT
|
|
435
442
|
ssl_show_warn: bool = settings.ELASTICSEARCH_SSL_SHOW_WARN
|
|
436
|
-
# Transport
|
|
437
|
-
timeout: float = settings.ELASTICSEARCH_TIMEOUT
|
|
438
443
|
return client(
|
|
439
444
|
hosts=hosts,
|
|
440
445
|
http_auth=http_auth,
|
|
@@ -453,6 +458,5 @@ def get_search_client(
|
|
|
453
458
|
ssl_version=ssl_version,
|
|
454
459
|
ssl_context=ssl_context,
|
|
455
460
|
ssl_show_warn=ssl_show_warn,
|
|
456
|
-
|
|
457
|
-
timeout=timeout,
|
|
461
|
+
timeout=settings.ELASTICSEARCH_TIMEOUT,
|
|
458
462
|
)
|
|
@@ -4,6 +4,7 @@ This module contains the settings for PGSync.
|
|
|
4
4
|
It reads environment variables from a .env file and sets default values for each variable.
|
|
5
5
|
The variables are used to configure various parameters such as block size, checkpoint path, polling interval, etc.
|
|
6
6
|
"""
|
|
7
|
+
|
|
7
8
|
import logging
|
|
8
9
|
import logging.config
|
|
9
10
|
import os
|
|
@@ -148,7 +149,7 @@ elif ELASTICSEARCH:
|
|
|
148
149
|
OPENSEARCH_AWS_HOSTED = env.bool("OPENSEARCH_AWS_HOSTED", default=False)
|
|
149
150
|
OPENSEARCH_AWS_SERVERLESS = env.bool(
|
|
150
151
|
"OPENSEARCH_AWS_SERVERLESS", default=False
|
|
151
|
-
)
|
|
152
|
+
)
|
|
152
153
|
|
|
153
154
|
# Postgres:
|
|
154
155
|
PG_HOST = env.str("PG_HOST", default="localhost")
|
|
@@ -160,6 +161,7 @@ PG_USER = env.str("PG_USER")
|
|
|
160
161
|
|
|
161
162
|
# Redis:
|
|
162
163
|
REDIS_AUTH = env.str("REDIS_AUTH", default=None)
|
|
164
|
+
REDIS_USER = env.str("REDIS_USER", default=None)
|
|
163
165
|
REDIS_DB = env.int("REDIS_DB", default=0)
|
|
164
166
|
REDIS_HOST = env.str("REDIS_HOST", default="localhost")
|
|
165
167
|
# redis poll interval (in secs)
|