pgsync 3.1.0__tar.gz → 3.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. {pgsync-3.1.0 → pgsync-3.2.1}/PKG-INFO +24 -23
  2. {pgsync-3.1.0 → pgsync-3.2.1}/bin/bootstrap +9 -1
  3. {pgsync-3.1.0 → pgsync-3.2.1}/bin/parallel_sync +1 -1
  4. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/__init__.py +1 -1
  5. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/base.py +59 -43
  6. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/constants.py +2 -1
  7. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/helper.py +1 -0
  8. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/node.py +13 -0
  9. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/plugin.py +10 -1
  10. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/querybuilder.py +23 -1
  11. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/redisqueue.py +1 -0
  12. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/search_client.py +23 -19
  13. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/settings.py +3 -1
  14. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/sync.py +74 -39
  15. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/transform.py +1 -0
  16. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/urls.py +8 -0
  17. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/utils.py +1 -0
  18. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/view.py +16 -9
  19. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/PKG-INFO +24 -23
  20. pgsync-3.2.1/pgsync.egg-info/requires.txt +30 -0
  21. {pgsync-3.1.0 → pgsync-3.2.1}/setup.py +5 -8
  22. {pgsync-3.1.0 → pgsync-3.2.1}/tests/conftest.py +1 -0
  23. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_base.py +33 -3
  24. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_constants.py +2 -0
  25. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_env_vars.py +1 -0
  26. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_helper.py +1 -0
  27. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_log_handlers.py +1 -0
  28. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_node.py +1 -0
  29. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_query_builder.py +1 -0
  30. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_search_client.py +2 -1
  31. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_settings.py +1 -0
  32. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync.py +14 -8
  33. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync_nested_children.py +23 -5
  34. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync_root.py +206 -49
  35. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync_single_child_fk_on_child.py +71 -62
  36. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_sync_single_child_fk_on_parent.py +81 -60
  37. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_transform.py +1 -0
  38. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_trigger.py +1 -0
  39. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_unique_behaviour.py +1 -0
  40. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_urls.py +19 -0
  41. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_utils.py +1 -0
  42. pgsync-3.1.0/pgsync.egg-info/requires.txt +0 -29
  43. {pgsync-3.1.0 → pgsync-3.2.1}/AUTHORS.rst +0 -0
  44. {pgsync-3.1.0 → pgsync-3.2.1}/CONTRIBUTING.rst +0 -0
  45. {pgsync-3.1.0 → pgsync-3.2.1}/HISTORY.rst +0 -0
  46. {pgsync-3.1.0 → pgsync-3.2.1}/LICENSE +0 -0
  47. {pgsync-3.1.0 → pgsync-3.2.1}/MANIFEST.in +0 -0
  48. {pgsync-3.1.0 → pgsync-3.2.1}/README.md +0 -0
  49. {pgsync-3.1.0 → pgsync-3.2.1}/README.rst +0 -0
  50. {pgsync-3.1.0 → pgsync-3.2.1}/bin/pgsync +0 -0
  51. {pgsync-3.1.0 → pgsync-3.2.1}/docs/Makefile +0 -0
  52. {pgsync-3.1.0 → pgsync-3.2.1}/docs/authors.rst +0 -0
  53. {pgsync-3.1.0 → pgsync-3.2.1}/docs/changelog.rst +0 -0
  54. {pgsync-3.1.0 → pgsync-3.2.1}/docs/conf.py +0 -0
  55. {pgsync-3.1.0 → pgsync-3.2.1}/docs/contributing.rst +0 -0
  56. {pgsync-3.1.0 → pgsync-3.2.1}/docs/history.rst +0 -0
  57. {pgsync-3.1.0 → pgsync-3.2.1}/docs/index.rst +0 -0
  58. {pgsync-3.1.0 → pgsync-3.2.1}/docs/installation.rst +0 -0
  59. {pgsync-3.1.0 → pgsync-3.2.1}/docs/logo.png +0 -0
  60. {pgsync-3.1.0 → pgsync-3.2.1}/docs/make.bat +0 -0
  61. {pgsync-3.1.0 → pgsync-3.2.1}/docs/readme.rst +0 -0
  62. {pgsync-3.1.0 → pgsync-3.2.1}/docs/usage.rst +0 -0
  63. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/exc.py +0 -0
  64. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/singleton.py +0 -0
  65. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync/trigger.py +0 -0
  66. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/SOURCES.txt +0 -0
  67. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/dependency_links.txt +0 -0
  68. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/not-zip-safe +0 -0
  69. {pgsync-3.1.0 → pgsync-3.2.1}/pgsync.egg-info/top_level.txt +0 -0
  70. {pgsync-3.1.0 → pgsync-3.2.1}/pyproject.toml +0 -0
  71. {pgsync-3.1.0 → pgsync-3.2.1}/setup.cfg +0 -0
  72. {pgsync-3.1.0 → pgsync-3.2.1}/tests/__init__.py +0 -0
  73. {pgsync-3.1.0 → pgsync-3.2.1}/tests/fixtures/schema.json +0 -0
  74. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_redisqueue.py +0 -0
  75. {pgsync-3.1.0 → pgsync-3.2.1}/tests/test_view.py +0 -0
  76. {pgsync-3.1.0 → pgsync-3.2.1}/tests/testing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pgsync
3
- Version: 3.1.0
3
+ Version: 3.2.1
4
4
  Summary: Postgres to Elasticsearch/OpenSearch sync
5
5
  Home-page: https://github.com/toluaina/pgsync
6
6
  Author: Tolu Aina
@@ -13,7 +13,7 @@ Project-URL: Funding, https://github.com/sponsors/toluaina
13
13
  Project-URL: Source, https://github.com/toluaina/pgsync
14
14
  Project-URL: Web, https://pgsync.com
15
15
  Project-URL: Documentation, https://pgsync.com
16
- Keywords: pgsync,elasticsearch,opensearch,postgres,change data capture
16
+ Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres
17
17
  Classifier: Development Status :: 5 - Production/Stable
18
18
  Classifier: Intended Audience :: Developers
19
19
  Classifier: Natural Language :: English
@@ -31,34 +31,35 @@ Description-Content-Type: text/markdown
31
31
  License-File: LICENSE
32
32
  License-File: AUTHORS.rst
33
33
  Requires-Dist: async-timeout==4.0.3
34
- Requires-Dist: boto3==1.34.11
35
- Requires-Dist: botocore==1.34.11
36
- Requires-Dist: certifi==2023.11.17
34
+ Requires-Dist: boto3==1.35.5
35
+ Requires-Dist: botocore==1.35.5
36
+ Requires-Dist: certifi==2024.7.4
37
37
  Requires-Dist: charset-normalizer==3.3.2
38
38
  Requires-Dist: click==8.1.7
39
- Requires-Dist: elastic-transport==8.11.0
40
- Requires-Dist: elasticsearch==8.11.1
41
- Requires-Dist: elasticsearch-dsl==8.11.0
42
- Requires-Dist: environs==10.0.0
39
+ Requires-Dist: elastic-transport==8.15.0
40
+ Requires-Dist: elasticsearch==8.15.0
41
+ Requires-Dist: elasticsearch-dsl==8.15.1
42
+ Requires-Dist: environs==11.0.0
43
+ Requires-Dist: events==0.5
43
44
  Requires-Dist: greenlet==3.0.3
44
- Requires-Dist: idna==3.6
45
+ Requires-Dist: idna==3.8
45
46
  Requires-Dist: jmespath==1.0.1
46
- Requires-Dist: marshmallow==3.20.1
47
+ Requires-Dist: marshmallow==3.22.0
47
48
  Requires-Dist: opensearch-dsl==2.1.0
48
- Requires-Dist: opensearch-py==2.4.2
49
- Requires-Dist: packaging==23.2
49
+ Requires-Dist: opensearch-py==2.7.1
50
+ Requires-Dist: packaging==24.1
50
51
  Requires-Dist: psycopg2-binary==2.9.9
51
- Requires-Dist: python-dateutil==2.8.2
52
- Requires-Dist: python-dotenv==1.0.0
53
- Requires-Dist: redis==5.0.1
54
- Requires-Dist: requests==2.31.0
55
- Requires-Dist: requests-aws4auth==1.2.3
56
- Requires-Dist: s3transfer==0.10.0
52
+ Requires-Dist: python-dateutil==2.9.0.post0
53
+ Requires-Dist: python-dotenv==1.0.1
54
+ Requires-Dist: redis==5.0.8
55
+ Requires-Dist: requests==2.32.3
56
+ Requires-Dist: requests-aws4auth==1.3.1
57
+ Requires-Dist: s3transfer==0.10.2
57
58
  Requires-Dist: six==1.16.0
58
- Requires-Dist: sqlalchemy==2.0.25
59
- Requires-Dist: sqlparse==0.4.4
60
- Requires-Dist: typing-extensions==4.9.0
61
- Requires-Dist: urllib3==1.26.18
59
+ Requires-Dist: sqlalchemy==2.0.32
60
+ Requires-Dist: sqlparse==0.5.1
61
+ Requires-Dist: typing-extensions==4.12.2
62
+ Requires-Dist: urllib3==1.26.19
62
63
 
63
64
  # PostgreSQL to Elasticsearch/OpenSearch sync
64
65
 
@@ -35,7 +35,15 @@ logger = logging.getLogger(__name__)
35
35
  default=False,
36
36
  help="Turn on verbosity",
37
37
  )
38
- def main(teardown, config, user, password, host, port, verbose):
38
+ def main(
39
+ teardown: bool,
40
+ config: str,
41
+ user: str,
42
+ password: bool,
43
+ host: str,
44
+ port: int,
45
+ verbose: bool,
46
+ ) -> None:
39
47
  """Application onetime Bootstrap."""
40
48
  kwargs: dict = {
41
49
  "user": user,
@@ -409,7 +409,7 @@ def run_task(
409
409
  ),
410
410
  default="multiprocess_async",
411
411
  )
412
- def main(config, nprocs, mode, verbose):
412
+ def main(config: str, nprocs: int, mode: str, verbose: bool) -> None:
413
413
  """
414
414
  TODO:
415
415
  - Track progress across cpus/threads
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = "Tolu Aina"
4
4
  __email__ = "tolu@pgsync.com"
5
- __version__ = "3.1.0"
5
+ __version__ = "3.2.1"
@@ -1,4 +1,5 @@
1
1
  """PGSync Base."""
2
+
2
3
  import logging
3
4
  import os
4
5
  import typing as t
@@ -48,6 +49,15 @@ except ImportError:
48
49
 
49
50
  logger = logging.getLogger(__name__)
50
51
 
52
+ SSL_MODES = (
53
+ "allow",
54
+ "disable",
55
+ "prefer",
56
+ "require",
57
+ "verify-ca",
58
+ "verify-full",
59
+ )
60
+
51
61
 
52
62
  class Payload(object):
53
63
  """
@@ -141,6 +151,36 @@ class TupleIdentifierType(sa.types.UserDefinedType):
141
151
 
142
152
 
143
153
  class Base(object):
154
+ INT_TYPES = (
155
+ "bigint",
156
+ "bigserial",
157
+ "int",
158
+ "int2",
159
+ "int4",
160
+ "int8",
161
+ "integer",
162
+ "serial",
163
+ "serial2",
164
+ "serial4",
165
+ "serial8",
166
+ "smallint",
167
+ "smallserial",
168
+ )
169
+ FLOAT_TYPES = (
170
+ "double precision",
171
+ "float4",
172
+ "float8",
173
+ "real",
174
+ )
175
+ CHAR_TYPES = (
176
+ "char",
177
+ "character",
178
+ "character varying",
179
+ "text",
180
+ "uuid",
181
+ "varchar",
182
+ )
183
+
144
184
  def __init__(
145
185
  self, database: str, verbose: bool = False, *args, **kwargs
146
186
  ) -> None:
@@ -433,7 +473,7 @@ class Base(object):
433
473
  func: sa.sql.functions._FunctionGenerator,
434
474
  txmin: t.Optional[int] = None,
435
475
  txmax: t.Optional[int] = None,
436
- upto_lsn: t.Optional[int] = None,
476
+ upto_lsn: t.Optional[str] = None,
437
477
  upto_nchanges: t.Optional[int] = None,
438
478
  limit: t.Optional[int] = None,
439
479
  offset: t.Optional[int] = None,
@@ -446,7 +486,7 @@ class Base(object):
446
486
  func (sa.sql.functions._FunctionGenerator): The function to use to read from the slot.
447
487
  txmin (Optional[int], optional): The minimum transaction ID to read from. Defaults to None.
448
488
  txmax (Optional[int], optional): The maximum transaction ID to read from. Defaults to None.
449
- upto_lsn (Optional[int], optional): The maximum LSN to read up to. Defaults to None.
489
+ upto_lsn (Optional[str], optional): The maximum LSN to read up to. Defaults to None.
450
490
  upto_nchanges (Optional[int], optional): The maximum number of changes to read. Defaults to None.
451
491
  limit (Optional[int], optional): The maximum number of rows to return. Defaults to None.
452
492
  offset (Optional[int], optional): The number of rows to skip before returning. Defaults to None.
@@ -489,12 +529,20 @@ class Base(object):
489
529
  statement = statement.offset(offset)
490
530
  return statement
491
531
 
532
+ @property
533
+ def current_wal_lsn(self) -> str:
534
+ return self.fetchone(
535
+ sa.select(sa.func.MAX(sa.text("pg_current_wal_lsn"))).select_from(
536
+ sa.func.PG_CURRENT_WAL_LSN()
537
+ )
538
+ )[0]
539
+
492
540
  def logical_slot_get_changes(
493
541
  self,
494
542
  slot_name: str,
495
543
  txmin: t.Optional[int] = None,
496
544
  txmax: t.Optional[int] = None,
497
- upto_lsn: t.Optional[int] = None,
545
+ upto_lsn: t.Optional[str] = None,
498
546
  upto_nchanges: t.Optional[int] = None,
499
547
  limit: t.Optional[int] = None,
500
548
  offset: t.Optional[int] = None,
@@ -524,7 +572,7 @@ class Base(object):
524
572
  slot_name: str,
525
573
  txmin: t.Optional[int] = None,
526
574
  txmax: t.Optional[int] = None,
527
- upto_lsn: t.Optional[int] = None,
575
+ upto_lsn: t.Optional[str] = None,
528
576
  upto_nchanges: t.Optional[int] = None,
529
577
  limit: t.Optional[int] = None,
530
578
  offset: t.Optional[int] = None,
@@ -550,7 +598,7 @@ class Base(object):
550
598
  slot_name: str,
551
599
  txmin: t.Optional[int] = None,
552
600
  txmax: t.Optional[int] = None,
553
- upto_lsn: t.Optional[int] = None,
601
+ upto_lsn: t.Optional[str] = None,
554
602
  upto_nchanges: t.Optional[int] = None,
555
603
  ) -> int:
556
604
  statement: sa.sql.Select = self._logical_slot_changes(
@@ -730,45 +778,20 @@ class Base(object):
730
778
 
731
779
  NB: All integers are long in python3 and call to convert is just int
732
780
  """
781
+ if self.verbose:
782
+ logger.debug(f"type: {type_} value: {value}")
733
783
  if value.lower() == "null":
734
784
  return None
735
-
736
- if type_.lower() in (
737
- "bigint",
738
- "bigserial",
739
- "int",
740
- "int2",
741
- "int4",
742
- "int8",
743
- "integer",
744
- "serial",
745
- "serial2",
746
- "serial4",
747
- "serial8",
748
- "smallint",
749
- "smallserial",
750
- ):
785
+ if type_.lower() in self.INT_TYPES:
751
786
  try:
752
787
  value = int(value)
753
788
  except ValueError:
754
789
  raise
755
- if type_.lower() in (
756
- "char",
757
- "character",
758
- "character varying",
759
- "text",
760
- "uuid",
761
- "varchar",
762
- ):
790
+ if type_.lower() in self.CHAR_TYPES:
763
791
  value = value.lstrip("'").rstrip("'")
764
792
  if type_.lower() == "boolean":
765
793
  value = bool(value)
766
- if type_.lower() in (
767
- "double precision",
768
- "float4",
769
- "float8",
770
- "real",
771
- ):
794
+ if type_.lower() in self.FLOAT_TYPES:
772
795
  try:
773
796
  value = float(value)
774
797
  except ValueError:
@@ -999,14 +1022,7 @@ def _pg_engine(
999
1022
  sslrootcert = sslrootcert or PG_SSLROOTCERT
1000
1023
 
1001
1024
  if sslmode:
1002
- if sslmode not in (
1003
- "allow",
1004
- "disable",
1005
- "prefer",
1006
- "require",
1007
- "verify-ca",
1008
- "verify-full",
1009
- ):
1025
+ if sslmode not in SSL_MODES:
1010
1026
  raise ValueError(f'Invalid sslmode: "{sslmode}"')
1011
1027
  connect_args["sslmode"] = sslmode
1012
1028
 
@@ -89,6 +89,7 @@ ELASTICSEARCH_TYPES = [
89
89
  "constant_keyword",
90
90
  "date",
91
91
  "date_range",
92
+ "dense_vector",
92
93
  "double",
93
94
  "double_range",
94
95
  "flattened",
@@ -207,5 +208,5 @@ LOGICAL_SLOT_PREFIX = re.compile(
207
208
  r"table\s\"?(?P<schema>[\w-]+)\"?.\"?(?P<table>[\w-]+)\"?:\s(?P<tg_op>[A-Z]+):" # noqa E501
208
209
  )
209
210
  LOGICAL_SLOT_SUFFIX = re.compile(
210
- '\s(?P<key>"?\w+"?)\[(?P<type>[\w\s]+)\]:(?P<value>[\w\'"\-]+)'
211
+ r'\s(?P<key>"?\w+"?)\[(?P<type>[\w\s]+)\]:(?P<value>(?:"[^"]*"|\'[^\']*\'|null|\d+e[+-]?\d+|\w+))'
211
212
  )
@@ -1,4 +1,5 @@
1
1
  """PGSync helpers."""
2
+
2
3
  import logging
3
4
  import os
4
5
  import typing as t
@@ -1,4 +1,5 @@
1
1
  """PGSync Node class representation."""
2
+
2
3
  from __future__ import annotations
3
4
 
4
5
  import re
@@ -68,6 +69,9 @@ class ForeignKey:
68
69
  def __str__(self):
69
70
  return f"foreign_key: {self.parent}:{self.child}"
70
71
 
72
+ def __repr__(self):
73
+ return self.__str__()
74
+
71
75
 
72
76
  @dataclass
73
77
  class Relationship:
@@ -113,6 +117,9 @@ class Relationship:
113
117
  def __str__(self):
114
118
  return f"relationship: {self.variant}.{self.type}:{self.tables}"
115
119
 
120
+ def __repr__(self):
121
+ return self.__str__()
122
+
116
123
 
117
124
  @dataclass
118
125
  class Node(object):
@@ -277,6 +284,7 @@ class Tree(threading.local):
277
284
  def __post_init__(self):
278
285
  self.tables: t.Set[str] = set()
279
286
  self.__nodes: t.Dict[Node] = {}
287
+ self.__schemas: t.Set[str] = set()
280
288
  self.root: t.Optional[Node] = None
281
289
  self.build(self.nodes)
282
290
 
@@ -327,6 +335,7 @@ class Tree(threading.local):
327
335
  node.add_child(self.build(child))
328
336
 
329
337
  self.__nodes[key] = node
338
+ self.__schemas.add(schema)
330
339
  return node
331
340
 
332
341
  def get_node(self, table: str, schema: str) -> Node:
@@ -345,3 +354,7 @@ class Tree(threading.local):
345
354
  else:
346
355
  raise RuntimeError(f"Node for {schema}.{table} not found")
347
356
  return self.__nodes[key]
357
+
358
+ @property
359
+ def schemas(self) -> t.Set[str]:
360
+ return self.__schemas
@@ -1,6 +1,8 @@
1
1
  """PGSync Plugin."""
2
+
2
3
  import logging
3
4
  import os
5
+ import sys
4
6
  import typing as t
5
7
  from abc import ABC, abstractmethod
6
8
  from importlib import import_module
@@ -42,7 +44,14 @@ class Plugins(object):
42
44
  self.plugins: list = []
43
45
  self._paths: list = []
44
46
  logger.debug(f"Reloading plugins from package: {self.package}")
45
- self.walk(self.package)
47
+ # skip in test
48
+ if "test" not in sys.argv[0]:
49
+ self.walk(self.package)
50
+
51
+ # main plugin ordering
52
+ self.plugins = sorted(
53
+ self.plugins, key=lambda x: self.names.index(x.name)
54
+ )
46
55
 
47
56
  def walk(self, package: str) -> None:
48
57
  """Recursively walk the supplied package and fetch all plugins."""
@@ -1,4 +1,5 @@
1
1
  """PGSync QueryBuilder."""
2
+
2
3
  import threading
3
4
  import typing as t
4
5
  from collections import defaultdict
@@ -20,6 +21,23 @@ class QueryBuilder(threading.local):
20
21
  self.isouter: bool = True
21
22
  self._cache: dict = {}
22
23
 
24
+ def _eval_expression(
25
+ self, expression: sa.sql.elements.BinaryExpression
26
+ ) -> sa.sql.elements.BinaryExpression:
27
+ if isinstance(
28
+ expression.left.type, sa.dialects.postgresql.UUID
29
+ ) or isinstance(expression.right.type, sa.dialects.postgresql.UUID):
30
+ if not isinstance(
31
+ expression.left.type, sa.dialects.postgresql.UUID
32
+ ) or not isinstance(
33
+ expression.right.type, sa.dialects.postgresql.UUID
34
+ ):
35
+ # handle UUID typed expressions:
36
+ # psycopg2.errors.UndefinedFunction: operator does not exist: uuid = integer
37
+ return expression.left is None
38
+
39
+ return expression
40
+
23
41
  def _build_filters(
24
42
  self, filters: t.Dict[str, t.List[dict]], node: Node
25
43
  ) -> t.Optional[sa.sql.elements.BooleanClauseList]:
@@ -45,7 +63,11 @@ class QueryBuilder(threading.local):
45
63
  for values in filters.get(node.table):
46
64
  where: t.List = []
47
65
  for column, value in values.items():
48
- where.append(node.model.c[column] == value)
66
+ where.append(
67
+ self._eval_expression(
68
+ node.model.c[column] == value
69
+ )
70
+ )
49
71
  # and clause is applied for composite primary keys
50
72
  clause.append(sa.and_(*where))
51
73
  return sa.or_(*clause)
@@ -1,4 +1,5 @@
1
1
  """PGSync RedisQueue."""
2
+
2
3
  import json
3
4
  import logging
4
5
  import typing as t
@@ -1,4 +1,5 @@
1
1
  """PGSync SearchClient helper."""
2
+
2
3
  import logging
3
4
  import typing as t
4
5
  from collections import defaultdict
@@ -173,7 +174,7 @@ class SearchClient(object):
173
174
  ):
174
175
  """Bulk index, update, delete docs to Elasticsearch/OpenSearch."""
175
176
  if settings.ELASTICSEARCH_STREAMING_BULK:
176
- for ok, _ in self.streaming_bulk(
177
+ for ok, info in self.streaming_bulk(
177
178
  self.__client,
178
179
  actions,
179
180
  index=index,
@@ -188,10 +189,12 @@ class SearchClient(object):
188
189
  ):
189
190
  if ok:
190
191
  self.doc_count += 1
192
+ else:
193
+ logger.error(f"Document failed to index: {info}")
191
194
  else:
192
195
  # parallel bulk consumes more memory and is also more likely
193
196
  # to result in 429 errors.
194
- for ok, _ in self.parallel_bulk(
197
+ for ok, info in self.parallel_bulk(
195
198
  self.__client,
196
199
  actions,
197
200
  thread_count=thread_count,
@@ -205,6 +208,8 @@ class SearchClient(object):
205
208
  ):
206
209
  if ok:
207
210
  self.doc_count += 1
211
+ else:
212
+ logger.error(f"Document failed to index: {info}")
208
213
 
209
214
  def refresh(self, indices: t.List[str]) -> None:
210
215
  """Refresh the Elasticsearch/OpenSearch index."""
@@ -245,7 +250,7 @@ class SearchClient(object):
245
250
  if "is out of range for a long" not in str(e):
246
251
  raise
247
252
 
248
- def search(self, index: str, body: dict):
253
+ def search(self, index: str, body: dict) -> t.Any:
249
254
  """
250
255
  Search in Elasticsearch/OpenSearch.
251
256
 
@@ -259,6 +264,7 @@ class SearchClient(object):
259
264
  tree: Tree,
260
265
  setting: t.Optional[dict] = None,
261
266
  mapping: t.Optional[dict] = None,
267
+ mappings: t.Optional[dict] = None,
262
268
  routing: t.Optional[str] = None,
263
269
  ) -> None:
264
270
  """Create Elasticsearch/OpenSearch setting and mapping if required."""
@@ -267,7 +273,8 @@ class SearchClient(object):
267
273
  if not self.__client.indices.exists(index=index):
268
274
  if setting:
269
275
  body.update(**{"settings": {"index": setting}})
270
-
276
+ if mappings:
277
+ body.update(**{"mappings": {"index": mappings}})
271
278
  if mapping:
272
279
  if "dynamic_templates" in mapping:
273
280
  body.update(**{"mappings": mapping})
@@ -381,9 +388,9 @@ def get_search_client(
381
388
  service,
382
389
  session_token=credentials.token,
383
390
  ),
384
- use_ssl=True,
385
391
  verify_certs=True,
386
392
  connection_class=connection_class,
393
+ timeout=settings.ELASTICSEARCH_TIMEOUT,
387
394
  )
388
395
  elif settings.ELASTICSEARCH:
389
396
  return client(
@@ -395,18 +402,18 @@ def get_search_client(
395
402
  service,
396
403
  session_token=credentials.token,
397
404
  ),
398
- use_ssl=True,
399
405
  verify_certs=True,
400
406
  node_class=node_class,
407
+ timeout=settings.ELASTICSEARCH_TIMEOUT,
401
408
  )
402
409
  else:
403
410
  hosts: t.List[str] = [url]
404
411
  # API
405
412
  cloud_id: t.Optional[str] = settings.ELASTICSEARCH_CLOUD_ID
406
413
  api_key: t.Optional[t.Union[str, t.Tuple[str, str]]] = None
407
- http_auth: t.Optional[
408
- t.Union[str, t.Tuple[str, str]]
409
- ] = settings.ELASTICSEARCH_HTTP_AUTH
414
+ http_auth: t.Optional[t.Union[str, t.Tuple[str, str]]] = (
415
+ settings.ELASTICSEARCH_HTTP_AUTH
416
+ )
410
417
  if (
411
418
  settings.ELASTICSEARCH_API_KEY_ID
412
419
  and settings.ELASTICSEARCH_API_KEY
@@ -424,17 +431,15 @@ def get_search_client(
424
431
  ca_certs: t.Optional[str] = settings.ELASTICSEARCH_CA_CERTS
425
432
  client_cert: t.Optional[str] = settings.ELASTICSEARCH_CLIENT_CERT
426
433
  client_key: t.Optional[str] = settings.ELASTICSEARCH_CLIENT_KEY
427
- ssl_assert_hostname: t.Optional[
428
- str
429
- ] = settings.ELASTICSEARCH_SSL_ASSERT_HOSTNAME
430
- ssl_assert_fingerprint: t.Optional[
431
- str
432
- ] = settings.ELASTICSEARCH_SSL_ASSERT_FINGERPRINT
434
+ ssl_assert_hostname: t.Optional[str] = (
435
+ settings.ELASTICSEARCH_SSL_ASSERT_HOSTNAME
436
+ )
437
+ ssl_assert_fingerprint: t.Optional[str] = (
438
+ settings.ELASTICSEARCH_SSL_ASSERT_FINGERPRINT
439
+ )
433
440
  ssl_version: t.Optional[int] = settings.ELASTICSEARCH_SSL_VERSION
434
441
  ssl_context: t.Optional[t.Any] = settings.ELASTICSEARCH_SSL_CONTEXT
435
442
  ssl_show_warn: bool = settings.ELASTICSEARCH_SSL_SHOW_WARN
436
- # Transport
437
- timeout: float = settings.ELASTICSEARCH_TIMEOUT
438
443
  return client(
439
444
  hosts=hosts,
440
445
  http_auth=http_auth,
@@ -453,6 +458,5 @@ def get_search_client(
453
458
  ssl_version=ssl_version,
454
459
  ssl_context=ssl_context,
455
460
  ssl_show_warn=ssl_show_warn,
456
- # use_ssl=use_ssl,
457
- timeout=timeout,
461
+ timeout=settings.ELASTICSEARCH_TIMEOUT,
458
462
  )
@@ -4,6 +4,7 @@ This module contains the settings for PGSync.
4
4
  It reads environment variables from a .env file and sets default values for each variable.
5
5
  The variables are used to configure various parameters such as block size, checkpoint path, polling interval, etc.
6
6
  """
7
+
7
8
  import logging
8
9
  import logging.config
9
10
  import os
@@ -148,7 +149,7 @@ elif ELASTICSEARCH:
148
149
  OPENSEARCH_AWS_HOSTED = env.bool("OPENSEARCH_AWS_HOSTED", default=False)
149
150
  OPENSEARCH_AWS_SERVERLESS = env.bool(
150
151
  "OPENSEARCH_AWS_SERVERLESS", default=False
151
- ) # noqa E501
152
+ )
152
153
 
153
154
  # Postgres:
154
155
  PG_HOST = env.str("PG_HOST", default="localhost")
@@ -160,6 +161,7 @@ PG_USER = env.str("PG_USER")
160
161
 
161
162
  # Redis:
162
163
  REDIS_AUTH = env.str("REDIS_AUTH", default=None)
164
+ REDIS_USER = env.str("REDIS_USER", default=None)
163
165
  REDIS_DB = env.int("REDIS_DB", default=0)
164
166
  REDIS_HOST = env.str("REDIS_HOST", default="localhost")
165
167
  # redis poll interval (in secs)