pgsync 3.0.0__tar.gz → 3.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {pgsync-3.0.0 → pgsync-3.2.0}/PKG-INFO +25 -29
  2. {pgsync-3.0.0 → pgsync-3.2.0}/bin/bootstrap +2 -2
  3. {pgsync-3.0.0 → pgsync-3.2.0}/bin/parallel_sync +72 -81
  4. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/__init__.py +1 -1
  5. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/base.py +227 -196
  6. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/constants.py +9 -5
  7. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/helper.py +7 -6
  8. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/node.py +53 -43
  9. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/plugin.py +9 -5
  10. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/querybuilder.py +51 -28
  11. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/redisqueue.py +5 -4
  12. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/search_client.py +104 -81
  13. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/settings.py +19 -6
  14. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/singleton.py +2 -2
  15. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/sync.py +128 -88
  16. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/transform.py +5 -4
  17. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/urls.py +17 -17
  18. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/utils.py +36 -26
  19. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/view.py +43 -34
  20. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync.egg-info/PKG-INFO +25 -29
  21. pgsync-3.2.0/pgsync.egg-info/requires.txt +30 -0
  22. pgsync-3.2.0/pyproject.toml +3 -0
  23. {pgsync-3.0.0 → pgsync-3.2.0}/setup.cfg +1 -1
  24. {pgsync-3.0.0 → pgsync-3.2.0}/setup.py +5 -8
  25. {pgsync-3.0.0 → pgsync-3.2.0}/tests/conftest.py +134 -81
  26. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_base.py +122 -73
  27. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_constants.py +2 -0
  28. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_env_vars.py +1 -0
  29. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_helper.py +1 -0
  30. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_log_handlers.py +1 -0
  31. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_node.py +25 -20
  32. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_query_builder.py +1 -0
  33. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_search_client.py +6 -9
  34. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_settings.py +2 -1
  35. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_sync.py +36 -32
  36. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_sync_nested_children.py +39 -44
  37. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_sync_root.py +36 -55
  38. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_sync_single_child_fk_on_child.py +35 -47
  39. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_sync_single_child_fk_on_parent.py +35 -47
  40. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_transform.py +1 -0
  41. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_trigger.py +4 -2
  42. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_unique_behaviour.py +2 -10
  43. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_urls.py +1 -0
  44. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_utils.py +5 -4
  45. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_view.py +83 -55
  46. {pgsync-3.0.0 → pgsync-3.2.0}/tests/testing_utils.py +5 -3
  47. pgsync-3.0.0/pgsync.egg-info/requires.txt +0 -35
  48. pgsync-3.0.0/pyproject.toml +0 -3
  49. {pgsync-3.0.0 → pgsync-3.2.0}/AUTHORS.rst +0 -0
  50. {pgsync-3.0.0 → pgsync-3.2.0}/CONTRIBUTING.rst +0 -0
  51. {pgsync-3.0.0 → pgsync-3.2.0}/HISTORY.rst +0 -0
  52. {pgsync-3.0.0 → pgsync-3.2.0}/LICENSE +0 -0
  53. {pgsync-3.0.0 → pgsync-3.2.0}/MANIFEST.in +0 -0
  54. {pgsync-3.0.0 → pgsync-3.2.0}/README.md +0 -0
  55. {pgsync-3.0.0 → pgsync-3.2.0}/README.rst +0 -0
  56. {pgsync-3.0.0 → pgsync-3.2.0}/bin/pgsync +0 -0
  57. {pgsync-3.0.0 → pgsync-3.2.0}/docs/Makefile +0 -0
  58. {pgsync-3.0.0 → pgsync-3.2.0}/docs/authors.rst +0 -0
  59. {pgsync-3.0.0 → pgsync-3.2.0}/docs/changelog.rst +0 -0
  60. {pgsync-3.0.0 → pgsync-3.2.0}/docs/conf.py +0 -0
  61. {pgsync-3.0.0 → pgsync-3.2.0}/docs/contributing.rst +0 -0
  62. {pgsync-3.0.0 → pgsync-3.2.0}/docs/history.rst +0 -0
  63. {pgsync-3.0.0 → pgsync-3.2.0}/docs/index.rst +0 -0
  64. {pgsync-3.0.0 → pgsync-3.2.0}/docs/installation.rst +0 -0
  65. {pgsync-3.0.0 → pgsync-3.2.0}/docs/logo.png +0 -0
  66. {pgsync-3.0.0 → pgsync-3.2.0}/docs/make.bat +0 -0
  67. {pgsync-3.0.0 → pgsync-3.2.0}/docs/readme.rst +0 -0
  68. {pgsync-3.0.0 → pgsync-3.2.0}/docs/usage.rst +0 -0
  69. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/exc.py +0 -0
  70. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync/trigger.py +0 -0
  71. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync.egg-info/SOURCES.txt +0 -0
  72. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync.egg-info/dependency_links.txt +0 -0
  73. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync.egg-info/not-zip-safe +0 -0
  74. {pgsync-3.0.0 → pgsync-3.2.0}/pgsync.egg-info/top_level.txt +0 -0
  75. {pgsync-3.0.0 → pgsync-3.2.0}/tests/__init__.py +0 -0
  76. {pgsync-3.0.0 → pgsync-3.2.0}/tests/fixtures/schema.json +0 -0
  77. {pgsync-3.0.0 → pgsync-3.2.0}/tests/test_redisqueue.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pgsync
3
- Version: 3.0.0
3
+ Version: 3.2.0
4
4
  Summary: Postgres to Elasticsearch/OpenSearch sync
5
5
  Home-page: https://github.com/toluaina/pgsync
6
6
  Author: Tolu Aina
@@ -13,7 +13,7 @@ Project-URL: Funding, https://github.com/sponsors/toluaina
13
13
  Project-URL: Source, https://github.com/toluaina/pgsync
14
14
  Project-URL: Web, https://pgsync.com
15
15
  Project-URL: Documentation, https://pgsync.com
16
- Keywords: pgsync,elasticsearch,opensearch,postgres,change data capture
16
+ Keywords: change data capture,elasticsearch,opensearch,pgsync,postgres
17
17
  Classifier: Development Status :: 5 - Production/Stable
18
18
  Classifier: Intended Audience :: Developers
19
19
  Classifier: Natural Language :: English
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.8
21
21
  Classifier: Programming Language :: Python :: 3.9
22
22
  Classifier: Programming Language :: Python :: 3.10
23
23
  Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
24
25
  Classifier: Programming Language :: Python :: Implementation :: CPython
25
26
  Classifier: Programming Language :: Python :: Implementation :: PyPy
26
27
  Classifier: License :: OSI Approved :: MIT License
@@ -30,40 +31,35 @@ Description-Content-Type: text/markdown
30
31
  License-File: LICENSE
31
32
  License-File: AUTHORS.rst
32
33
  Requires-Dist: async-timeout==4.0.3
33
- Requires-Dist: black==23.11.0
34
- Requires-Dist: boto3==1.29.3
35
- Requires-Dist: botocore==1.32.3
36
- Requires-Dist: certifi==2023.11.17
34
+ Requires-Dist: boto3==1.34.142
35
+ Requires-Dist: botocore==1.34.142
36
+ Requires-Dist: certifi==2024.7.4
37
37
  Requires-Dist: charset-normalizer==3.3.2
38
38
  Requires-Dist: click==8.1.7
39
- Requires-Dist: elasticsearch==7.13.4
40
- Requires-Dist: elasticsearch-dsl==7.4.1
41
- Requires-Dist: environs==9.5.0
42
- Requires-Dist: faker==20.0.3
43
- Requires-Dist: greenlet==3.0.1
44
- Requires-Dist: idna==3.4
45
- Requires-Dist: isort==5.12.0
39
+ Requires-Dist: elastic-transport==8.13.1
40
+ Requires-Dist: elasticsearch==8.14.0
41
+ Requires-Dist: elasticsearch-dsl==8.14.0
42
+ Requires-Dist: environs==11.0.0
43
+ Requires-Dist: events==0.5
44
+ Requires-Dist: greenlet==3.0.3
45
+ Requires-Dist: idna==3.7
46
46
  Requires-Dist: jmespath==1.0.1
47
- Requires-Dist: marshmallow==3.20.1
48
- Requires-Dist: mypy-extensions==1.0.0
47
+ Requires-Dist: marshmallow==3.21.3
49
48
  Requires-Dist: opensearch-dsl==2.1.0
50
- Requires-Dist: opensearch-py==2.4.1
51
- Requires-Dist: packaging==23.2
52
- Requires-Dist: pathspec==0.11.2
53
- Requires-Dist: platformdirs==4.0.0
49
+ Requires-Dist: opensearch-py==2.6.0
50
+ Requires-Dist: packaging==24.1
54
51
  Requires-Dist: psycopg2-binary==2.9.9
55
- Requires-Dist: python-dateutil==2.8.2
56
- Requires-Dist: python-dotenv==1.0.0
57
- Requires-Dist: redis==5.0.1
58
- Requires-Dist: requests==2.31.0
52
+ Requires-Dist: python-dateutil==2.9.0.post0
53
+ Requires-Dist: python-dotenv==1.0.1
54
+ Requires-Dist: redis==5.0.7
55
+ Requires-Dist: requests==2.32.3
59
56
  Requires-Dist: requests-aws4auth==1.2.3
60
- Requires-Dist: s3transfer==0.7.0
57
+ Requires-Dist: s3transfer==0.10.2
61
58
  Requires-Dist: six==1.16.0
62
- Requires-Dist: sqlalchemy==1.4.50
63
- Requires-Dist: sqlparse==0.4.4
64
- Requires-Dist: tomli==2.0.1
65
- Requires-Dist: typing-extensions==4.8.0
66
- Requires-Dist: urllib3==1.26.18
59
+ Requires-Dist: sqlalchemy==2.0.31
60
+ Requires-Dist: sqlparse==0.5.0
61
+ Requires-Dist: typing-extensions==4.12.2
62
+ Requires-Dist: urllib3==1.26.19
67
63
 
68
64
  # PostgreSQL to Elasticsearch/OpenSearch sync
69
65
 
@@ -56,9 +56,9 @@ def main(teardown, config, user, password, host, port, verbose):
56
56
 
57
57
  validate: bool = False if teardown else True
58
58
 
59
- for document in config_loader(config):
59
+ for doc in config_loader(config):
60
60
  sync: Sync = Sync(
61
- document,
61
+ doc,
62
62
  verbose=verbose,
63
63
  validate=validate,
64
64
  repl_slots=False,
@@ -1,42 +1,42 @@
1
1
  #!/usr/bin/env python
2
2
 
3
3
  """
4
- Parallel sync is an innovative, experimental feature designed to optimize
5
- throughput by utilizing available CPUs/threads, particularly beneficial
4
+ Parallel sync is an innovative, experimental feature designed to optimize
5
+ throughput by utilizing available CPUs/threads, particularly beneficial
6
6
  in environments experiencing high network latency.
7
7
 
8
8
  Scenario & Challenge:
9
- In instances where your PG database, Elasticsearch/OpenSearch, and PGSync
10
- servers operate on divergent networks, a delay in request/response time is
11
- noticeable. The primary constraint emerges from the database query's roundtrip,
12
- which even server-side cursors can address only to a limited extent by fetching
13
- a certain number of records at a time. The consequent delay in fetching the
9
+ In instances where your PG database, Elasticsearch/OpenSearch, and PGSync
10
+ servers operate on divergent networks, a delay in request/response time is
11
+ noticeable. The primary constraint emerges from the database query's roundtrip,
12
+ which even server-side cursors can address only to a limited extent by fetching
13
+ a certain number of records at a time. The consequent delay in fetching the
14
14
  next cursor significantly hampers the overall synchronization speed.
15
15
 
16
16
  Solution:
17
- To mitigate this, the strategy is to conduct an initial fast/parallel sync,
18
- thereby populating Elasticsearch/OpenSearch in a single iteration.
17
+ To mitigate this, the strategy is to conduct an initial fast/parallel sync,
18
+ thereby populating Elasticsearch/OpenSearch in a single iteration.
19
19
  Post this, the regular pgsync can continue running as a daemon.
20
20
 
21
21
  Approach and Technical Implementation:
22
- The approach centers around utilizing the Tuple identifier record of the table
23
- columns. Every table incorporates a system column – "ctid" of type "tid,"
22
+ The approach centers around utilizing the Tuple identifier record of the table
23
+ columns. Every table incorporates a system column – "ctid" of type "tid,"
24
24
  which helps identify the page record and the row number in each block.
25
25
  This element facilitates the pagination of the sync process.
26
26
 
27
- Technically, pagination implies dividing each paged record amongst the
28
- available CPUs/threads. This division enables the parallel execution of
29
- Elasticsearch/OpenSearch bulk inserts. The "ctid" serves as a tuple
27
+ Technically, pagination implies dividing each paged record amongst the
28
+ available CPUs/threads. This division enables the parallel execution of
29
+ Elasticsearch/OpenSearch bulk inserts. The "ctid" serves as a tuple
30
30
  (for instance, (1, 5)), pinpointing the row in a disk page.
31
31
 
32
- By leveraging this method, all paged row records are retrieved upfront and
33
- allocated as work units across the worker threads/CPUs.
34
- Each work unit, defined by the BLOCK_SIZE, denotes the number of root node
32
+ By leveraging this method, all paged row records are retrieved upfront and
33
+ allocated as work units across the worker threads/CPUs.
34
+ Each work unit, defined by the BLOCK_SIZE, denotes the number of root node
35
35
  records assigned for each worker to process.
36
36
 
37
- Subsequently, the workers execute queries for each assigned chunk of work,
38
- filtered based on the page number and row numbers.
39
- This systematic and parallel approach optimizes the synchronization process,
37
+ Subsequently, the workers execute queries for each assigned chunk of work,
38
+ filtered based on the page number and row numbers.
39
+ This systematic and parallel approach optimizes the synchronization process,
40
40
  especially in environments challenged by network latency.
41
41
  """
42
42
 
@@ -45,56 +45,50 @@ import multiprocessing
45
45
  import os
46
46
  import re
47
47
  import sys
48
+ import typing as t
48
49
  from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
49
50
  from dataclasses import dataclass
50
51
  from queue import Queue
51
52
  from threading import Thread
52
- from typing import Generator, Optional, Union
53
53
 
54
54
  import click
55
55
  import sqlalchemy as sa
56
56
 
57
57
  from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH
58
58
  from pgsync.sync import Sync
59
- from pgsync.utils import (
60
- compiled_query,
61
- config_loader,
62
- get_config,
63
- show_settings,
64
- timeit,
65
- )
59
+ from pgsync.utils import config_loader, get_config, show_settings, timeit
66
60
 
67
61
 
68
- def save_ctid(page: int, row: int, name: str) -> None:
62
+ def save_ctid(page: int, row: int, filename: str) -> None:
69
63
  """
70
64
  Save the checkpoint for a given page and row in a file with the given name.
71
65
 
72
66
  Args:
73
67
  page (int): The page number to save.
74
68
  row (int): The row number to save.
75
- name (str): The name of the file to save the checkpoint in.
69
+ filename (str): The name of the file to save the checkpoint in.
76
70
  """
77
- checkpoint_file: str = os.path.join(CHECKPOINT_PATH, f".{name}.ctid")
78
- with open(checkpoint_file, "w+") as fp:
71
+ filepath: str = os.path.join(CHECKPOINT_PATH, f".{filename}.ctid")
72
+ with open(filepath, "w+") as fp:
79
73
  fp.write(f"{page},{row}\n")
80
74
 
81
75
 
82
- def read_ctid(name: str) -> None:
76
+ def read_ctid(filename: str) -> t.Tuple[t.Optional[int], t.Optional[int]]:
83
77
  """
84
78
  Reads the checkpoint file for the given name and returns the page and row numbers.
85
79
 
86
80
  Args:
87
- name (str): The name of the checkpoint file.
81
+ filename (str): The name of the checkpoint file.
88
82
 
89
83
  Returns:
90
84
  tuple: A tuple containing the page and row numbers. If the checkpoint file does not exist, returns (None, None).
91
85
  """
92
- checkpoint_file: str = os.path.join(CHECKPOINT_PATH, f".{name}.ctid")
93
- if os.path.exists(checkpoint_file):
94
- with open(checkpoint_file, "r") as fp:
86
+ filepath: str = os.path.join(CHECKPOINT_PATH, f".{filename}.ctid")
87
+ if os.path.exists(filepath):
88
+ with open(filepath, "r") as fp:
95
89
  pairs: str = fp.read().split()[0].split(",")
96
- page = int(pairs[0])
97
- row = int(pairs[1])
90
+ page: int = int(pairs[0])
91
+ row: int = int(pairs[1])
98
92
  return page, row
99
93
  return None, None
100
94
 
@@ -120,7 +114,6 @@ class Task:
120
114
  sync: Sync = Sync(
121
115
  self.doc, verbose=self.verbose, validate=self.validate
122
116
  )
123
- sync.tree.build(sync.nodes)
124
117
  txmin: int = sync.checkpoint
125
118
  txmax: int = sync.txid_current
126
119
  sync.search_client.bulk(
@@ -134,19 +127,19 @@ class Task:
134
127
  @timeit
135
128
  def fetch_tasks(
136
129
  doc: dict,
137
- block_size: Optional[int] = None,
138
- ) -> Generator:
130
+ block_size: t.Optional[int] = None,
131
+ ) -> t.Generator:
139
132
  block_size = block_size or BLOCK_SIZE
140
133
  pages: dict = {}
141
134
  sync: Sync = Sync(doc)
142
- page: Optional[int] = None
143
- row: Optional[int] = None
144
- name: str = re.sub(
135
+ page: t.Optional[int] = None
136
+ row: t.Optional[int] = None
137
+ filename: str = re.sub(
145
138
  "[^0-9a-zA-Z_]+", "", f"{sync.database.lower()}_{sync.index}"
146
139
  )
147
- page, row = read_ctid(name=name)
140
+ page, row = read_ctid(filename)
148
141
  statement: sa.sql.Select = sa.select(
149
- [
142
+ *[
150
143
  sa.literal_column("1").label("x"),
151
144
  sa.literal_column("1").label("y"),
152
145
  sa.column("ctid"),
@@ -213,11 +206,13 @@ def fetch_tasks(
213
206
 
214
207
  @timeit
215
208
  def synchronous(
216
- tasks: Generator, doc: dict, verbose: bool = False, validate: bool = False
209
+ tasks: t.Generator,
210
+ doc: dict,
211
+ verbose: bool = False,
212
+ validate: bool = False,
217
213
  ) -> None:
218
214
  sys.stdout.write("Synchronous\n")
219
215
  sync: Sync = Sync(doc, verbose=verbose, validate=validate)
220
- sync.tree.build(sync.nodes)
221
216
  txmin: int = sync.checkpoint
222
217
  txmax: int = sync.txid_current
223
218
  index: str = sync.index
@@ -231,9 +226,9 @@ def synchronous(
231
226
 
232
227
  @timeit
233
228
  def multithreaded(
234
- tasks: Generator,
229
+ tasks: t.Generator,
235
230
  doc: dict,
236
- nprocs: Optional[int] = None,
231
+ nthreads: t.Optional[int] = None,
237
232
  verbose: bool = False,
238
233
  validate: bool = False,
239
234
  ) -> None:
@@ -250,12 +245,11 @@ def multithreaded(
250
245
  )
251
246
  queue.task_done()
252
247
 
253
- nprocs: int = nprocs or 1
248
+ nthreads: int = nthreads or 1
254
249
  queue: Queue = Queue()
255
250
  sync: Sync = Sync(doc, verbose=verbose, validate=validate)
256
- sync.tree.build(sync.nodes)
257
251
 
258
- for _ in range(nprocs):
252
+ for _ in range(nthreads):
259
253
  thread: Thread = Thread(
260
254
  target=worker,
261
255
  args=(
@@ -274,15 +268,15 @@ def multithreaded(
274
268
 
275
269
  @timeit
276
270
  def multiprocess(
277
- tasks: Generator,
271
+ tasks: t.Generator,
278
272
  doc: dict,
279
- nprocs: Optional[int] = None,
273
+ ncpus: t.Optional[int] = None,
280
274
  verbose: bool = False,
281
275
  validate: bool = False,
282
276
  ) -> None:
283
277
  sys.stdout.write("Multiprocess\n")
284
278
  task: Task = Task(doc, verbose=verbose, validate=validate)
285
- with ProcessPoolExecutor(max_workers=nprocs) as executor:
279
+ with ProcessPoolExecutor(max_workers=ncpus) as executor:
286
280
  try:
287
281
  list(executor.map(task.process, tasks))
288
282
  except Exception as e:
@@ -292,14 +286,14 @@ def multiprocess(
292
286
 
293
287
  @timeit
294
288
  def multithreaded_async(
295
- tasks: Generator,
289
+ tasks: t.Generator,
296
290
  doc: dict,
297
- nprocs: Optional[int] = None,
291
+ nthreads: t.Optional[int] = None,
298
292
  verbose: bool = False,
299
293
  validate: bool = False,
300
294
  ) -> None:
301
295
  sys.stdout.write("Multi-threaded async\n")
302
- executor: ThreadPoolExecutor = ThreadPoolExecutor(max_workers=nprocs)
296
+ executor: ThreadPoolExecutor = ThreadPoolExecutor(max_workers=nthreads)
303
297
  event_loop = asyncio.get_event_loop()
304
298
  event_loop.run_until_complete(
305
299
  run_tasks(executor, tasks, doc, verbose=verbose, validate=validate)
@@ -309,14 +303,14 @@ def multithreaded_async(
309
303
 
310
304
  @timeit
311
305
  def multiprocess_async(
312
- tasks: Generator,
306
+ tasks: t.Generator,
313
307
  doc: dict,
314
- nprocs: Optional[int] = None,
308
+ ncpus: t.Optional[int] = None,
315
309
  verbose: bool = False,
316
310
  validate: bool = False,
317
311
  ) -> None:
318
312
  sys.stdout.write("Multi-process async\n")
319
- executor: ProcessPoolExecutor = ProcessPoolExecutor(max_workers=nprocs)
313
+ executor: ProcessPoolExecutor = ProcessPoolExecutor(max_workers=ncpus)
320
314
  event_loop = asyncio.get_event_loop()
321
315
  try:
322
316
  event_loop.run_until_complete(
@@ -328,18 +322,18 @@ def multiprocess_async(
328
322
 
329
323
 
330
324
  async def run_tasks(
331
- executor: Union[ThreadPoolExecutor, ProcessPoolExecutor],
332
- tasks: Generator,
325
+ executor: t.Union[ThreadPoolExecutor, ProcessPoolExecutor],
326
+ tasks: t.Generator,
333
327
  doc: dict,
334
328
  verbose: bool = False,
335
329
  validate: bool = False,
336
330
  ) -> None:
337
- sync: Optional[Sync] = None
331
+ sync: t.Optional[Sync] = None
338
332
  if isinstance(executor, ThreadPoolExecutor):
339
333
  # threads can share a common Sync object
340
334
  sync = Sync(doc, verbose=verbose, validate=validate)
341
335
  event_loop = asyncio.get_event_loop()
342
- completed, pending = await asyncio.wait(
336
+ completed, _ = await asyncio.wait(
343
337
  [
344
338
  event_loop.run_in_executor(
345
339
  executor, run_task, task, sync, doc, verbose, validate
@@ -354,14 +348,13 @@ async def run_tasks(
354
348
 
355
349
  def run_task(
356
350
  task: dict,
357
- sync: Optional[Sync] = None,
358
- doc: Optional[dict] = None,
351
+ sync: t.Optional[Sync] = None,
352
+ doc: t.Optional[dict] = None,
359
353
  verbose: bool = False,
360
354
  validate: bool = False,
361
355
  ) -> int:
362
356
  if sync is None:
363
357
  sync: Sync = Sync(doc, verbose=verbose, validate=validate)
364
- sync.tree.build(sync.nodes)
365
358
  txmin: int = sync.checkpoint
366
359
  txmax: int = sync.txid_current
367
360
  sync.search_client.bulk(
@@ -371,10 +364,10 @@ def run_task(
371
364
  if len(task) > 0:
372
365
  page: int = max(task.keys())
373
366
  row: int = max(task[page])
374
- name: str = re.sub(
367
+ filename: str = re.sub(
375
368
  "[^0-9a-zA-Z_]+", "", f"{sync.database.lower()}_{sync.index}"
376
369
  )
377
- save_ctid(page=page, row=row, name=name)
370
+ save_ctid(page, row, filename)
378
371
 
379
372
  return 1
380
373
 
@@ -426,20 +419,18 @@ def main(config, nprocs, mode, verbose):
426
419
  show_settings()
427
420
  config: str = get_config(config)
428
421
 
429
- for document in config_loader(config):
430
- tasks: Generator = fetch_tasks(document)
422
+ for doc in config_loader(config):
423
+ tasks: t.Generator = fetch_tasks(doc)
431
424
  if mode == "synchronous":
432
- synchronous(tasks, document, verbose=verbose)
425
+ synchronous(tasks, doc, verbose=verbose)
433
426
  elif mode == "multithreaded":
434
- multithreaded(tasks, document, nprocs=nprocs, verbose=verbose)
427
+ multithreaded(tasks, doc, nthreads=nprocs, verbose=verbose)
435
428
  elif mode == "multiprocess":
436
- multiprocess(tasks, document, nprocs=nprocs, verbose=verbose)
429
+ multiprocess(tasks, doc, ncpus=nprocs, verbose=verbose)
437
430
  elif mode == "multithreaded_async":
438
- multithreaded_async(
439
- tasks, document, nprocs=nprocs, verbose=verbose
440
- )
431
+ multithreaded_async(tasks, doc, nthreads=nprocs, verbose=verbose)
441
432
  elif mode == "multiprocess_async":
442
- multiprocess_async(tasks, document, nprocs=nprocs, verbose=verbose)
433
+ multiprocess_async(tasks, doc, ncpus=nprocs, verbose=verbose)
443
434
 
444
435
 
445
436
  if __name__ == "__main__":
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = "Tolu Aina"
4
4
  __email__ = "tolu@pgsync.com"
5
- __version__ = "3.0.0"
5
+ __version__ = "3.2.0"