PyPI - pgsync - Versions diffs - 2.5.0__tar.gz → 3.1.0__tar.gz - Mend

pgsync 2.5.0.tar.gz → 3.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80)

pgsync-3.1.0/LICENSE +21 -0
{pgsync-2.5.0 → pgsync-3.1.0}/PKG-INFO +36 -6
{pgsync-2.5.0 → pgsync-3.1.0}/README.md +5 -5
{pgsync-2.5.0 → pgsync-3.1.0}/README.rst +1 -1
{pgsync-2.5.0 → pgsync-3.1.0}/bin/bootstrap +8 -2
{pgsync-2.5.0 → pgsync-3.1.0}/bin/parallel_sync +104 -97
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/__init__.py +1 -1
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/base.py +202 -159
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/constants.py +14 -1
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/helper.py +18 -8
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/node.py +62 -48
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/plugin.py +16 -5
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/querybuilder.py +28 -46
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/redisqueue.py +5 -5
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/search_client.py +108 -76
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/settings.py +26 -6
pgsync-3.1.0/pgsync/singleton.py +39 -0
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/sync.py +137 -100
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/transform.py +20 -9
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/trigger.py +7 -1
pgsync-3.1.0/pgsync/urls.py +145 -0
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/utils.py +77 -26
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/view.py +215 -44
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync.egg-info/PKG-INFO +36 -6
pgsync-3.1.0/pgsync.egg-info/requires.txt +29 -0
pgsync-3.1.0/pyproject.toml +3 -0
{pgsync-2.5.0 → pgsync-3.1.0}/setup.cfg +1 -1
{pgsync-2.5.0 → pgsync-3.1.0}/setup.py +6 -5
{pgsync-2.5.0 → pgsync-3.1.0}/tests/conftest.py +133 -82
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_base.py +96 -73
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_constants.py +1 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_node.py +24 -21
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_redisqueue.py +16 -16
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_search_client.py +5 -9
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_settings.py +1 -1
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_sync.py +23 -26
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_sync_nested_children.py +39 -44
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_sync_root.py +35 -55
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_sync_single_child_fk_on_child.py +35 -48
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_sync_single_child_fk_on_parent.py +35 -48
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_trigger.py +3 -2
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_unique_behaviour.py +2 -10
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_utils.py +4 -4
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_view.py +83 -55
{pgsync-2.5.0 → pgsync-3.1.0}/tests/testing_utils.py +5 -3
pgsync-2.5.0/LICENSE +0 -165
pgsync-2.5.0/pgsync/singleton.py +0 -20
pgsync-2.5.0/pgsync/urls.py +0 -99
pgsync-2.5.0/pgsync.egg-info/requires.txt +0 -43
pgsync-2.5.0/pyproject.toml +0 -3
{pgsync-2.5.0 → pgsync-3.1.0}/AUTHORS.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/CONTRIBUTING.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/HISTORY.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/MANIFEST.in +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/bin/pgsync +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/Makefile +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/authors.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/changelog.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/conf.py +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/contributing.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/history.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/index.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/installation.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/logo.png +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/make.bat +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/readme.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/docs/usage.rst +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/exc.py +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync.egg-info/SOURCES.txt +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync.egg-info/dependency_links.txt +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync.egg-info/not-zip-safe +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/pgsync.egg-info/top_level.txt +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/__init__.py +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/fixtures/schema.json +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_env_vars.py +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_helper.py +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_log_handlers.py +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_query_builder.py +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_transform.py +0 -0
{pgsync-2.5.0 → pgsync-3.1.0}/tests/test_urls.py +0 -0

pgsync-3.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2023 Tolu Aina
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

{pgsync-2.5.0 → pgsync-3.1.0}/PKG-INFO RENAMED Viewed

@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: pgsync
-Version: 2.5.0
+Version: 3.1.0
 Summary: Postgres to Elasticsearch/OpenSearch sync
 Home-page: https://github.com/toluaina/pgsync
 Author: Tolu Aina
 Author-email: tolu@pgsync.com
 Maintainer: Tolu Aina
 Maintainer-email: tolu@pgsync.com
-License: LGPLv3
+License: MIT
 Project-URL: Bug Reports, https://github.com/toluaina/pgsync/issues
 Project-URL: Funding, https://github.com/sponsors/toluaina
 Project-URL: Source, https://github.com/toluaina/pgsync
@@ -17,18 +17,48 @@ Keywords: pgsync,elasticsearch,opensearch,postgres,change data capture
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Natural Language :: English
-Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
-Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
-Requires-Python: >=3.7.0
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: AUTHORS.rst
+Requires-Dist: async-timeout==4.0.3
+Requires-Dist: boto3==1.34.11
+Requires-Dist: botocore==1.34.11
+Requires-Dist: certifi==2023.11.17
+Requires-Dist: charset-normalizer==3.3.2
+Requires-Dist: click==8.1.7
+Requires-Dist: elastic-transport==8.11.0
+Requires-Dist: elasticsearch==8.11.1
+Requires-Dist: elasticsearch-dsl==8.11.0
+Requires-Dist: environs==10.0.0
+Requires-Dist: greenlet==3.0.3
+Requires-Dist: idna==3.6
+Requires-Dist: jmespath==1.0.1
+Requires-Dist: marshmallow==3.20.1
+Requires-Dist: opensearch-dsl==2.1.0
+Requires-Dist: opensearch-py==2.4.2
+Requires-Dist: packaging==23.2
+Requires-Dist: psycopg2-binary==2.9.9
+Requires-Dist: python-dateutil==2.8.2
+Requires-Dist: python-dotenv==1.0.0
+Requires-Dist: redis==5.0.1
+Requires-Dist: requests==2.31.0
+Requires-Dist: requests-aws4auth==1.2.3
+Requires-Dist: s3transfer==0.10.0
+Requires-Dist: six==1.16.0
+Requires-Dist: sqlalchemy==2.0.25
+Requires-Dist: sqlparse==0.4.4
+Requires-Dist: typing-extensions==4.9.0
+Requires-Dist: urllib3==1.26.18
 # PostgreSQL to Elasticsearch/OpenSearch sync
@@ -40,7 +70,7 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
 ### Requirements
-- [Python](https://www.python.org) 3.7+
+- [Python](https://www.python.org) 3.8+
 - [Postgres](https://www.postgresql.org) 9.6+
 - [Redis](https://redis.io) 3.1.0
 - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+

{pgsync-2.5.0 → pgsync-3.1.0}/README.md RENAMED Viewed

@@ -66,7 +66,7 @@ the search capabilities of [Elasticsearch](https://www.elastic.co/products/elast
 #### How it works
-PGSync is written in Python (supporting version 3.7 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQlAlchemy](https://www.sqlalchemy.org).
+PGSync is written in Python (supporting version 3.8 onwards) and the stack is composed of: [Redis](https://redis.io), [Elasticsearch](https://www.elastic.co/products/elastic-stack)/[OpenSearch](https://opensearch.org/), [Postgres](https://www.postgresql.org), and [SQlAlchemy](https://www.sqlalchemy.org).
 PGSync leverages the [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) feature of [Postgres](https://www.postgresql.org) (introduced in PostgreSQL 9.4) to capture a continuous stream of change events.
 This feature needs to be enabled in your [Postgres](https://www.postgresql.org) configuration file by setting in the postgresql.conf file:
@@ -152,7 +152,7 @@ Key features of PGSync are:
 #### Requirements
-- [Python](https://www.python.org) 3.7+
+- [Python](https://www.python.org) 3.8+
 - [Postgres](https://www.postgresql.org) 9.6+
 - [Redis](https://redis.io) 3.1.0
 - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+
@@ -305,8 +305,8 @@ Contributions are very welcome! Check out the [Contribution](CONTRIBUTING.rst) G
 #### License
-This code is released under the [GNU Lesser General Public License](https://www.gnu.org/licenses/gpl-3.0.html), version 3.0 (LGPL-3.0).
+This project is licensed under the terms of the [MIT](https://opensource.org/license/mit/) license.
 Please see [LICENSE](LICENSE) for more details.
-You should have received a copy of the GNU Lesser General Public License along with PGSync.
-If not, see https://www.gnu.org/licenses/.
+You should have received a copy of the MIT License along with PGSync.
+If not, see https://opensource.org/license/mit/.

{pgsync-2.5.0 → pgsync-3.1.0}/README.rst RENAMED Viewed

@@ -8,7 +8,7 @@ expose structured denormalized documents in [Elasticsearch](https://www.elastic.
 ### Requirements
-- [Python](https://www.python.org) 3.7+
+- [Python](https://www.python.org) 3.8+
 - [Postgres](https://www.postgresql.org) 9.6+
 - [Redis](https://redis.io) 3.1.0
 - [Elasticsearch](https://www.elastic.co/products/elastic-stack) 6.3.1+ or [OpenSearch](https://opensearch.org/) 1.3.7+

{pgsync-2.5.0 → pgsync-3.1.0}/bin/bootstrap RENAMED Viewed

@@ -54,9 +54,15 @@ def main(teardown, config, user, password, host, port, verbose):
     show_settings(config)
-    for document in config_loader(config):
+    validate: bool = False if teardown else True
+    for doc in config_loader(config):
         sync: Sync = Sync(
-            document, verbose=verbose, repl_slots=False, **kwargs
+            doc,
+            verbose=verbose,
+            validate=validate,
+            repl_slots=False,
+            **kwargs,
         )
         if teardown:
             sync.teardown()

{pgsync-2.5.0 → pgsync-3.1.0}/bin/parallel_sync RENAMED Viewed

@@ -1,44 +1,43 @@
 #!/usr/bin/env python
 """
-Parallel sync is an experimental feature that leverages the available
-CPU's/Threads to increase throughput.
-This is can be useful for environments that have a high network latency.
-In this scenario, your PG database, Elasticsearch/OpenSearch, and PGSync
-servers are on different networks with a delay between request/response time.
-The main bottleneck, in this case, is usually the roundtrip of the database
-query.
-Even with server-side cursors, we are still only able to fetch
-a limited number of records at a time from the cursor.
-The delay in the next cursor fetch can slow down the overall sync
-considerably.
-The solution here is to perform an initial fast/parallel sync
-to populate Elasticsearch/OpenSearch in a single iteration.
-When this is complete, we can then continue to run the normal `pgsync`
-as a daemon.
-This approach uses the Tuple identifier record of the table columns.
-Each table contains a system column - "ctid" of type "tid" that
-identifies the page record and row number in each block.
-We can use this to paginate the sync process.
-Pagination here technically implies that we are splitting each paged record
-between CPU's/Threads.
-This allows us to perform Elasticserch/OpenSearch bulk inserts in parallel.
-The "ctid" is a tuple of (page, row-number) e.g (1, 5) that identifies the
-row in a disk page.
-This method allows us to fetch all paged row records upfront and split them
-into work units amongst the workers(threads/cpus).
-Each chunk of work is defined by the BLOCK_SIZE and corresponds to the number
-of root node records each worker needs to process.
-The worker's query for each chunk of work filtering by the page number
-and row numbers.
+Parallel sync is an innovative, experimental feature designed to optimize
+throughput by utilizing available CPUs/threads, particularly beneficial
+in environments experiencing high network latency.
+Scenario & Challenge:
+In instances where your PG database, Elasticsearch/OpenSearch, and PGSync
+servers operate on divergent networks, a delay in request/response time is
+noticeable. The primary constraint emerges from the database query's roundtrip,
+which even server-side cursors can address only to a limited extent by fetching
+a certain number of records at a time. The consequent delay in fetching the
+next cursor significantly hampers the overall synchronization speed.
+Solution:
+To mitigate this, the strategy is to conduct an initial fast/parallel sync,
+thereby populating Elasticsearch/OpenSearch in a single iteration.
+Post this, the regular pgsync can continue running as a daemon.
+Approach and Technical Implementation:
+The approach centers around utilizing the Tuple identifier record of the table
+columns. Every table incorporates a system column – "ctid" of type "tid,"
+which helps identify the page record and the row number in each block.
+This element facilitates the pagination of the sync process.
+Technically, pagination implies dividing each paged record amongst the
+available CPUs/threads. This division enables the parallel execution of
+Elasticsearch/OpenSearch bulk inserts. The "ctid" serves as a tuple
+(for instance, (1, 5)), pinpointing the row in a disk page.
+By leveraging this method, all paged row records are retrieved upfront and
+allocated as work units across the worker threads/CPUs.
+Each work unit, defined by the BLOCK_SIZE, denotes the number of root node
+records assigned for each worker to process.
+Subsequently, the workers execute queries for each assigned chunk of work,
+filtered based on the page number and row numbers.
+This systematic and parallel approach optimizes the synchronization process,
+especially in environments challenged by network latency.
 """
 import asyncio
@@ -46,39 +45,50 @@ import multiprocessing
 import os
 import re
 import sys
+import typing as t
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass
 from queue import Queue
 from threading import Thread
-from typing import Generator, Optional, Union
 import click
 import sqlalchemy as sa
 from pgsync.settings import BLOCK_SIZE, CHECKPOINT_PATH
 from pgsync.sync import Sync
-from pgsync.utils import (
-    compiled_query,
-    config_loader,
-    get_config,
-    show_settings,
-    timeit,
-)
+from pgsync.utils import config_loader, get_config, show_settings, timeit
+def save_ctid(page: int, row: int, filename: str) -> None:
+    """
+    Save the checkpoint for a given page and row in a file with the given name.
-def save_ctid(page: int, row: int, name: str) -> None:
-    checkpoint_file: str = os.path.join(CHECKPOINT_PATH, f".{name}.ctid")
-    with open(checkpoint_file, "w+") as fp:
+    Args:
+        page (int): The page number to save.
+        row (int): The row number to save.
+        filename (str): The name of the file to save the checkpoint in.
+    """
+    filepath: str = os.path.join(CHECKPOINT_PATH, f".{filename}.ctid")
+    with open(filepath, "w+") as fp:
         fp.write(f"{page},{row}\n")
-def read_ctid(name: str) -> None:
-    checkpoint_file: str = os.path.join(CHECKPOINT_PATH, f".{name}.ctid")
-    if os.path.exists(checkpoint_file):
-        with open(checkpoint_file, "r") as fp:
+def read_ctid(filename: str) -> t.Tuple[t.Optional[int], t.Optional[int]]:
+    """
+    Reads the checkpoint file for the given name and returns the page and row numbers.
+    Args:
+        filename (str): The name of the checkpoint file.
+    Returns:
+        tuple: A tuple containing the page and row numbers. If the checkpoint file does not exist, returns (None, None).
+    """
+    filepath: str = os.path.join(CHECKPOINT_PATH, f".{filename}.ctid")
+    if os.path.exists(filepath):
+        with open(filepath, "r") as fp:
             pairs: str = fp.read().split()[0].split(",")
-            page = int(pairs[0])
-            row = int(pairs[1])
+            page: int = int(pairs[0])
+            row: int = int(pairs[1])
             return page, row
     return None, None
@@ -104,7 +114,6 @@ class Task:
         sync: Sync = Sync(
             self.doc, verbose=self.verbose, validate=self.validate
         )
-        sync.tree.build(sync.nodes)
         txmin: int = sync.checkpoint
         txmax: int = sync.txid_current
         sync.search_client.bulk(
@@ -118,19 +127,19 @@ class Task:
 @timeit
 def fetch_tasks(
     doc: dict,
-    block_size: Optional[int] = None,
-) -> Generator:
+    block_size: t.Optional[int] = None,
+) -> t.Generator:
     block_size = block_size or BLOCK_SIZE
     pages: dict = {}
     sync: Sync = Sync(doc)
-    page: Optional[int] = None
-    row: Optional[int] = None
-    name: str = re.sub(
+    page: t.Optional[int] = None
+    row: t.Optional[int] = None
+    filename: str = re.sub(
         "[^0-9a-zA-Z_]+", "", f"{sync.database.lower()}_{sync.index}"
     )
-    page, row = read_ctid(name=name)
+    page, row = read_ctid(filename)
     statement: sa.sql.Select = sa.select(
-        [
+        *[
             sa.literal_column("1").label("x"),
             sa.literal_column("1").label("y"),
             sa.column("ctid"),
@@ -197,11 +206,13 @@ def fetch_tasks(
 @timeit
 def synchronous(
-    tasks: Generator, doc: dict, verbose: bool = False, validate: bool = False
+    tasks: t.Generator,
+    doc: dict,
+    verbose: bool = False,
+    validate: bool = False,
 ) -> None:
     sys.stdout.write("Synchronous\n")
     sync: Sync = Sync(doc, verbose=verbose, validate=validate)
-    sync.tree.build(sync.nodes)
     txmin: int = sync.checkpoint
     txmax: int = sync.txid_current
     index: str = sync.index
@@ -215,9 +226,9 @@ def synchronous(
 @timeit
 def multithreaded(
-    tasks: Generator,
+    tasks: t.Generator,
     doc: dict,
-    nprocs: Optional[int] = None,
+    nthreads: t.Optional[int] = None,
     verbose: bool = False,
     validate: bool = False,
 ) -> None:
@@ -234,12 +245,11 @@ def multithreaded(
             )
             queue.task_done()
-    nprocs: int = nprocs or 1
+    nthreads: int = nthreads or 1
     queue: Queue = Queue()
     sync: Sync = Sync(doc, verbose=verbose, validate=validate)
-    sync.tree.build(sync.nodes)
-    for _ in range(nprocs):
+    for _ in range(nthreads):
         thread: Thread = Thread(
             target=worker,
             args=(
@@ -258,15 +268,15 @@ def multithreaded(
 @timeit
 def multiprocess(
-    tasks: Generator,
+    tasks: t.Generator,
     doc: dict,
-    nprocs: Optional[int] = None,
+    ncpus: t.Optional[int] = None,
     verbose: bool = False,
     validate: bool = False,
 ) -> None:
     sys.stdout.write("Multiprocess\n")
     task: Task = Task(doc, verbose=verbose, validate=validate)
-    with ProcessPoolExecutor(max_workers=nprocs) as executor:
+    with ProcessPoolExecutor(max_workers=ncpus) as executor:
         try:
             list(executor.map(task.process, tasks))
         except Exception as e:
@@ -276,14 +286,14 @@ def multiprocess(
 @timeit
 def multithreaded_async(
-    tasks: Generator,
+    tasks: t.Generator,
     doc: dict,
-    nprocs: Optional[int] = None,
+    nthreads: t.Optional[int] = None,
     verbose: bool = False,
     validate: bool = False,
 ) -> None:
     sys.stdout.write("Multi-threaded async\n")
-    executor: ThreadPoolExecutor = ThreadPoolExecutor(max_workers=nprocs)
+    executor: ThreadPoolExecutor = ThreadPoolExecutor(max_workers=nthreads)
     event_loop = asyncio.get_event_loop()
     event_loop.run_until_complete(
         run_tasks(executor, tasks, doc, verbose=verbose, validate=validate)
@@ -293,14 +303,14 @@ def multithreaded_async(
 @timeit
 def multiprocess_async(
-    tasks: Generator,
+    tasks: t.Generator,
     doc: dict,
-    nprocs: Optional[int] = None,
+    ncpus: t.Optional[int] = None,
     verbose: bool = False,
     validate: bool = False,
 ) -> None:
     sys.stdout.write("Multi-process async\n")
-    executor: ProcessPoolExecutor = ProcessPoolExecutor(max_workers=nprocs)
+    executor: ProcessPoolExecutor = ProcessPoolExecutor(max_workers=ncpus)
     event_loop = asyncio.get_event_loop()
     try:
         event_loop.run_until_complete(
@@ -312,18 +322,18 @@ def multiprocess_async(
 async def run_tasks(
-    executor: Union[ThreadPoolExecutor, ProcessPoolExecutor],
-    tasks: Generator,
+    executor: t.Union[ThreadPoolExecutor, ProcessPoolExecutor],
+    tasks: t.Generator,
     doc: dict,
     verbose: bool = False,
     validate: bool = False,
 ) -> None:
-    sync: Optional[Sync] = None
+    sync: t.Optional[Sync] = None
     if isinstance(executor, ThreadPoolExecutor):
         # threads can share a common Sync object
         sync = Sync(doc, verbose=verbose, validate=validate)
     event_loop = asyncio.get_event_loop()
-    completed, pending = await asyncio.wait(
+    completed, _ = await asyncio.wait(
         [
             event_loop.run_in_executor(
                 executor, run_task, task, sync, doc, verbose, validate
@@ -338,14 +348,13 @@ async def run_tasks(
 def run_task(
     task: dict,
-    sync: Optional[Sync] = None,
-    doc: Optional[dict] = None,
+    sync: t.Optional[Sync] = None,
+    doc: t.Optional[dict] = None,
     verbose: bool = False,
     validate: bool = False,
 ) -> int:
     if sync is None:
         sync: Sync = Sync(doc, verbose=verbose, validate=validate)
-    sync.tree.build(sync.nodes)
     txmin: int = sync.checkpoint
     txmax: int = sync.txid_current
     sync.search_client.bulk(
@@ -355,10 +364,10 @@ def run_task(
     if len(task) > 0:
         page: int = max(task.keys())
         row: int = max(task[page])
-        name: str = re.sub(
+        filename: str = re.sub(
             "[^0-9a-zA-Z_]+", "", f"{sync.database.lower()}_{sync.index}"
         )
-        save_ctid(page=page, row=row, name=name)
+        save_ctid(page, row, filename)
     return 1
@@ -410,20 +419,18 @@ def main(config, nprocs, mode, verbose):
     show_settings()
     config: str = get_config(config)
-    for document in config_loader(config):
-        tasks: Generator = fetch_tasks(document)
+    for doc in config_loader(config):
+        tasks: t.Generator = fetch_tasks(doc)
         if mode == "synchronous":
-            synchronous(tasks, document, verbose=verbose)
+            synchronous(tasks, doc, verbose=verbose)
         elif mode == "multithreaded":
-            multithreaded(tasks, document, nprocs=nprocs, verbose=verbose)
+            multithreaded(tasks, doc, nthreads=nprocs, verbose=verbose)
         elif mode == "multiprocess":
-            multiprocess(tasks, document, nprocs=nprocs, verbose=verbose)
+            multiprocess(tasks, doc, ncpus=nprocs, verbose=verbose)
         elif mode == "multithreaded_async":
-            multithreaded_async(
-                tasks, document, nprocs=nprocs, verbose=verbose
-            )
+            multithreaded_async(tasks, doc, nthreads=nprocs, verbose=verbose)
         elif mode == "multiprocess_async":
-            multiprocess_async(tasks, document, nprocs=nprocs, verbose=verbose)
+            multiprocess_async(tasks, doc, ncpus=nprocs, verbose=verbose)
 if __name__ == "__main__":

{pgsync-2.5.0 → pgsync-3.1.0}/pgsync/__init__.py RENAMED Viewed

@@ -2,4 +2,4 @@
 __author__ = "Tolu Aina"
 __email__ = "tolu@pgsync.com"
-__version__ = "2.5.0"
+__version__ = "3.1.0"

pgsync 2.5.0__tar.gz → 3.1.0__tar.gz

pgsync 2.5.0.tar.gz → 3.1.0.tar.gz