pgbelt 0.9.2__tar.gz → 0.9.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgbelt-0.9.2 → pgbelt-0.9.3}/PKG-INFO +1 -1
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/sync.py +9 -3
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/dump.py +16 -2
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/postgres.py +96 -3
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pyproject.toml +2 -2
- {pgbelt-0.9.2 → pgbelt-0.9.3}/LICENSE +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/README.md +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/__init__.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/__init__.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/convenience.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/helpers.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/login.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/preflight.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/schema.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/setup.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/status.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/teardown.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/config/__init__.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/config/config.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/config/models.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/config/remote.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/main.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/__init__.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/asyncfuncs.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/logs.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/pglogical.py +0 -0
{pgbelt-0.9.2 → pgbelt-0.9.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pgbelt
-Version: 0.9.2
+Version: 0.9.3
 Summary: A CLI tool used to manage Postgres data migrations from beginning to end, for a single database or a fleet, leveraging pglogical replication.
 License-File: LICENSE
 Author: Varjitt Jeeva
{pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/sync.py

@@ -12,8 +12,9 @@ from pgbelt.util.dump import dump_source_tables
 from pgbelt.util.dump import load_dumped_tables
 from pgbelt.util.logs import get_logger
 from pgbelt.util.postgres import analyze_table_pkeys
-from pgbelt.util.postgres import
+from pgbelt.util.postgres import compare_100_random_rows
 from pgbelt.util.postgres import compare_latest_100_rows
+from pgbelt.util.postgres import compare_tables_without_pkeys
 from pgbelt.util.postgres import dump_sequences
 from pgbelt.util.postgres import load_sequences
 from pgbelt.util.postgres import run_analyze
@@ -143,10 +144,15 @@ async def validate_data(config_future: Awaitable[DbupgradeConfig]) -> None:
     try:
         logger = get_logger(conf.db, conf.dc, "sync")
         await gather(
-
+            compare_100_random_rows(
+                src_pool, dst_pool, conf.tables, conf.schema_name, logger
+            ),
             compare_latest_100_rows(
                 src_pool, dst_pool, conf.tables, conf.schema_name, logger
            ),
+            compare_tables_without_pkeys(
+                src_pool, dst_pool, conf.tables, conf.schema_name, logger
+            ),
         )
     finally:
         await gather(*[p.close() for p in pools])
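`validate_data` now fans three validators out through asyncio's `gather`, so the checks run concurrently on the already-open pools and the first exception propagates. A minimal sketch of that pattern, with stand-in coroutines rather than pgbelt's actual validators:

```python
# Sketch of the gather fan-out used in validate_data above; the checks
# here are stand-ins, not pgbelt's validators.
import asyncio


async def check(name: str) -> str:
    await asyncio.sleep(0.1)  # stand-in for a database round trip
    return f"{name} ok"


async def main() -> None:
    # gather schedules all three concurrently and raises the first failure
    results = await asyncio.gather(
        check("random-rows"), check("latest-rows"), check("no-pkey-tables")
    )
    print(results)  # ['random-rows ok', 'latest-rows ok', 'no-pkey-tables ok']


asyncio.run(main())
```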
@@ -218,7 +224,7 @@ async def sync(
     )

     await gather(
-
+        compare_100_random_rows(
             src_pool,
             dst_owner_pool,
             conf.tables,
{pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/dump.py

@@ -90,8 +90,22 @@ async def _dump_table(config: DbupgradeConfig, table: str, logger: Logger) -> None:
     out = await _execute_subprocess(command, f"dumped {table}", logger)
     content = out.decode("utf-8")

-    # Strip out unwanted lines, stupid PG17
-    keywords = [
+    # Strip out unwanted lines, stupid PG17
+    keywords = [
+        "transaction_timeout",
+        # "SET statement_timeout",  # This one is fine
+        # "SET lock_timeout",  # This one is fine
+        # "SET idle_in_transaction_session_timeout",  # This one is fine
+        "SET client_encoding",
+        "SET standard_conforming_strings",
+        "SET check_function_bodies",
+        "SET xmloption",
+        "SET client_min_messages",
+        "SET row_security",
+        "pg_catalog.set_config",  # Stupid search path, this should not be run.
+        "\\restrict",
+        "\\unrestrict",
+    ]
     lines = content.split("\n")
     filtered_lines = [
         line for line in lines if not any(keyword in line for keyword in keywords)
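For context, the filter is a per-line substring match: any dump line containing one of the keywords is dropped before the table data is written out, stripping settings such as `transaction_timeout` that newer pg_dump emits and older servers reject. A minimal runnable sketch of the same idea, using a made-up dump snippet and a shortened keyword list:

```python
# Minimal sketch of the line filter in _dump_table above; the dump text
# and shortened keyword list here are illustrative, not pgbelt's data.
keywords = ["transaction_timeout", "SET client_encoding", "\\restrict"]

sample_dump = "\n".join(
    [
        "SET transaction_timeout = 0;",  # dropped: PG17-only setting
        "SET client_encoding = 'UTF8';",  # dropped
        "SET statement_timeout = 0;",  # kept: harmless on older servers
        "CREATE TABLE public.foo (id integer);",  # kept
    ]
)

filtered_lines = [
    line
    for line in sample_dump.split("\n")
    if not any(keyword in line for keyword in keywords)
]
print("\n".join(filtered_lines))
# SET statement_timeout = 0;
# CREATE TABLE public.foo (id integer);
```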
{pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/postgres.py

@@ -214,22 +214,23 @@ async def compare_data(
     )


-async def
+async def compare_100_random_rows(
     src_pool: Pool, dst_pool: Pool, tables: list[str], schema: str, logger: Logger
 ) -> None:
     """
     Validate data between source and destination databases by doing the following:
     1. Get all tables with primary keys
-    2. For each of those tables, select
+    2. For each of those tables, select 100 random rows
     3. For each row, ensure the row in the destination is identical
     """
-    logger.info("Comparing 100 rows...")
+    logger.info("Comparing 100 random rows...")

     query = """
         SELECT * FROM
         (
             SELECT *
             FROM {table}
+            ORDER BY RANDOM()
             LIMIT 100
         ) AS T1
         ORDER BY {order_by_pkeys};
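The only change to the query is the added `ORDER BY RANDOM()`, so the inner subquery now samples up to 100 rows uniformly instead of taking the first 100 the planner happens to return, and the outer query orders that sample by primary key. As a hedged illustration of the template (the table and key names below are hypothetical, and pgbelt's actual substitution and quoting may differ):

```python
# Hypothetical rendering of the query template above; 'public."users"'
# and '"id"' are made-up placeholders, not values from pgbelt.
query = """
    SELECT * FROM
    (
        SELECT *
        FROM {table}
        ORDER BY RANDOM()
        LIMIT 100
    ) AS T1
    ORDER BY {order_by_pkeys};
"""
print(query.format(table='public."users"', order_by_pkeys='"id"'))
```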
@@ -259,6 +260,98 @@ async def compare_latest_100_rows(
     await compare_data(src_pool, dst_pool, query, tables, schema, logger)


+async def compare_tables_without_pkeys(
+    src_pool: Pool,
+    dst_pool: Pool,
+    tables: list[str],
+    schema: str,
+    logger: Logger,
+) -> None:
+    """
+    Validate data for tables without primary keys by:
+    1. Getting the list of tables without primary keys
+    2. For each table, selecting 100 random rows from source
+    3. For each row, verifying it exists in destination by matching all columns
+    """
+    logger.info("Comparing tables without primary keys...")
+
+    _, no_pkeys, _ = await analyze_table_pkeys(src_pool, schema, logger)
+
+    # Filter by tables list if provided
+    if tables:
+        no_pkeys = [t for t in no_pkeys if t in tables]
+
+    if not no_pkeys:
+        logger.info("No tables without primary keys to compare.")
+        return
+
+    src_old_extra_float_digits = await src_pool.fetchval("SHOW extra_float_digits;")
+    await src_pool.execute("SET extra_float_digits TO 0;")
+
+    dst_old_extra_float_digits = await dst_pool.fetchval("SHOW extra_float_digits;")
+    await dst_pool.execute("SET extra_float_digits TO 0;")
+
+    for table in no_pkeys:
+        full_table_name = f'{schema}."{table}"'
+        logger.debug(f"Validating table without primary key: {full_table_name}...")
+
+        # Select 100 random rows from source
+        query = f"""
+            SELECT * FROM {full_table_name}
+            ORDER BY RANDOM()
+            LIMIT 100;
+        """
+
+        src_rows = await src_pool.fetch(query)
+
+        if len(src_rows) == 0:
+            logger.debug(f"Table {full_table_name} is empty in source.")
+            continue
+
+        # For each source row, check if it exists in destination
+        for src_row in src_rows:
+            # Build WHERE clause matching all columns
+            where_clauses = []
+            for key, value in src_row.items():
+                # Handle Decimal NaN values
+                if isinstance(value, Decimal) and value.is_nan():
+                    value = None
+
+                if value is None:
+                    where_clauses.append(f'"{key}" IS NULL')
+                elif isinstance(value, (int, float, Decimal)):
+                    where_clauses.append(f'"{key}" = {value}')
+                elif isinstance(value, bool):
+                    where_clauses.append(f'"{key}" = {str(value).upper()}')
+                elif isinstance(value, bytes):
+                    hex_val = value.hex()
+                    where_clauses.append(f"\"{key}\" = '\\x{hex_val}'")
+                else:
+                    # Escape single quotes in string values
+                    escaped_val = str(value).replace("'", "''")
+                    where_clauses.append(f"\"{key}\" = '{escaped_val}'")
+
+            where_clause = " AND ".join(where_clauses)
+            check_query = (
+                f"SELECT 1 FROM {full_table_name} WHERE {where_clause} LIMIT 1;"
+            )
+
+            dst_result = await dst_pool.fetch(check_query)
+
+            if len(dst_result) == 0:
+                raise AssertionError(
+                    f"Row from source not found in destination.\n"
+                    f"Table: {full_table_name}\n"
+                    f"Source Row: {dict(src_row)}"
+                )
+
+        logger.debug(f"Table {full_table_name} validated successfully.")
+
+    await src_pool.execute(f"SET extra_float_digits TO {src_old_extra_float_digits};")
+    await dst_pool.execute(f"SET extra_float_digits TO {dst_old_extra_float_digits};")
+    logger.info("Tables without primary keys validation complete!")
+
+
 async def table_empty(pool: Pool, table: str, schema: str, logger: Logger) -> bool:
     """
     return true if the table is empty
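The new validator serializes each sampled source row into a `WHERE` clause over every column and probes the destination for a matching row. A standalone sketch of that clause-building on a made-up row follows; note the sketch checks `bool` before the numeric types because Python's `bool` is an `int` subclass, whereas the function above tests the numeric branch first (PostgreSQL parses both `True` and `TRUE` as boolean literals, so either rendering works against a real server):

```python
# Standalone sketch of the WHERE-clause construction in
# compare_tables_without_pkeys above, applied to a made-up row.
from decimal import Decimal

src_row = {"name": "O'Brien", "score": Decimal("NaN"), "active": True, "blob": b"\x01"}

where_clauses = []
for key, value in src_row.items():
    # Decimal NaN never compares equal to itself, so match it as NULL
    if isinstance(value, Decimal) and value.is_nan():
        value = None

    if value is None:
        where_clauses.append(f'"{key}" IS NULL')
    elif isinstance(value, bool):  # bool first: bool is a subclass of int
        where_clauses.append(f'"{key}" = {str(value).upper()}')
    elif isinstance(value, (int, float, Decimal)):
        where_clauses.append(f'"{key}" = {value}')
    elif isinstance(value, bytes):
        where_clauses.append(f"\"{key}\" = '\\x{value.hex()}'")
    else:
        escaped_val = str(value).replace("'", "''")  # escape single quotes
        where_clauses.append(f"\"{key}\" = '{escaped_val}'")

print(" AND ".join(where_clauses))
# "name" = 'O''Brien' AND "score" IS NULL AND "active" = TRUE AND "blob" = '\x01'
```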
{pgbelt-0.9.2 → pgbelt-0.9.3}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pgbelt"
-version = "0.9.2"
+version = "0.9.3"
 description = "A CLI tool used to manage Postgres data migrations from beginning to end, for a single database or a fleet, leveraging pglogical replication."
 authors = ["Varjitt Jeeva <varjitt.jeeva@autodesk.com>"]
 readme = "README.md"

@@ -23,7 +23,7 @@ pre-commit = "~4.5.1"
 flake8 = "^7.3.0"
 pytest-cov = "~6.2.1"
 pytest = "^9.0.2"
-coverage = {extras = ["toml"], version = "^7.
+coverage = {extras = ["toml"], version = "^7.13"}
 safety = "^3.6.1"
 mypy = "^1.17"
 xdoctest = {extras = ["colors"], version = "^1.2.0"}
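For reference, a Poetry caret constraint such as `^7.13` permits any release at or above 7.13 and below 8.0; the previous lower bound on `coverage` is truncated in this view.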