pgbelt 0.9.2__tar.gz → 0.9.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pgbelt-0.9.2 → pgbelt-0.9.3}/PKG-INFO +1 -1
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/sync.py +9 -3
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/dump.py +16 -2
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/postgres.py +96 -3
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pyproject.toml +2 -2
- {pgbelt-0.9.2 → pgbelt-0.9.3}/LICENSE +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/README.md +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/__init__.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/__init__.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/convenience.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/helpers.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/login.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/preflight.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/schema.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/setup.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/status.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/teardown.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/config/__init__.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/config/config.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/config/models.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/config/remote.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/main.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/__init__.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/asyncfuncs.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/logs.py +0 -0
- {pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/pglogical.py +0 -0
{pgbelt-0.9.2 → pgbelt-0.9.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pgbelt
-Version: 0.9.2
+Version: 0.9.3
 Summary: A CLI tool used to manage Postgres data migrations from beginning to end, for a single database or a fleet, leveraging pglogical replication.
 License-File: LICENSE
 Author: Varjitt Jeeva
{pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/cmd/sync.py

@@ -12,8 +12,9 @@ from pgbelt.util.dump import dump_source_tables
 from pgbelt.util.dump import load_dumped_tables
 from pgbelt.util.logs import get_logger
 from pgbelt.util.postgres import analyze_table_pkeys
-from pgbelt.util.postgres import
+from pgbelt.util.postgres import compare_100_random_rows
 from pgbelt.util.postgres import compare_latest_100_rows
+from pgbelt.util.postgres import compare_tables_without_pkeys
 from pgbelt.util.postgres import dump_sequences
 from pgbelt.util.postgres import load_sequences
 from pgbelt.util.postgres import run_analyze
@@ -143,10 +144,15 @@ async def validate_data(config_future: Awaitable[DbupgradeConfig]) -> None:
     try:
         logger = get_logger(conf.db, conf.dc, "sync")
         await gather(
-
+            compare_100_random_rows(
+                src_pool, dst_pool, conf.tables, conf.schema_name, logger
+            ),
             compare_latest_100_rows(
                 src_pool, dst_pool, conf.tables, conf.schema_name, logger
            ),
+            compare_tables_without_pkeys(
+                src_pool, dst_pool, conf.tables, conf.schema_name, logger
+            ),
         )
     finally:
         await gather(*[p.close() for p in pools])
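`validate_data` now fans three validators out through asyncio's `gather`, so the checks run concurrently on the already-open pools and the first exception propagates. A minimal sketch of that pattern, with stand-in coroutines rather than pgbelt's actual validators:

```python
# Sketch of the gather fan-out used in validate_data above; the checks
# here are stand-ins, not pgbelt's validators.
import asyncio


async def check(name: str) -> str:
    await asyncio.sleep(0.1)  # stand-in for a database round trip
    return f"{name} ok"


async def main() -> None:
    # gather schedules all three concurrently and raises the first failure
    results = await asyncio.gather(
        check("random-rows"), check("latest-rows"), check("no-pkey-tables")
    )
    print(results)  # ['random-rows ok', 'latest-rows ok', 'no-pkey-tables ok']


asyncio.run(main())
```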
@@ -218,7 +224,7 @@ async def sync(
     )

     await gather(
-
+        compare_100_random_rows(
             src_pool,
             dst_owner_pool,
             conf.tables,
{pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/dump.py

@@ -90,8 +90,22 @@ async def _dump_table(config: DbupgradeConfig, table: str, logger: Logger) -> None:
     out = await _execute_subprocess(command, f"dumped {table}", logger)
     content = out.decode("utf-8")

-    # Strip out unwanted lines, stupid PG17
-    keywords = [
+    # Strip out unwanted lines, stupid PG17
+    keywords = [
+        "transaction_timeout",
+        # "SET statement_timeout",  # This one is fine
+        # "SET lock_timeout",  # This one is fine
+        # "SET idle_in_transaction_session_timeout",  # This one is fine
+        "SET client_encoding",
+        "SET standard_conforming_strings",
+        "SET check_function_bodies",
+        "SET xmloption",
+        "SET client_min_messages",
+        "SET row_security",
+        "pg_catalog.set_config",  # Stupid search path, this should not be run.
+        "\\restrict",
+        "\\unrestrict",
+    ]
     lines = content.split("\n")
     filtered_lines = [
         line for line in lines if not any(keyword in line for keyword in keywords)
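For context, the filter is a per-line substring match: any dump line containing one of the keywords is dropped before the table data is written out, stripping settings such as `transaction_timeout` that newer pg_dump emits and older servers reject. A minimal runnable sketch of the same idea, using a made-up dump snippet and a shortened keyword list:

```python
# Minimal sketch of the line filter in _dump_table above; the dump text
# and shortened keyword list here are illustrative, not pgbelt's data.
keywords = ["transaction_timeout", "SET client_encoding", "\\restrict"]

sample_dump = "\n".join(
    [
        "SET transaction_timeout = 0;",  # dropped: PG17-only setting
        "SET client_encoding = 'UTF8';",  # dropped
        "SET statement_timeout = 0;",  # kept: harmless on older servers
        "CREATE TABLE public.foo (id integer);",  # kept
    ]
)

filtered_lines = [
    line
    for line in sample_dump.split("\n")
    if not any(keyword in line for keyword in keywords)
]
print("\n".join(filtered_lines))
# SET statement_timeout = 0;
# CREATE TABLE public.foo (id integer);
```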
{pgbelt-0.9.2 → pgbelt-0.9.3}/pgbelt/util/postgres.py

@@ -214,22 +214,23 @@ async def compare_data(
     )


-async def
+async def compare_100_random_rows(
     src_pool: Pool, dst_pool: Pool, tables: list[str], schema: str, logger: Logger
 ) -> None:
     """
     Validate data between source and destination databases by doing the following:
     1. Get all tables with primary keys
-    2. For each of those tables, select
+    2. For each of those tables, select 100 random rows
     3. For each row, ensure the row in the destination is identical
     """
-    logger.info("Comparing 100 rows...")
+    logger.info("Comparing 100 random rows...")

     query = """
         SELECT * FROM
         (
             SELECT *
             FROM {table}
+            ORDER BY RANDOM()
             LIMIT 100
         ) AS T1
         ORDER BY {order_by_pkeys};
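The only change to the query is the added `ORDER BY RANDOM()`, so the inner subquery now samples up to 100 rows uniformly instead of taking the first 100 the planner happens to return, and the outer query orders that sample by primary key. As a hedged illustration of the template (the table and key names below are hypothetical, and pgbelt's actual substitution and quoting may differ):

```python
# Hypothetical rendering of the query template above; 'public."users"'
# and '"id"' are made-up placeholders, not values from pgbelt.
query = """
    SELECT * FROM
    (
        SELECT *
        FROM {table}
        ORDER BY RANDOM()
        LIMIT 100
    ) AS T1
    ORDER BY {order_by_pkeys};
"""
print(query.format(table='public."users"', order_by_pkeys='"id"'))
```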
@@ -259,6 +260,98 @@ async def compare_latest_100_rows(
     await compare_data(src_pool, dst_pool, query, tables, schema, logger)


+async def compare_tables_without_pkeys(
+    src_pool: Pool,
+    dst_pool: Pool,
+    tables: list[str],
+    schema: str,
+    logger: Logger,
+) -> None:
+    """
+    Validate data for tables without primary keys by:
+    1. Getting the list of tables without primary keys
+    2. For each table, selecting 100 random rows from source
+    3. For each row, verifying it exists in destination by matching all columns
+    """
+    logger.info("Comparing tables without primary keys...")
+
+    _, no_pkeys, _ = await analyze_table_pkeys(src_pool, schema, logger)
+
+    # Filter by tables list if provided
+    if tables:
+        no_pkeys = [t for t in no_pkeys if t in tables]
+
+    if not no_pkeys:
+        logger.info("No tables without primary keys to compare.")
+        return
+
+    src_old_extra_float_digits = await src_pool.fetchval("SHOW extra_float_digits;")
+    await src_pool.execute("SET extra_float_digits TO 0;")
+
+    dst_old_extra_float_digits = await dst_pool.fetchval("SHOW extra_float_digits;")
+    await dst_pool.execute("SET extra_float_digits TO 0;")
+
+    for table in no_pkeys:
+        full_table_name = f'{schema}."{table}"'
+        logger.debug(f"Validating table without primary key: {full_table_name}...")
+
+        # Select 100 random rows from source
+        query = f"""
+            SELECT * FROM {full_table_name}
+            ORDER BY RANDOM()
+            LIMIT 100;
+        """
+
+        src_rows = await src_pool.fetch(query)
+
+        if len(src_rows) == 0:
+            logger.debug(f"Table {full_table_name} is empty in source.")
+            continue
+
+        # For each source row, check if it exists in destination
+        for src_row in src_rows:
+            # Build WHERE clause matching all columns
+            where_clauses = []
+            for key, value in src_row.items():
+                # Handle Decimal NaN values
+                if isinstance(value, Decimal) and value.is_nan():
+                    value = None
+
+                if value is None:
+                    where_clauses.append(f'"{key}" IS NULL')
+                elif isinstance(value, (int, float, Decimal)):
+                    where_clauses.append(f'"{key}" = {value}')
+                elif isinstance(value, bool):
+                    where_clauses.append(f'"{key}" = {str(value).upper()}')
+                elif isinstance(value, bytes):
+                    hex_val = value.hex()
+                    where_clauses.append(f"\"{key}\" = '\\x{hex_val}'")
+                else:
+                    # Escape single quotes in string values
+                    escaped_val = str(value).replace("'", "''")
+                    where_clauses.append(f"\"{key}\" = '{escaped_val}'")
+
+            where_clause = " AND ".join(where_clauses)
+            check_query = (
+                f"SELECT 1 FROM {full_table_name} WHERE {where_clause} LIMIT 1;"
+            )
+
+            dst_result = await dst_pool.fetch(check_query)
+
+            if len(dst_result) == 0:
+                raise AssertionError(
+                    f"Row from source not found in destination.\n"
+                    f"Table: {full_table_name}\n"
+                    f"Source Row: {dict(src_row)}"
+                )
+
+        logger.debug(f"Table {full_table_name} validated successfully.")
+
+    await src_pool.execute(f"SET extra_float_digits TO {src_old_extra_float_digits};")
+    await dst_pool.execute(f"SET extra_float_digits TO {dst_old_extra_float_digits};")
+    logger.info("Tables without primary keys validation complete!")
+
+
 async def table_empty(pool: Pool, table: str, schema: str, logger: Logger) -> bool:
     """
     return true if the table is empty
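The new validator serializes each sampled source row into a `WHERE` clause over every column and probes the destination for a matching row. A standalone sketch of that clause-building on a made-up row follows; note the sketch checks `bool` before the numeric types because Python's `bool` is an `int` subclass, whereas the function above tests the numeric branch first (PostgreSQL parses both `True` and `TRUE` as boolean literals, so either rendering works against a real server):

```python
# Standalone sketch of the WHERE-clause construction in
# compare_tables_without_pkeys above, applied to a made-up row.
from decimal import Decimal

src_row = {"name": "O'Brien", "score": Decimal("NaN"), "active": True, "blob": b"\x01"}

where_clauses = []
for key, value in src_row.items():
    # Decimal NaN never compares equal to itself, so match it as NULL
    if isinstance(value, Decimal) and value.is_nan():
        value = None

    if value is None:
        where_clauses.append(f'"{key}" IS NULL')
    elif isinstance(value, bool):  # bool first: bool is a subclass of int
        where_clauses.append(f'"{key}" = {str(value).upper()}')
    elif isinstance(value, (int, float, Decimal)):
        where_clauses.append(f'"{key}" = {value}')
    elif isinstance(value, bytes):
        where_clauses.append(f"\"{key}\" = '\\x{value.hex()}'")
    else:
        escaped_val = str(value).replace("'", "''")  # escape single quotes
        where_clauses.append(f"\"{key}\" = '{escaped_val}'")

print(" AND ".join(where_clauses))
# "name" = 'O''Brien' AND "score" IS NULL AND "active" = TRUE AND "blob" = '\x01'
```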
{pgbelt-0.9.2 → pgbelt-0.9.3}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pgbelt"
-version = "0.9.2"
+version = "0.9.3"
 description = "A CLI tool used to manage Postgres data migrations from beginning to end, for a single database or a fleet, leveraging pglogical replication."
 authors = ["Varjitt Jeeva <varjitt.jeeva@autodesk.com>"]
 readme = "README.md"

@@ -23,7 +23,7 @@ pre-commit = "~4.5.1"
 flake8 = "^7.3.0"
 pytest-cov = "~6.2.1"
 pytest = "^9.0.2"
-coverage = {extras = ["toml"], version = "^7.
+coverage = {extras = ["toml"], version = "^7.13"}
 safety = "^3.6.1"
 mypy = "^1.17"
 xdoctest = {extras = ["colors"], version = "^1.2.0"}
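For reference, a Poetry caret constraint such as `^7.13` permits any release at or above 7.13 and below 8.0; the previous lower bound on `coverage` is truncated in this view.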