pgbelt 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pgbelt/cmd/sync.py CHANGED
@@ -12,8 +12,9 @@ from pgbelt.util.dump import dump_source_tables
12
12
  from pgbelt.util.dump import load_dumped_tables
13
13
  from pgbelt.util.logs import get_logger
14
14
  from pgbelt.util.postgres import analyze_table_pkeys
15
- from pgbelt.util.postgres import compare_100_rows
15
+ from pgbelt.util.postgres import compare_100_random_rows
16
16
  from pgbelt.util.postgres import compare_latest_100_rows
17
+ from pgbelt.util.postgres import compare_tables_without_pkeys
17
18
  from pgbelt.util.postgres import dump_sequences
18
19
  from pgbelt.util.postgres import load_sequences
19
20
  from pgbelt.util.postgres import run_analyze
@@ -143,10 +144,15 @@ async def validate_data(config_future: Awaitable[DbupgradeConfig]) -> None:
143
144
  try:
144
145
  logger = get_logger(conf.db, conf.dc, "sync")
145
146
  await gather(
146
- compare_100_rows(src_pool, dst_pool, conf.tables, conf.schema_name, logger),
147
+ compare_100_random_rows(
148
+ src_pool, dst_pool, conf.tables, conf.schema_name, logger
149
+ ),
147
150
  compare_latest_100_rows(
148
151
  src_pool, dst_pool, conf.tables, conf.schema_name, logger
149
152
  ),
153
+ compare_tables_without_pkeys(
154
+ src_pool, dst_pool, conf.tables, conf.schema_name, logger
155
+ ),
150
156
  )
151
157
  finally:
152
158
  await gather(*[p.close() for p in pools])
@@ -218,7 +224,7 @@ async def sync(
218
224
  )
219
225
 
220
226
  await gather(
221
- compare_100_rows(
227
+ compare_100_random_rows(
222
228
  src_pool,
223
229
  dst_owner_pool,
224
230
  conf.tables,
pgbelt/util/dump.py CHANGED
@@ -90,8 +90,22 @@ async def _dump_table(config: DbupgradeConfig, table: str, logger: Logger) -> No
90
90
  out = await _execute_subprocess(command, f"dumped {table}", logger)
91
91
  content = out.decode("utf-8")
92
92
 
93
- # Strip out unwanted lines, stupid PG17 adding transaction_timeout lines.
94
- keywords = ["transaction_timeout"]
93
+ # Strip out unwanted lines, stupid PG17
94
+ keywords = [
95
+ "transaction_timeout",
96
+ # "SET statement_timeout", # This one is fine
97
+ # "SET lock_timeout", # This one is fine
98
+ # "SET idle_in_transaction_session_timeout", # This one is fine
99
+ "SET client_encoding",
100
+ "SET standard_conforming_strings",
101
+ "SET check_function_bodies",
102
+ "SET xmloption",
103
+ "SET client_min_messages",
104
+ "SET row_security",
105
+ "pg_catalog.set_config", # Stupid search path, this should not be run.
106
+ "\\restrict",
107
+ "\\unrestrict",
108
+ ]
95
109
  lines = content.split("\n")
96
110
  filtered_lines = [
97
111
  line for line in lines if not any(keyword in line for keyword in keywords)
pgbelt/util/postgres.py CHANGED
@@ -214,22 +214,23 @@ async def compare_data(
214
214
  )
215
215
 
216
216
 
217
- async def compare_100_rows(
217
+ async def compare_100_random_rows(
218
218
  src_pool: Pool, dst_pool: Pool, tables: list[str], schema: str, logger: Logger
219
219
  ) -> None:
220
220
  """
221
221
  Validate data between source and destination databases by doing the following:
222
222
  1. Get all tables with primary keys
223
- 2. For each of those tables, select * limit 100
223
+ 2. For each of those tables, select 100 random rows
224
224
  3. For each row, ensure the row in the destination is identical
225
225
  """
226
- logger.info("Comparing 100 rows...")
226
+ logger.info("Comparing 100 random rows...")
227
227
 
228
228
  query = """
229
229
  SELECT * FROM
230
230
  (
231
231
  SELECT *
232
232
  FROM {table}
233
+ ORDER BY RANDOM()
233
234
  LIMIT 100
234
235
  ) AS T1
235
236
  ORDER BY {order_by_pkeys};
@@ -259,6 +260,113 @@ async def compare_latest_100_rows(
259
260
  await compare_data(src_pool, dst_pool, query, tables, schema, logger)
260
261
 
261
262
 
263
+ async def compare_tables_without_pkeys(
264
+ src_pool: Pool,
265
+ dst_pool: Pool,
266
+ tables: list[str],
267
+ schema: str,
268
+ logger: Logger,
269
+ ) -> None:
270
+ """
271
+ Validate data for tables without primary keys by:
272
+ 1. Getting the list of tables without primary keys
273
+ 2. For each table, selecting 100 random rows from source
274
+ 3. For each row, verifying it exists in destination by matching all columns
275
+ """
276
+ logger.info("Comparing tables without primary keys...")
277
+
278
+ _, no_pkeys, _ = await analyze_table_pkeys(src_pool, schema, logger)
279
+
280
+ # Filter by tables list if provided
281
+ if tables:
282
+ no_pkeys = [t for t in no_pkeys if t in tables]
283
+
284
+ if not no_pkeys:
285
+ logger.info("No tables without primary keys to compare.")
286
+ return
287
+
288
+ src_old_extra_float_digits = await src_pool.fetchval("SHOW extra_float_digits;")
289
+ await src_pool.execute("SET extra_float_digits TO 0;")
290
+
291
+ dst_old_extra_float_digits = await dst_pool.fetchval("SHOW extra_float_digits;")
292
+ await dst_pool.execute("SET extra_float_digits TO 0;")
293
+
294
+ for table in no_pkeys:
295
+ full_table_name = f'{schema}."{table}"'
296
+ logger.debug(f"Validating table without primary key: {full_table_name}...")
297
+
298
+ # Select 100 random rows from source
299
+ query = f"""
300
+ SELECT * FROM {full_table_name}
301
+ ORDER BY RANDOM()
302
+ LIMIT 100;
303
+ """
304
+
305
+ src_rows = await src_pool.fetch(query)
306
+
307
+ if len(src_rows) == 0:
308
+ logger.debug(f"Table {full_table_name} is empty in source.")
309
+ continue
310
+
311
+ # For each source row, check if it exists in destination
312
+ for src_row in src_rows:
313
+ # Build WHERE clause matching all columns
314
+ where_clauses = []
315
+ for key, value in src_row.items():
316
+ # Handle Decimal NaN values
317
+ if isinstance(value, Decimal) and value.is_nan():
318
+ value = None
319
+
320
+ if value is None:
321
+ where_clauses.append(f'"{key}" IS NULL')
322
+ elif isinstance(value, (int, float, Decimal)):
323
+ where_clauses.append(f'"{key}" = {value}')
324
+ elif isinstance(value, bool):
325
+ where_clauses.append(f'"{key}" = {str(value).upper()}')
326
+ elif isinstance(value, bytes):
327
+ hex_val = value.hex()
328
+ where_clauses.append(f"\"{key}\" = '\\x{hex_val}'")
329
+ elif isinstance(value, list):
330
+ # Handle PostgreSQL arrays - format as '{val1, val2, ...}'
331
+ # Needed here but not in the PK comparison: here each row is matched as a whole through a SQL WHERE clause, whereas the PK comparison also breaks rows down and compares them column by column.
332
+ escaped_elements = []
333
+ for elem in value:
334
+ if elem is None:
335
+ escaped_elements.append("NULL")
336
+ else:
337
+ # Escape double quotes and backslashes in array elements
338
+ escaped_elem = (
339
+ str(elem).replace("\\", "\\\\").replace('"', '\\"')
340
+ )
341
+ escaped_elements.append(f'"{escaped_elem}"')
342
+ array_literal = "{" + ",".join(escaped_elements) + "}"
343
+ where_clauses.append(f"\"{key}\" = '{array_literal}'")
344
+ else:
345
+ # Escape single quotes in string values
346
+ escaped_val = str(value).replace("'", "''")
347
+ where_clauses.append(f"\"{key}\" = '{escaped_val}'")
348
+
349
+ where_clause = " AND ".join(where_clauses)
350
+ check_query = (
351
+ f"SELECT 1 FROM {full_table_name} WHERE {where_clause} LIMIT 1;"
352
+ )
353
+
354
+ dst_result = await dst_pool.fetch(check_query)
355
+
356
+ if len(dst_result) == 0:
357
+ raise AssertionError(
358
+ f"Row from source not found in destination.\n"
359
+ f"Table: {full_table_name}\n"
360
+ f"Source Row: {dict(src_row)}"
361
+ )
362
+
363
+ logger.debug(f"Table {full_table_name} validated successfully.")
364
+
365
+ await src_pool.execute(f"SET extra_float_digits TO {src_old_extra_float_digits};")
366
+ await dst_pool.execute(f"SET extra_float_digits TO {dst_old_extra_float_digits};")
367
+ logger.info("Tables without primary keys validation complete!")
368
+
369
+
262
370
  async def table_empty(pool: Pool, table: str, schema: str, logger: Logger) -> bool:
263
371
  """
264
372
  return true if the table is empty
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pgbelt
3
- Version: 0.9.2
3
+ Version: 0.9.4
4
4
  Summary: A CLI tool used to manage Postgres data migrations from beginning to end, for a single database or a fleet, leveraging pglogical replication.
5
5
  License-File: LICENSE
6
6
  Author: Varjitt Jeeva
@@ -7,7 +7,7 @@ pgbelt/cmd/preflight.py,sha256=GH2IE-yb1HvbpwSwm-jJG1dnRO9CXfSsTH5OqYEVyv4,21557
7
7
  pgbelt/cmd/schema.py,sha256=OuxJdUILxlNCvaBV71Si8035pX3LVwHm5Urt0vftO8E,4389
8
8
  pgbelt/cmd/setup.py,sha256=Jp5sqT9_whoVBiOzAlOzX1ubtXQADYBkBrJldch_fKk,6627
9
9
  pgbelt/cmd/status.py,sha256=8K1c2OMZ3uHNmEh-5a2a0fhTDmCU0RohzgjjVfXoKGo,5385
10
- pgbelt/cmd/sync.py,sha256=MrrBqCtsBqhL4nyzp78gKYp90xXxlzuHcqWQ5Nt6_hs,8156
10
+ pgbelt/cmd/sync.py,sha256=iLFrVNx5_rmhlSbLOMuPTgNmPhIdVx3DBT-QKjcNIpc,8400
11
11
  pgbelt/cmd/teardown.py,sha256=Nl37vpugxO7QHO0tKpTfZDV0KtsSm0SbWUbzb3k2N-0,3545
12
12
  pgbelt/config/__init__.py,sha256=SXok1aZcpMYJpX_hk5cuKO33CJ5s8IESkswNN9KsVSo,35
13
13
  pgbelt/config/config.py,sha256=Kw2H-G1Evfj0TXIbh3k06gE72dZEp_wXWJ2Icq_T54c,3817
@@ -16,12 +16,12 @@ pgbelt/config/remote.py,sha256=D9bOekVfMU1xX2Wy0OiJwSXetxJUdt9Tn5Fukwn9rnE,5307
16
16
  pgbelt/main.py,sha256=YiagBiGt8pbNlukkRxROXnQX1Tx6ax7c6riuHRCrPYU,186
17
17
  pgbelt/util/__init__.py,sha256=-6KkvVMz-yGNQfeoo4CZZrgWKXYmFd4CMyoiao8OnFE,40
18
18
  pgbelt/util/asyncfuncs.py,sha256=7i_GpBmUNNZ8RUGvU-q5nclsoaCm6Lx8jLP8usYvmZc,583
19
- pgbelt/util/dump.py,sha256=W814ULJPM9IBEhmvMFRD2E8DkqTuXZRuRtLXT8k4xhU,14691
19
+ pgbelt/util/dump.py,sha256=J0TXPv4LfsrRnt4ppVgxFK_E9tTueLcsAY4PQLVKXAI,15172
20
20
  pgbelt/util/logs.py,sha256=3Kk_dERXLsTwLxFS6tzI8fE0sRi5QrBYHJc4Al6ZCMA,2110
21
21
  pgbelt/util/pglogical.py,sha256=Y6KZBeiH85zhNSvhATqh0xozhfUMyQnPWN1HwRosZFo,13613
22
- pgbelt/util/postgres.py,sha256=ftBontoWkU7XaDXaBnsSJYYx7s2Su0xkcTyVCzwKJm0,20080
23
- pgbelt-0.9.2.dist-info/METADATA,sha256=we33iVu7QH6GjgfHHqzL9ISEfq73y2S8gf5wySkPHHQ,2933
24
- pgbelt-0.9.2.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
25
- pgbelt-0.9.2.dist-info/entry_points.txt,sha256=SCz_poPjkaVnWpJ-CeytAnDzbVc6l0WalOwitIqW_3g,40
26
- pgbelt-0.9.2.dist-info/licenses/LICENSE,sha256=FQ5cFkW02dKK3LmKH8z-rwn93tWSCh7lsxfNUiWcFsg,10758
27
- pgbelt-0.9.2.dist-info/RECORD,,
22
+ pgbelt/util/postgres.py,sha256=poUA8PJ71OJZupfCh7k-7qBAwVxocJYAzhZ2NajSfxk,24551
23
+ pgbelt-0.9.4.dist-info/METADATA,sha256=1MA0qdMJ1lDd_LQ1DO9xzJLybhVcC3AH9iUU0MjSgC4,2933
24
+ pgbelt-0.9.4.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
25
+ pgbelt-0.9.4.dist-info/entry_points.txt,sha256=SCz_poPjkaVnWpJ-CeytAnDzbVc6l0WalOwitIqW_3g,40
26
+ pgbelt-0.9.4.dist-info/licenses/LICENSE,sha256=FQ5cFkW02dKK3LmKH8z-rwn93tWSCh7lsxfNUiWcFsg,10758
27
+ pgbelt-0.9.4.dist-info/RECORD,,
File without changes