pgbelt 0.8.3__tar.gz → 0.9.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {pgbelt-0.8.3 → pgbelt-0.9.3}/PKG-INFO +7 -8
  2. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/preflight.py +158 -144
  3. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/sync.py +9 -3
  4. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/util/dump.py +47 -20
  5. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/util/postgres.py +97 -4
  6. {pgbelt-0.8.3 → pgbelt-0.9.3}/pyproject.toml +20 -20
  7. {pgbelt-0.8.3 → pgbelt-0.9.3}/LICENSE +0 -0
  8. {pgbelt-0.8.3 → pgbelt-0.9.3}/README.md +0 -0
  9. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/__init__.py +0 -0
  10. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/__init__.py +0 -0
  11. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/convenience.py +0 -0
  12. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/helpers.py +0 -0
  13. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/login.py +0 -0
  14. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/schema.py +0 -0
  15. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/setup.py +0 -0
  16. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/status.py +0 -0
  17. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/cmd/teardown.py +0 -0
  18. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/config/__init__.py +0 -0
  19. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/config/config.py +0 -0
  20. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/config/models.py +0 -0
  21. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/config/remote.py +0 -0
  22. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/main.py +0 -0
  23. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/util/__init__.py +0 -0
  24. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/util/asyncfuncs.py +0 -0
  25. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/util/logs.py +0 -0
  26. {pgbelt-0.8.3 → pgbelt-0.9.3}/pgbelt/util/pglogical.py +0 -0
@@ -1,21 +1,20 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.4
  Name: pgbelt
- Version: 0.8.3
+ Version: 0.9.3
  Summary: A CLI tool used to manage Postgres data migrations from beginning to end, for a single database or a fleet, leveraging pglogical replication.
+ License-File: LICENSE
  Author: Varjitt Jeeva
  Author-email: varjitt.jeeva@autodesk.com
- Requires-Python: >=3.9,<4.0
+ Requires-Python: >=3.12,<4.0
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
  Requires-Dist: aiofiles (>=0.8,<24.2)
- Requires-Dist: asyncpg (>=0.27,<0.31)
+ Requires-Dist: asyncpg (>=0.27,<0.32)
  Requires-Dist: pydantic (>=2.0,<3.0)
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
- Requires-Dist: typer (>=0.9,<0.16)
+ Requires-Dist: typer (>=0.9,<0.22)
  Description-Content-Type: text/markdown

  # Pgbelt
@@ -12,44 +12,47 @@ from typer import echo
  from typer import style


- def _summary_table(results: dict, compared_extensions: list[str] = None) -> list[list]:
+ def _summary_table(
+     results: list[dict], compared_results: list[dict] | None = None
+ ) -> list[list]:
      """
      Takes a dict of precheck results for all databases and returns a summary table for echo.

      The summary table alters slightly if the results are for a destination database.

-     results format:
-     [
-         {
-             "server_version": "9.6.20",
-             "max_replication_slots": "10",
-             "max_worker_processes": "10",
-             "max_wal_senders": "10",
-             "shared_preload_libraries": ["pg_stat_statements", ...],
-             "rds.logical_replication": "on",
-             "schema": "public",
-             "extensions": ["uuid-ossp", ...],
-             "users": { // See pgbelt.util.postgres.precheck_info results["users"] for more info.
-                 "root": {
-                     "rolname": "root",
-                     "rolcanlogin": True,
-                     "rolcreaterole": True,
-                     "rolinherit": True,
-                     "rolsuper": True,
-                     "memberof": ["rds_superuser", ...]
-                 },
-                 "owner": {
-                     "rolname": "owner",
-                     "rolcanlogin": True,
-                     "rolcreaterole": False,
-                     "rolinherit": True,
-                     "rolsuper": False,
-                     "memberof": ["rds_superuser", ...]
+     Example results format::
+
+         [
+             {
+                 "server_version": "9.6.20",
+                 "max_replication_slots": "10",
+                 "max_worker_processes": "10",
+                 "max_wal_senders": "10",
+                 "shared_preload_libraries": ["pg_stat_statements", ...],
+                 "rds.logical_replication": "on",
+                 "schema": "public",
+                 "extensions": ["uuid-ossp", ...],
+                 "users": { // See pgbelt.util.postgres.precheck_info results["users"] for more info.
+                     "root": {
+                         "rolname": "root",
+                         "rolcanlogin": True,
+                         "rolcreaterole": True,
+                         "rolinherit": True,
+                         "rolsuper": True,
+                         "memberof": ["rds_superuser", ...]
+                     },
+                     "owner": {
+                         "rolname": "owner",
+                         "rolcanlogin": True,
+                         "rolcreaterole": False,
+                         "rolinherit": True,
+                         "rolsuper": False,
+                         "memberof": ["rds_superuser", ...]
+                     }
                  }
-             }
-         },
-         ...
-     ]
+             },
+             ...
+         ]
      """

      summary_table = [
@@ -69,6 +72,11 @@ def _summary_table(results: dict, compared_extensions: list[str] = None) -> list
      ]

      results.sort(key=lambda d: d["db"])
+     compared_by_db = (
+         {entry["db"]: entry for entry in compared_results}
+         if compared_results is not None
+         else {}
+     )

      for r in results:
          root_ok = (
@@ -80,9 +88,9 @@ def _summary_table(results: dict, compared_extensions: list[str] = None) -> list
          or r["users"]["root"]["rolsuper"]
      )

-     # Interestingly enough, we can tell if this is being run for a destination database if compared_extensions is not None.
+     # Interestingly enough, we can tell if this is being run for a destination database if compared_results is not None.
      # This is because it is only set when we are ensuring all source extensions are in the destination.
-     is_dest_db = compared_extensions is not None
+     is_dest_db = compared_results is not None

      # If this is a destination database, we need to check if the owner can create objects.

@@ -149,9 +157,13 @@ def _summary_table(results: dict, compared_extensions: list[str] = None) -> list
      # If this is a destination DB, we are ensuring all source extensions are in the destination.
      # If not, we don't want this column in the table.
      if is_dest_db:
+         compare_entry = compared_by_db.get(r["db"])
+         if compare_entry is None:
+             summary_table[-1].append(style(False, "red"))
+             continue
          extensions_ok = all(
-             [e in r["extensions"] for e in compared_extensions]
-         ) and all([e in compared_extensions for e in r["extensions"]])
+             [e in r["extensions"] for e in compare_entry["extensions"]]
+         ) and all([e in compare_entry["extensions"] for e in r["extensions"]])
          summary_table[-1].append(
              style(extensions_ok, "green" if extensions_ok else "red")
          )
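
Note: the pair of all(...) checks in this hunk tests mutual inclusion of the two extension lists, which reduces to set equality. A minimal equivalent sketch (assuming extension names are unique):

    # Equivalent set-based formulation of the extensions_ok check above.
    def extensions_match(src_extensions: list[str], dst_extensions: list[str]) -> bool:
        return set(src_extensions) == set(dst_extensions)

    assert extensions_match(["uuid-ossp", "pglogical"], ["pglogical", "uuid-ossp"])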
@@ -165,25 +177,26 @@ def _users_table(users: dict, is_dest_db: bool = False) -> list[list]:

      The users table alters slightly if the results are for a destination database.

-     users format:
-     {
-         "root": {
-             "rolname": "root",
-             "rolcanlogin": True,
-             "rolcreaterole": True,
-             "rolinherit": True,
-             "rolsuper": True,
-             "memberof": ["rds_superuser", ...]
-         },
-         "owner": {
-             "rolname": "owner",
-             "rolcanlogin": True,
-             "rolcreaterole": False,
-             "rolinherit": True,
-             "rolsuper": False,
-             "memberof": ["rds_superuser", ...]
+     Example users format::
+
+         {
+             "root": {
+                 "rolname": "root",
+                 "rolcanlogin": True,
+                 "rolcreaterole": True,
+                 "rolinherit": True,
+                 "rolsuper": True,
+                 "memberof": ["rds_superuser", ...]
+             },
+             "owner": {
+                 "rolname": "owner",
+                 "rolcanlogin": True,
+                 "rolcreaterole": False,
+                 "rolinherit": True,
+                 "rolsuper": False,
+                 "memberof": ["rds_superuser", ...]
+             }
          }
-     }

      See pgbelt.util.postgres.precheck_info results["users"] for more info.
      """
@@ -250,15 +263,16 @@ def _tables_table(
      """
      Takes a list of table dicts and returns a table of the tables for echo.

-     tables format:
-     [
-         {
-             "Name": "table_name",
-             "Schema": "schema_name",
-             "Owner": "owner_name"
-         },
-         ...
-     ]
+     Example tables format::
+
+         [
+             {
+                 "Name": "table_name",
+                 "Schema": "schema_name",
+                 "Owner": "owner_name"
+             },
+             ...
+         ]
      """

      tables_table = [
@@ -297,15 +311,16 @@ def _sequences_table(
      """
      Takes a list of sequence dicts and returns a table of the sequences for echo.

-     sequences format:
-     [
-         {
-             "Name": "sequence_name",
-             "Schema": "schema_name",
-             "Owner": "owner_name"
-         },
-         ...
-     ]
+     Example sequences format::
+
+         [
+             {
+                 "Name": "sequence_name",
+                 "Schema": "schema_name",
+                 "Owner": "owner_name"
+             },
+             ...
+         ]
      """

      sequences_table = [
@@ -339,11 +354,12 @@ def _extensions_table(
      Takes a list of source and destination extensions and returns a table of the extensions for echo.
      It will flag any extensions that are not in the destination database but are in the source database.

-     <source/destination>_extensions format:
-     [
-         "uuid-ossp",
-         ...
-     ]
+     Example extensions format::
+
+         [
+             "uuid-ossp",
+             ...
+         ]

      """

@@ -374,71 +390,72 @@ async def _print_prechecks(results: list[dict]) -> list[list]:
      If there are multiple databases, only print the summary table.
      If there is only one database, print the summary table and more detailed info.

-     results format:
-     [
-         {
-             "db": "db_name",
-             "src": {
-                 "server_version": "9.6.20",
-                 "max_replication_slots": "10",
-                 "max_worker_processes": "10",
-                 "max_wal_senders": "10",
-                 "pg_stat_statements": "installed",
-                 "pglogical": "installed",
-                 "rds.logical_replication": "on",
-                 "schema": "public",
-                 "users": { // See pgbelt.util.postgres.precheck_info results["users"] for more info.
-                     "root": {
-                         "rolname": "root",
-                         "rolcanlogin": True,
-                         "rolcreaterole": True,
-                         "rolinherit": True,
-                         "rolsuper": True,
-                         "memberof": ["rds_superuser", ...]
-                     },
-                     "owner": {
-                         "rolname": "owner",
-                         "rolcanlogin": True,
-                         "rolcreaterole": False,
-                         "rolinherit": True,
-                         "rolsuper": False,
-                         "memberof": ["rds_superuser", ...],
-                         "can_create": True
+     Example results format::
+
+         [
+             {
+                 "db": "db_name",
+                 "src": {
+                     "server_version": "9.6.20",
+                     "max_replication_slots": "10",
+                     "max_worker_processes": "10",
+                     "max_wal_senders": "10",
+                     "pg_stat_statements": "installed",
+                     "pglogical": "installed",
+                     "rds.logical_replication": "on",
+                     "schema": "public",
+                     "users": { // See pgbelt.util.postgres.precheck_info results["users"] for more info.
+                         "root": {
+                             "rolname": "root",
+                             "rolcanlogin": True,
+                             "rolcreaterole": True,
+                             "rolinherit": True,
+                             "rolsuper": True,
+                             "memberof": ["rds_superuser", ...]
+                         },
+                         "owner": {
+                             "rolname": "owner",
+                             "rolcanlogin": True,
+                             "rolcreaterole": False,
+                             "rolinherit": True,
+                             "rolsuper": False,
+                             "memberof": ["rds_superuser", ...],
+                             "can_create": True
+                         }
                      }
-             }
-         },
-         "dst": {
-             "server_version": "9.6.20",
-             "max_replication_slots": "10",
-             "max_worker_processes": "10",
-             "max_wal_senders": "10",
-             "pg_stat_statements": "installed",
-             "pglogical": "installed",
-             "rds.logical_replication": "on",
-             "schema": "public",
-             "users": { // See pgbelt.util.postgres.precheck_info results["users"] for more info.
-                 "root": {
-                     "rolname": "root",
-                     "rolcanlogin": True,
-                     "rolcreaterole": True,
-                     "rolinherit": True,
-                     "rolsuper": True,
-                     "memberof": ["rds_superuser", ...]
-                 },
-                 "owner": {
-                     "rolname": "owner",
-                     "rolcanlogin": True,
-                     "rolcreaterole": False,
-                     "rolinherit": True,
-                     "rolsuper": False,
-                     "memberof": ["rds_superuser", ...],
-                     "can_create": True
+                 },
+                 "dst": {
+                     "server_version": "9.6.20",
+                     "max_replication_slots": "10",
+                     "max_worker_processes": "10",
+                     "max_wal_senders": "10",
+                     "pg_stat_statements": "installed",
+                     "pglogical": "installed",
+                     "rds.logical_replication": "on",
+                     "schema": "public",
+                     "users": { // See pgbelt.util.postgres.precheck_info results["users"] for more info.
+                         "root": {
+                             "rolname": "root",
+                             "rolcanlogin": True,
+                             "rolcreaterole": True,
+                             "rolinherit": True,
+                             "rolsuper": True,
+                             "memberof": ["rds_superuser", ...]
+                         },
+                         "owner": {
+                             "rolname": "owner",
+                             "rolcanlogin": True,
+                             "rolcreaterole": False,
+                             "rolinherit": True,
+                             "rolsuper": False,
+                             "memberof": ["rds_superuser", ...],
+                             "can_create": True
+                         }
                      }
                  }
-         }
-     },
-     ...
-     ]
+             },
+             ...
+         ]
      """

      src_summaries = []
@@ -448,12 +465,9 @@ async def _print_prechecks(results: list[dict]) -> list[list]:
          dst_summaries.append(r["dst"])

      src_summary_table = _summary_table(src_summaries)
-     dst_summary_table = _summary_table(
-         dst_summaries, compared_extensions=r["src"]["extensions"]
-     )
+     dst_summary_table = _summary_table(dst_summaries, compared_results=src_summaries)

      if len(results) != 1:
-
          # For multiple databases, we only print the summary table.

          src_multi_display_string = (
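
Note: the removed call referenced the loop variable r after the for loop had finished, so every destination was compared against the extensions of whichever database happened to sort last. A minimal demonstration of that Python pitfall (illustrative only):

    # After a for loop, the loop variable still holds the final element,
    # so r["src"]["extensions"] used below the loop saw only the last database.
    results = [{"db": "a"}, {"db": "b"}]
    for r in results:
        pass
    assert r == {"db": "b"}

Passing compared_results=src_summaries and indexing it by "db" inside _summary_table avoids the leak entirely.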
@@ -564,7 +578,7 @@ async def _print_prechecks(results: list[dict]) -> list[list]:
      return src_summary_table, dst_summary_table


- @run_with_configs(skip_dst=True, results_callback=_print_prechecks)
+ @run_with_configs(results_callback=_print_prechecks)
  async def precheck(config_future: Awaitable[DbupgradeConfig]) -> dict:
      """
      Report whether your source database meets the basic requirements for pgbelt.
@@ -12,8 +12,9 @@ from pgbelt.util.dump import dump_source_tables
  from pgbelt.util.dump import load_dumped_tables
  from pgbelt.util.logs import get_logger
  from pgbelt.util.postgres import analyze_table_pkeys
- from pgbelt.util.postgres import compare_100_rows
+ from pgbelt.util.postgres import compare_100_random_rows
  from pgbelt.util.postgres import compare_latest_100_rows
+ from pgbelt.util.postgres import compare_tables_without_pkeys
  from pgbelt.util.postgres import dump_sequences
  from pgbelt.util.postgres import load_sequences
  from pgbelt.util.postgres import run_analyze
@@ -143,10 +144,15 @@ async def validate_data(config_future: Awaitable[DbupgradeConfig]) -> None:
      try:
          logger = get_logger(conf.db, conf.dc, "sync")
          await gather(
-             compare_100_rows(src_pool, dst_pool, conf.tables, conf.schema_name, logger),
+             compare_100_random_rows(
+                 src_pool, dst_pool, conf.tables, conf.schema_name, logger
+             ),
              compare_latest_100_rows(
                  src_pool, dst_pool, conf.tables, conf.schema_name, logger
              ),
+             compare_tables_without_pkeys(
+                 src_pool, dst_pool, conf.tables, conf.schema_name, logger
+             ),
          )
      finally:
          await gather(*[p.close() for p in pools])
@@ -218,7 +224,7 @@ async def sync(
      )

      await gather(
-         compare_100_rows(
+         compare_100_random_rows(
              src_pool,
              dst_owner_pool,
              conf.tables,
@@ -76,6 +76,47 @@ async def _execute_subprocess(
      return out


+ async def _dump_table(config: DbupgradeConfig, table: str, logger: Logger) -> None:
+     """
+     Dump a single table using pg_dump, strip unwanted lines, and save to file.
+     """
+     command = [
+         "pg_dump",
+         "--data-only",
+         f'--table={config.schema_name}."{table}"',
+         config.src.pglogical_dsn,
+     ]
+
+     out = await _execute_subprocess(command, f"dumped {table}", logger)
+     content = out.decode("utf-8")
+
+     # Strip out unwanted lines that newer pg_dump versions (PG17+) emit.
+     keywords = [
+         "transaction_timeout",
+         # "SET statement_timeout",  # This one is fine
+         # "SET lock_timeout",  # This one is fine
+         # "SET idle_in_transaction_session_timeout",  # This one is fine
+         "SET client_encoding",
+         "SET standard_conforming_strings",
+         "SET check_function_bodies",
+         "SET xmloption",
+         "SET client_min_messages",
+         "SET row_security",
+         "pg_catalog.set_config",  # Sets the search path; this should not be run.
+         "\\restrict",
+         "\\unrestrict",
+     ]
+     lines = content.split("\n")
+     filtered_lines = [
+         line for line in lines if not any(keyword in line for keyword in keywords)
+     ]
+     filtered_content = "\n".join(filtered_lines)
+
+     # Write the filtered content to file
+     async with aopen(table_file(config.db, config.dc, table), "w") as f:
+         await f.write(filtered_content)
+
+
  async def dump_source_tables(
      config: DbupgradeConfig, tables: list[str], logger: Logger
  ) -> None:
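
The keyword filter above drops any dump line containing one of the listed fragments. A minimal illustration on a made-up pg_dump preamble (the sample lines and shortened keyword list are hypothetical, not actual pgbelt output):

    # Hypothetical preamble to show how the substring filter in _dump_table behaves.
    sample = "\n".join(
        [
            "SET statement_timeout = 0;",  # kept: not in the keyword list
            "SET transaction_timeout = 0;",  # dropped: unknown to Postgres < 17
            "SET client_encoding = 'UTF8';",  # dropped
            "SELECT pg_catalog.set_config('search_path', '', false);",  # dropped
            "COPY public.my_table (id) FROM stdin;",  # kept
        ]
    )
    keywords = ["transaction_timeout", "SET client_encoding", "pg_catalog.set_config"]
    kept = [line for line in sample.split("\n") if not any(k in line for k in keywords)]
    assert kept == ["SET statement_timeout = 0;", "COPY public.my_table (id) FROM stdin;"]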
@@ -88,21 +129,7 @@ async def dump_source_tables(

      dumps = []
      for table in tables:
-         dumps.append(
-             _execute_subprocess(
-                 [
-                     "pg_dump",
-                     "--data-only",
-                     f'--table={config.schema_name}."{table}"',
-                     "-Fc",
-                     "-f",
-                     table_file(config.db, config.dc, table),
-                     config.src.pglogical_dsn,
-                 ],
-                 f"dumped {table}",
-                 logger,
-             )
-         )
+         dumps.append(_dump_table(config, table, logger))

      await asyncio.gather(*dumps)

@@ -137,9 +164,9 @@ async def load_dumped_tables(
          loads.append(
              _execute_subprocess(
                  [
-                     "pg_restore",
-                     "-d",
+                     "psql",
                      config.dst.owner_dsn,
+                     "-f",
                      file,
                  ],
                  f"loaded {file}",
@@ -204,7 +231,7 @@ async def dump_source_schema(config: DbupgradeConfig, logger: Logger) -> None:
          schema_file(config.db, config.dc, NO_INVALID_NO_INDEX), "w"
      ) as out:
          for command in commands:
-             if not ("NOT VALID" in command) and not (
+             if "NOT VALID" not in command and not (
                  "CREATE" in command and "INDEX" in command
              ):
                  await out.write(command)
@@ -305,13 +332,13 @@ async def remove_dst_not_valid_constraints(
          if not regex_matches:
              continue
          table = regex_matches.groupdict()["table"]
-         constraint = table = regex_matches.groupdict()["constraint"]
+         constraint = regex_matches.groupdict()["constraint"]

          if (config.tables and table in config.tables) or not config.tables:
              queries = queries + f"ALTER TABLE {table} DROP CONSTRAINT {constraint};"

      if queries != "":
-         command = ["psql", config.dst.owner_dsn, "-c", f"'{queries}'"]
+         command = ["psql", config.dst.owner_dsn, "-c", queries]

          await _execute_subprocess(
              command, "Finished removing NOT VALID constraints from the target.", logger
@@ -214,22 +214,23 @@ async def compare_data(
      )


- async def compare_100_rows(
+ async def compare_100_random_rows(
      src_pool: Pool, dst_pool: Pool, tables: list[str], schema: str, logger: Logger
  ) -> None:
      """
      Validate data between source and destination databases by doing the following:
      1. Get all tables with primary keys
-     2. For each of those tables, select * limit 100
+     2. For each of those tables, select 100 random rows
      3. For each row, ensure the row in the destination is identical
      """
-     logger.info("Comparing 100 rows...")
+     logger.info("Comparing 100 random rows...")

      query = """
      SELECT * FROM
      (
          SELECT *
          FROM {table}
+         ORDER BY RANDOM()
          LIMIT 100
      ) AS T1
      ORDER BY {order_by_pkeys};
@@ -259,6 +260,98 @@ async def compare_latest_100_rows(
      await compare_data(src_pool, dst_pool, query, tables, schema, logger)


+ async def compare_tables_without_pkeys(
+     src_pool: Pool,
+     dst_pool: Pool,
+     tables: list[str],
+     schema: str,
+     logger: Logger,
+ ) -> None:
+     """
+     Validate data for tables without primary keys by:
+     1. Getting the list of tables without primary keys
+     2. For each table, selecting 100 random rows from source
+     3. For each row, verifying it exists in destination by matching all columns
+     """
+     logger.info("Comparing tables without primary keys...")
+
+     _, no_pkeys, _ = await analyze_table_pkeys(src_pool, schema, logger)
+
+     # Filter by tables list if provided
+     if tables:
+         no_pkeys = [t for t in no_pkeys if t in tables]
+
+     if not no_pkeys:
+         logger.info("No tables without primary keys to compare.")
+         return
+
+     src_old_extra_float_digits = await src_pool.fetchval("SHOW extra_float_digits;")
+     await src_pool.execute("SET extra_float_digits TO 0;")
+
+     dst_old_extra_float_digits = await dst_pool.fetchval("SHOW extra_float_digits;")
+     await dst_pool.execute("SET extra_float_digits TO 0;")
+
+     for table in no_pkeys:
+         full_table_name = f'{schema}."{table}"'
+         logger.debug(f"Validating table without primary key: {full_table_name}...")
+
+         # Select 100 random rows from source
+         query = f"""
+             SELECT * FROM {full_table_name}
+             ORDER BY RANDOM()
+             LIMIT 100;
+         """
+
+         src_rows = await src_pool.fetch(query)
+
+         if len(src_rows) == 0:
+             logger.debug(f"Table {full_table_name} is empty in source.")
+             continue
+
+         # For each source row, check if it exists in destination
+         for src_row in src_rows:
+             # Build WHERE clause matching all columns
+             where_clauses = []
+             for key, value in src_row.items():
+                 # Handle Decimal NaN values
+                 if isinstance(value, Decimal) and value.is_nan():
+                     value = None
+
+                 if value is None:
+                     where_clauses.append(f'"{key}" IS NULL')
+                 elif isinstance(value, (int, float, Decimal)):
+                     where_clauses.append(f'"{key}" = {value}')
+                 elif isinstance(value, bool):
+                     where_clauses.append(f'"{key}" = {str(value).upper()}')
+                 elif isinstance(value, bytes):
+                     hex_val = value.hex()
+                     where_clauses.append(f"\"{key}\" = '\\x{hex_val}'")
+                 else:
+                     # Escape single quotes in string values
+                     escaped_val = str(value).replace("'", "''")
+                     where_clauses.append(f"\"{key}\" = '{escaped_val}'")
+
+             where_clause = " AND ".join(where_clauses)
+             check_query = (
+                 f"SELECT 1 FROM {full_table_name} WHERE {where_clause} LIMIT 1;"
+             )
+
+             dst_result = await dst_pool.fetch(check_query)
+
+             if len(dst_result) == 0:
+                 raise AssertionError(
+                     f"Row from source not found in destination.\n"
+                     f"Table: {full_table_name}\n"
+                     f"Source Row: {dict(src_row)}"
+                 )
+
+         logger.debug(f"Table {full_table_name} validated successfully.")
+
+     await src_pool.execute(f"SET extra_float_digits TO {src_old_extra_float_digits};")
+     await dst_pool.execute(f"SET extra_float_digits TO {dst_old_extra_float_digits};")
+     logger.info("Tables without primary keys validation complete!")
+
+
  async def table_empty(pool: Pool, table: str, schema: str, logger: Logger) -> bool:
      """
      return true if the table is empty
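
compare_tables_without_pkeys renders each column value as a SQL literal when building its WHERE clause. A sketch of the same row-matching idea using asyncpg bind parameters, which sidesteps the per-type literal rendering (row_exists is a hypothetical helper, not part of pgbelt):

    from asyncpg import Pool

    # Hypothetical helper: check that one source row exists in the destination
    # by matching every column with positional parameters instead of literals.
    async def row_exists(pool: Pool, full_table_name: str, row: dict) -> bool:
        clauses, args = [], []
        for key, value in row.items():
            if value is None:
                clauses.append(f'"{key}" IS NULL')
            else:
                args.append(value)
                clauses.append(f'"{key}" = ${len(args)}')  # asyncpg placeholder
        query = f"SELECT 1 FROM {full_table_name} WHERE {' AND '.join(clauses)} LIMIT 1;"
        return len(await pool.fetch(query, *args)) > 0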
@@ -551,7 +644,7 @@ async def initialization_progress(
      if src_dataset_size["db_size"] == 0 and dst_dataset_size["db_size"] == 0:
          progress = "0 %"
      else:
-         progress = f"{str(round(int(dst_dataset_size['db_size'])/int(src_dataset_size['db_size'])*100 ,1))} %"
+         progress = f"{str(round(int(dst_dataset_size['db_size']) / int(src_dataset_size['db_size']) * 100, 1))} %"

      status = {
          "src_dataset_size": src_dataset_size["db_size_pretty"] or "0 bytes",
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "pgbelt"
- version = "0.8.3"
+ version = "0.9.3"
  description = "A CLI tool used to manage Postgres data migrations from beginning to end, for a single database or a fleet, leveraging pglogical replication."
  authors = ["Varjitt Jeeva <varjitt.jeeva@autodesk.com>"]
  readme = "README.md"
@@ -10,35 +10,35 @@ packages = [
  ]

  [tool.poetry.dependencies]
- python = ">=3.9,<4.0"
+ python = ">=3.12,<4.0"
  aiofiles = ">=0.8,<24.2"
- asyncpg = ">=0.27,<0.31"
+ asyncpg = ">=0.27,<0.32"
  pydantic = ">=2.0,<3.0"
  tabulate = "^0.9.0"
- typer = ">=0.9,<0.16"
+ typer = ">=0.9,<0.22"

  [tool.poetry.dev-dependencies]
- black = "~25.1.0"
- pre-commit = "~4.1.0"
- flake8 = "^7.1.1"
- pytest-cov = "~6.0.0"
- pytest = "^8.3.4"
- coverage = {extras = ["toml"], version = "^7.6"}
- safety = "^3.2.14"
- mypy = "^1.14"
+ black = "~26.1.0"
+ pre-commit = "~4.5.1"
+ flake8 = "^7.3.0"
+ pytest-cov = "~6.2.1"
+ pytest = "^9.0.2"
+ coverage = {extras = ["toml"], version = "^7.13"}
+ safety = "^3.6.1"
+ mypy = "^1.17"
  xdoctest = {extras = ["colors"], version = "^1.2.0"}
  flake8-bandit = "~4.1.1"
  flake8-bugbear = ">=21.9.2"
  flake8-docstrings = "^1.6.0"
- flake8-rst-docstrings = "^0.3.0"
- pep8-naming = "^0.14.1"
+ flake8-rst-docstrings = "^0.4.0"
+ pep8-naming = "^0.15.1"
  darglint = "^1.8.1"
- reorder-python-imports = "^3.14.0"
- pre-commit-hooks = "^5.0.0"
- Pygments = "^2.19.1"
- pyupgrade = "^3.19.1"
- pylint = "^3.3.4"
- pytest-asyncio = "~0.25.3"
+ reorder-python-imports = "^3.15.0"
+ pre-commit-hooks = "^6.0.0"
+ Pygments = "^2.19.2"
+ pyupgrade = "^3.20.0"
+ pylint = "^4.0.4"
+ pytest-asyncio = "~1.3.0"

  [build-system]
  requires = ["poetry-core>=1.0.0", "setuptools"]