pgbelt 0.6.2__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pgbelt/cmd/setup.py CHANGED
@@ -29,15 +29,23 @@ async def _dump_and_load_schema(
 async def _setup_src_node(
     conf: DbupgradeConfig, src_root_pool: Pool, src_logger: Logger
 ) -> None:
+    """
+    Configure the pglogical node and replication set on the Source database.
+    """
+
     await configure_node(src_root_pool, "pg1", conf.src.pglogical_dsn, src_logger)
     async with create_pool(conf.src.pglogical_uri, min_size=1) as src_pglogical_pool:
-        pkey_tables, _, _ = await analyze_table_pkeys(src_pglogical_pool, src_logger)
+        pkey_tables, _, _ = await analyze_table_pkeys(
+            src_pglogical_pool, conf.schema_name, src_logger
+        )
 
         pglogical_tables = pkey_tables
         if conf.tables:
             pglogical_tables = [t for t in pkey_tables if t in conf.tables]
 
-    await configure_replication_set(src_root_pool, pglogical_tables, src_logger)
+    await configure_replication_set(
+        src_root_pool, pglogical_tables, conf.schema_name, src_logger
+    )
 
 
 @run_with_configs
@@ -65,33 +73,40 @@ async def setup(
     try:
         src_logger = get_logger(conf.db, conf.dc, "setup.src")
         dst_logger = get_logger(conf.db, conf.dc, "setup.dst")
+
+        # Configure Source for pglogical (before we can configure the plugin)
         await configure_pgl(
             src_root_pool,
             conf.src.pglogical_user.pw,
             src_logger,
             conf.src.owner_user.name,
         )
-        await grant_pgl(src_owner_pool, conf.tables, src_logger)
+        await grant_pgl(src_owner_pool, conf.tables, conf.schema_name, src_logger)
 
+        # Load schema into destination
         schema_load_task = None
         if schema:
            schema_load_task = create_task(
                _dump_and_load_schema(conf, src_logger, dst_logger)
            )
 
+        # Configure Pglogical plugin on Source
         src_node_task = create_task(_setup_src_node(conf, src_root_pool, src_logger))
 
-        # we need to wait for the schema to exist in the target before setting up pglogical there
+        # We need to wait for the schema to exist in the target before setting up pglogical there
        if schema_load_task is not None:
            await schema_load_task
 
+        # Configure Destination for pglogical (before we can configure the plugin)
        await configure_pgl(
            dst_root_pool,
            conf.dst.pglogical_user.pw,
            dst_logger,
            conf.dst.owner_user.name,
        )
-        await grant_pgl(dst_owner_pool, conf.tables, dst_logger)
+        await grant_pgl(dst_owner_pool, conf.tables, conf.schema_name, dst_logger)
+
+        # Also configure the node on the destination... of itself. #TODO: This is a bit weird, confirm if this is necessary.
        await configure_node(dst_root_pool, "pg2", conf.dst.pglogical_dsn, dst_logger)
 
        # The source node must be set up before we create a subscription
@@ -123,14 +138,18 @@ async def setup_back_replication(config_future: Awaitable[DbupgradeConfig]) -> N
 
     try:
         src_logger = get_logger(conf.db, conf.dc, "setup.src")
-        pkeys, _, _ = await analyze_table_pkeys(src_pglogical_pool, src_logger)
+        pkeys, _, _ = await analyze_table_pkeys(
+            src_pglogical_pool, conf.schema_name, src_logger
+        )
        dst_logger = get_logger(conf.db, conf.dc, "setup.src")
 
        pglogical_tables = pkeys
        if conf.tables:
            pglogical_tables = [t for t in pkeys if t in conf.tables]
 
-        await configure_replication_set(dst_root_pool, pglogical_tables, dst_logger)
+        await configure_replication_set(
+            dst_root_pool, pglogical_tables, conf.schema_name, dst_logger
+        )
        await configure_subscription(
            src_root_pool, "pg2_pg1", conf.dst.pglogical_dsn, src_logger
        )
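
Note: the schema-aware signatures above are the heart of this release. A minimal sketch of the new call shape, assuming an async context with a pool, a DbupgradeConfig named conf, and a logger (names illustrative, not from the package):

    # The pkey / non-pkey split is now computed per schema; the third return value is unused here.
    pkey_tables, non_pkey_tables, _ = await analyze_table_pkeys(
        pool, conf.schema_name, logger
    )
    # The replication set is likewise scoped to the configured schema.
    await configure_replication_set(pool, pkey_tables, conf.schema_name, logger)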
pgbelt/cmd/status.py CHANGED
@@ -7,6 +7,8 @@ from pgbelt.config.models import DbupgradeConfig
 from pgbelt.util import get_logger
 from pgbelt.util.pglogical import dst_status
 from pgbelt.util.pglogical import src_status
+from pgbelt.util.postgres import initialization_progress
+from pgbelt.util.postgres import analyze_table_pkeys
 from tabulate import tabulate
 from typer import echo
 from typer import style
@@ -22,6 +24,9 @@ async def _print_status_table(results: list[dict[str, str]]) -> list[list[str]]:
             style("flush_lag", "yellow"),
             style("write_lag", "yellow"),
             style("replay_lag", "yellow"),
+            style("src_dataset_size", "yellow"),
+            style("dst_dataset_size", "yellow"),
+            style("progress", "yellow"),
         ]
     ]
 
@@ -41,6 +46,9 @@ async def _print_status_table(results: list[dict[str, str]]) -> list[list[str]]:
                 style(r["flush_lag"], "green" if r["flush_lag"] == "0" else "red"),
                 style(r["write_lag"], "green" if r["write_lag"] == "0" else "red"),
                 style(r["replay_lag"], "green" if r["replay_lag"] == "0" else "red"),
+                style(r["src_dataset_size"], "green"),
+                style(r["dst_dataset_size"], "green"),
+                style(r["progress"], "green"),
             ]
         )
 
@@ -77,14 +85,42 @@ async def status(conf_future: Awaitable[DbupgradeConfig]) -> dict[str, str]:
     )
     src_pool, dst_pool = pools
 
+    # Get the list of targeted tables by first getting all tables, then filtering whatever is in the config.
+    pkey_tables, non_pkey_tables, _ = await analyze_table_pkeys(
+        src_pool, conf.schema_name, src_logger
+    )
+    all_tables = pkey_tables + non_pkey_tables
+    target_tables = all_tables
+    if conf.tables:
+        target_tables = [t for t in all_tables if t in conf.tables]
+
+    if not target_tables:
+        raise ValueError(
+            f"Targeted tables not found in the source database. Please check your config's schema and tables. DB: {conf.db} DC: {conf.dc}, SCHEMA: {conf.schema_name} TABLES: {conf.tables}."
+        )
+
    try:
        result = await gather(
            src_status(src_pool, src_logger),
            dst_status(dst_pool, dst_logger),
+            initialization_progress(
+                target_tables,
+                conf.schema_name,
+                conf.schema_name,
+                src_pool,
+                dst_pool,
+                src_logger,
+                dst_logger,
+            ),
        )
 
        result[0].update(result[1])
        result[0]["db"] = conf.db
+        if result[0]["pg1_pg2"] == "replicating":
+            result[2]["src_dataset_size"] = "n/a"
+            result[2]["dst_dataset_size"] = "n/a"
+            result[2]["progress"] = "n/a"
+        result[0].update(result[2])
        return result[0]
    finally:
        await gather(*[p.close() for p in pools])
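
Note: the three new status columns come from initialization_progress. The argument meanings in this sketch are inferred from the call site above, not confirmed by the diff:

    # Sketch: the schema name is passed twice, apparently once per side (source, then destination).
    progress = await initialization_progress(
        target_tables,      # tables whose copy progress we want to measure
        conf.schema_name,   # source schema (inferred)
        conf.schema_name,   # destination schema (inferred; same name on both sides)
        src_pool,
        dst_pool,
        src_logger,
        dst_logger,
    )
    # Per the table-rendering code above, the result carries src_dataset_size,
    # dst_dataset_size, and progress; all three are overridden to "n/a" once
    # pg1_pg2 reports "replicating" (the initial copy is done).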
pgbelt/cmd/sync.py CHANGED
@@ -22,12 +22,14 @@ from typer import Option
 
 async def _sync_sequences(
     targeted_sequences: list[str],
+    schema: str,
     src_pool: Pool,
     dst_pool: Pool,
     src_logger: Logger,
     dst_logger: Logger,
 ) -> None:
-    seq_vals = await dump_sequences(src_pool, targeted_sequences, src_logger)
+
+    seq_vals = await dump_sequences(src_pool, targeted_sequences, schema, src_logger)
     await load_sequences(dst_pool, seq_vals, dst_logger)
 
 
@@ -47,7 +49,7 @@ async def sync_sequences(config_future: Awaitable[DbupgradeConfig]) -> None:
             src_logger = get_logger(conf.db, conf.dc, "sync.src")
             dst_logger = get_logger(conf.db, conf.dc, "sync.dst")
             await _sync_sequences(
-                conf.sequences, src_pool, dst_pool, src_logger, dst_logger
+                conf.sequences, conf.schema_name, src_pool, dst_pool, src_logger, dst_logger
             )
         finally:
             await gather(*[p.close() for p in pools])
@@ -72,7 +74,7 @@ async def dump_tables(
         tables = tables.split(",")
     else:
         async with create_pool(conf.src.pglogical_uri, min_size=1) as src_pool:
-            _, tables, _ = await analyze_table_pkeys(src_pool, logger)
+            _, tables, _ = await analyze_table_pkeys(src_pool, conf.schema_name, logger)
 
     if conf.tables:
         tables = [t for t in tables if t in conf.tables]
@@ -127,7 +129,9 @@ async def sync_tables(
         dump_tables = tables.split(",")
     else:
         async with create_pool(conf.src.pglogical_uri, min_size=1) as src_pool:
-            _, dump_tables, _ = await analyze_table_pkeys(src_pool, src_logger)
+            _, dump_tables, _ = await analyze_table_pkeys(
+                src_pool, conf.schema_name, src_logger
+            )
 
     if conf.tables:
         dump_tables = [t for t in dump_tables if t in conf.tables]
@@ -167,8 +171,10 @@ async def validate_data(config_future: Awaitable[DbupgradeConfig]) -> None:
     try:
         logger = get_logger(conf.db, conf.dc, "sync")
         await gather(
-            compare_100_rows(src_pool, dst_pool, conf.tables, logger),
-            compare_latest_100_rows(src_pool, dst_pool, conf.tables, logger),
+            compare_100_rows(src_pool, dst_pool, conf.tables, conf.schema_name, logger),
+            compare_latest_100_rows(
+                src_pool, dst_pool, conf.tables, conf.schema_name, logger
+            ),
         )
     finally:
         await gather(*[p.close() for p in pools])
@@ -177,7 +183,7 @@ async def validate_data(config_future: Awaitable[DbupgradeConfig]) -> None:
 async def _dump_and_load_all_tables(
     conf: DbupgradeConfig, src_pool: Pool, src_logger: Logger, dst_logger: Logger
 ) -> None:
-    _, tables, _ = await analyze_table_pkeys(src_pool, src_logger)
+    _, tables, _ = await analyze_table_pkeys(src_pool, conf.schema_name, src_logger)
     if conf.tables:
         tables = [t for t in tables if t in conf.tables]
     await dump_source_tables(conf, tables, src_logger)
@@ -185,7 +191,9 @@ async def _dump_and_load_all_tables(
 
 
 @run_with_configs
-async def sync(config_future: Awaitable[DbupgradeConfig]) -> None:
+async def sync(
+    config_future: Awaitable[DbupgradeConfig], no_schema: bool = False
+) -> None:
     """
     Sync and validate all data that is not replicated with pglogical. This includes all
     tables without primary keys and all sequences. Also loads any previously omitted
@@ -210,7 +218,12 @@ async def sync(config_future: Awaitable[DbupgradeConfig]) -> None:
 
         await gather(
             _sync_sequences(
-                conf.sequences, src_pool, dst_root_pool, src_logger, dst_logger
+                conf.sequences,
+                conf.schema_name,
+                src_pool,
+                dst_root_pool,
+                src_logger,
+                dst_logger,
             ),
             _dump_and_load_all_tables(conf, src_pool, src_logger, dst_logger),
         )
@@ -218,15 +231,27 @@ async def sync(config_future: Awaitable[DbupgradeConfig]) -> None:
         # Creating indexes should run before validations and ANALYZE, but after all the data exists
         # in the destination database.
 
-        await gather(
-            apply_target_constraints(conf, dst_logger),
-            create_target_indexes(conf, dst_logger, during_sync=True),
-        )
+        # Do not load NOT VALID constraints or create INDEXes for exodus-style migrations
+        if not no_schema:
+            await gather(
+                apply_target_constraints(conf, dst_logger),
+                create_target_indexes(conf, dst_logger, during_sync=True),
+            )
 
         await gather(
-            compare_100_rows(src_pool, dst_owner_pool, conf.tables, validation_logger),
+            compare_100_rows(
+                src_pool,
+                dst_owner_pool,
+                conf.tables,
+                conf.schema_name,
+                validation_logger,
+            ),
             compare_latest_100_rows(
-                src_pool, dst_owner_pool, conf.tables, validation_logger
+                src_pool,
+                dst_owner_pool,
+                conf.tables,
+                conf.schema_name,
+                validation_logger,
            ),
            run_analyze(dst_owner_pool, dst_logger),
        )
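
Note: sync gains a no_schema flag for exodus-style (partial, schema-less) migrations. Since the command is typer-driven, this presumably surfaces on the CLI as a --no-schema option (assumed, not shown in this diff). The flag only gates the schema-dependent steps; sequence sync, table dump/load, row validation, and ANALYZE still run either way:

    # Sketch of what the flag gates inside sync(), per the hunk above:
    if not no_schema:  # exodus-style runs skip NOT VALID constraints and index creation
        await gather(
            apply_target_constraints(conf, dst_logger),
            create_target_indexes(conf, dst_logger, during_sync=True),
        )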
pgbelt/cmd/teardown.py CHANGED
@@ -80,8 +80,8 @@ async def teardown(
         teardown_node(dst_root_pool, "pg2", dst_logger),
     )
     await gather(
-        revoke_pgl(src_root_pool, conf.tables, src_logger),
-        revoke_pgl(dst_root_pool, conf.tables, dst_logger),
+        revoke_pgl(src_root_pool, conf.tables, conf.schema_name, src_logger),
+        revoke_pgl(dst_root_pool, conf.tables, conf.schema_name, dst_logger),
     )
 
     if full:
pgbelt/config/models.py CHANGED
@@ -49,7 +49,7 @@ class DbConfig(BaseModel):
     db: str The dbname to operate on. If you want to migrate multiple dbs in a single instance set up a separate config.
     port: str The port to connect to.
     root_user: User A superuser. Usually the postgres user.
-    owner_user: User A user who owns all the data in the public schema or who has equivalent permissions. # noqa: RST301
+    owner_user: User A user who owns all the data in your specified schema or who has equivalent permissions. # noqa: RST301
     This user will end up owning all the data if this is describing the target instance.
     pglogical_user: User A user for use with pglogical. Will be created if it does not exist.
     other_users: list[User] A list of other users whose passwords we might not know.
@@ -105,6 +105,9 @@ class DbupgradeConfig(BaseModel):
     dc: str A name used to identify the environment this database pair is in. Used in cli commands.
     src: DbConfig The database we are moving data out of.
     dst: DbConfig The database we are moving data into.
+    tables: Optional[list[str]] A list of tables to replicate. If not provided all tables in the named schema will be replicated.
+    sequences: Optional[list[str]] A list of sequences to replicate. If not provided all sequences in the named schema will be replicated.
+    schema_name: Optional[str] The schema to operate on. Defaults to "public".
     """
 
     db: str
@@ -113,6 +116,7 @@ class DbupgradeConfig(BaseModel):
     dst: Optional[DbConfig] = None
     tables: Optional[list[str]] = None
     sequences: Optional[list[str]] = None
+    schema_name: Optional[str] = "public"
 
     _not_empty = validator("db", "dc", allow_reuse=True)(not_empty)
 
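Note: a sketch of a DbupgradeConfig using the three documented optional fields; every value here is illustrative, not from the package:

    config = DbupgradeConfig(
        db="mydb",
        dc="prod",
        src=src_db_config,  # DbConfig instances defined elsewhere (hypothetical)
        dst=dst_db_config,
        tables=["users", "orders"],  # omit to replicate every table in the schema
        sequences=["users_id_seq"],  # omit to replicate every sequence in the schema
        schema_name="app",  # omit to default to "public"
    )
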
pgbelt/util/dump.py CHANGED
@@ -93,7 +93,7 @@ async def dump_source_tables(
         [
             "pg_dump",
             "--data-only",
-            f"--table={table}",
+            f"--table={config.schema_name}.{table}",
             "-Fc",
             "-f",
             table_file(config.db, config.dc, table),
@@ -125,7 +125,7 @@ async def load_dumped_tables(
     async with create_pool(config.dst.root_uri, min_size=1) as pool:
         to_load = []
         for t in tables:
-            if await table_empty(pool, t, logger):
+            if await table_empty(pool, t, config.schema_name, logger):
                 to_load.append(table_file(config.db, config.dc, t))
             else:
                 logger.warning(
@@ -164,10 +164,14 @@ async def dump_source_schema(config: DbupgradeConfig, logger: Logger) -> None:
         "--schema-only",
         "--no-owner",
         "-n",
-        "public",
+        config.schema_name,
         config.src.pglogical_dsn,
     ]
 
+    # TODO: We should exclude the creation of a schema in the schema dump and load, and make that the responsibility of the user.
+    # Confirm if the CREATE SCHEMA statement is included in the schema dump, and if yes, exclude it.
+    # This will reveal itself in the integration test.
+
     out = await _execute_subprocess(command, "Retrieved source schema", logger)
 
     commands_raw = _parse_dump_commands(out.decode("utf-8"))
@@ -239,7 +243,7 @@ async def dump_dst_not_valid_constraints(
         "--schema-only",
         "--no-owner",
         "-n",
-        "public",
+        config.schema_name,
         config.dst.pglogical_dsn,
     ]
 
@@ -345,7 +349,7 @@ async def dump_dst_create_index(config: DbupgradeConfig, logger: Logger) -> None
         "--schema-only",
         "--no-owner",
         "-n",
-        "public",
+        config.schema_name,
         config.dst.pglogical_dsn,
     ]
 
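Note: data dumps are now schema-qualified. With schema_name="app" and a table named "users", the assembled command would look roughly like the following; the trailing DSN argument is assumed from context, since this hunk does not show it:

    command = [
        "pg_dump",
        "--data-only",
        f"--table={config.schema_name}.{table}",  # e.g. --table=app.users; 0.6.x passed a bare --table=users
        "-Fc",
        "-f",
        table_file(config.db, config.dc, table),
        # ...DSN argument assumed to follow, per dump_source_tables
    ]
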
pgbelt/util/pglogical.py CHANGED
@@ -66,7 +66,7 @@ async def configure_pgl(
     )
 
 
-async def grant_pgl(pool: Pool, tables: list[str], logger: Logger) -> None:
+async def grant_pgl(pool: Pool, tables: list[str], schema: str, logger: Logger) -> None:
     """
     Grant pglogical access to the data
 
@@ -78,21 +78,22 @@ async def grant_pgl(pool: Pool, tables: list[str], logger: Logger) -> None:
     async with pool.acquire() as conn:
         async with conn.transaction():
             if tables:
+                tables_with_schema = [f"{schema}.{table}" for table in tables]
                 await conn.execute(
-                    f"GRANT ALL ON TABLE {','.join(tables)} TO pglogical;"
+                    f"GRANT ALL ON TABLE {','.join(tables_with_schema)} TO pglogical;"
                 )
             else:
                 await conn.execute(
-                    "GRANT ALL ON ALL TABLES IN SCHEMA public TO pglogical;"
+                    f"GRANT ALL ON ALL TABLES IN SCHEMA {schema} TO pglogical;"
                 )
                 await conn.execute(
-                    "GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO pglogical;"
+                    f"GRANT ALL ON ALL SEQUENCES IN SCHEMA {schema} TO pglogical;"
                 )
     logger.debug("pglogical data grants complete")
 
 
 async def configure_replication_set(
-    pool: Pool, tables: list[str], logger: Logger
+    pool: Pool, tables: list[str], schema: str, logger: Logger
 ) -> None:
     """
     Add each table in the given list to the default replication set
@@ -105,17 +106,23 @@ async def configure_replication_set(
     except Exception as e:
         logger.debug(f"Could not create replication set 'pgbelt': {e}")
 
-    logger.info(f"Configuring 'pgbelt' replication set with tables: {tables}")
+    logger.info(
+        f"Configuring 'pgbelt' replication set with tables from schema {schema}: {tables}"
+    )
     for table in tables:
         async with pool.acquire() as conn:
             async with conn.transaction():
                 try:
                     await conn.execute(
-                        f"SELECT pglogical.replication_set_add_table('pgbelt', '\"{table}\"');"
+                        f"SELECT pglogical.replication_set_add_table('pgbelt', '\"{schema}\".\"{table}\"');"
+                    )
+                    logger.debug(
+                        f"Table '{table}' added to 'pgbelt' replication set from schema {schema}"
                     )
-                    logger.debug(f"Table '{table}' added to 'pgbelt' replication set")
                 except UniqueViolationError:
-                    logger.debug(f"Table '{table}' already in 'pgbelt' replication set")
+                    logger.debug(
+                        f"Table '{table}' already in 'pgbelt' replication set from schema {schema}"
+                    )
 
 
 async def configure_node(pool: Pool, name: str, dsn: str, logger: Logger) -> None:
@@ -202,7 +209,7 @@ async def teardown_replication_set(pool: Pool, logger: Logger) -> None:
     """
     Tear down the replication_set
     """
-    logger.info("Dropping replication set 'default'...")
+    logger.info("Dropping replication set 'pgbelt'...")
     async with pool.acquire() as conn:
         async with conn.transaction():
             try:
@@ -213,10 +220,16 @@ async def teardown_replication_set(pool: Pool, logger: Logger) -> None:
             UndefinedFunctionError,
             InternalServerError,
         ):
-            logger.debug("Replication set 'default' does not exist")
+            logger.debug("Replication set 'pgbelt' does not exist")
+        except ObjectNotInPrerequisiteStateError:
+            logger.debug(
+                "pglogical node was already dropped, so we can't drop the replication set. This is okay, keep going."
+            )
 
 
-async def revoke_pgl(pool: Pool, tables: list[str], logger: Logger) -> None:
+async def revoke_pgl(
+    pool: Pool, tables: list[str], schema: str, logger: Logger
+) -> None:
     """
     Revoke data access permissions from pglogical, and drop the pglogical role
     """
@@ -225,10 +238,10 @@ async def revoke_pgl(pool: Pool, tables: list[str], logger: Logger) -> None:
         async with conn.transaction():
             try:
                 await conn.execute(
-                    "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM pglogical;"
+                    f"REVOKE ALL ON ALL TABLES IN SCHEMA {schema} FROM pglogical;"
                 )
                 await conn.execute(
-                    "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM pglogical;"
+                    f"REVOKE ALL ON ALL SEQUENCES IN SCHEMA {schema} FROM pglogical;"
                 )
                 logger.debug("Data access permissions revoked")
             except UndefinedObjectError as e:
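
Note: a hedged illustration of the SQL these helpers now emit for a non-default schema, reproducing the f-strings above. This snippet is runnable standalone; "app" and the table names are made up:

    schema, tables = "app", ["users", "orders"]
    tables_with_schema = [f"{schema}.{t}" for t in tables]
    print(f"GRANT ALL ON TABLE {','.join(tables_with_schema)} TO pglogical;")
    # -> GRANT ALL ON TABLE app.users,app.orders TO pglogical;
    print(f"SELECT pglogical.replication_set_add_table('pgbelt', '\"{schema}\".\"{tables[0]}\"');")
    # -> SELECT pglogical.replication_set_add_table('pgbelt', '"app"."users"');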