pcrd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +24 -0
  3. data/LICENSE +21 -0
  4. data/README.md +614 -0
  5. data/bin/pcrd +7 -0
  6. data/lib/pcrd/advisory_lock.rb +50 -0
  7. data/lib/pcrd/apply/engine.rb +184 -0
  8. data/lib/pcrd/apply/worker.rb +97 -0
  9. data/lib/pcrd/backfill/batch.rb +158 -0
  10. data/lib/pcrd/backfill/engine.rb +153 -0
  11. data/lib/pcrd/checkpoint/store.rb +217 -0
  12. data/lib/pcrd/cli.rb +274 -0
  13. data/lib/pcrd/commands/analyze.rb +125 -0
  14. data/lib/pcrd/commands/cleanup.rb +112 -0
  15. data/lib/pcrd/commands/demo.rb +152 -0
  16. data/lib/pcrd/commands/readiness.rb +30 -0
  17. data/lib/pcrd/commands/status.rb +129 -0
  18. data/lib/pcrd/commands/verify.rb +172 -0
  19. data/lib/pcrd/config/add_column.rb +7 -0
  20. data/lib/pcrd/config/analyze_config.rb +8 -0
  21. data/lib/pcrd/config/column_spec.rb +10 -0
  22. data/lib/pcrd/config/connection.rb +7 -0
  23. data/lib/pcrd/config/cutover_config.rb +7 -0
  24. data/lib/pcrd/config/load_error.rb +7 -0
  25. data/lib/pcrd/config/loader.rb +158 -0
  26. data/lib/pcrd/config/migrate_config.rb +21 -0
  27. data/lib/pcrd/config/root.rb +9 -0
  28. data/lib/pcrd/config/schema.rb +62 -0
  29. data/lib/pcrd/config/table.rb +9 -0
  30. data/lib/pcrd/config/verify_config.rb +7 -0
  31. data/lib/pcrd/config.rb +7 -0
  32. data/lib/pcrd/connection/client.rb +129 -0
  33. data/lib/pcrd/connection/error.rb +7 -0
  34. data/lib/pcrd/connection/replication.rb +108 -0
  35. data/lib/pcrd/cutover/orchestrator.rb +108 -0
  36. data/lib/pcrd/cutover/sequences.rb +138 -0
  37. data/lib/pcrd/demo/generator.rb +214 -0
  38. data/lib/pcrd/demo/schema.rb +154 -0
  39. data/lib/pcrd/error.rb +12 -0
  40. data/lib/pcrd/migration/orchestrator.rb +272 -0
  41. data/lib/pcrd/monitor/lag.rb +107 -0
  42. data/lib/pcrd/options.rb +15 -0
  43. data/lib/pcrd/output/analyze_printer.rb +173 -0
  44. data/lib/pcrd/output/cutover_printer.rb +128 -0
  45. data/lib/pcrd/output/preflight_printer.rb +119 -0
  46. data/lib/pcrd/output/readiness_printer.rb +72 -0
  47. data/lib/pcrd/preflight.rb +331 -0
  48. data/lib/pcrd/readiness/manifest.rb +201 -0
  49. data/lib/pcrd/replication/consumer.rb +235 -0
  50. data/lib/pcrd/replication/error.rb +10 -0
  51. data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
  52. data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
  53. data/lib/pcrd/reporter/console.rb +46 -0
  54. data/lib/pcrd/reporter/null.rb +14 -0
  55. data/lib/pcrd/schema/column.rb +59 -0
  56. data/lib/pcrd/schema/ddl.rb +71 -0
  57. data/lib/pcrd/schema/diff_entry.rb +36 -0
  58. data/lib/pcrd/schema/differ.rb +175 -0
  59. data/lib/pcrd/schema/object_reader.rb +187 -0
  60. data/lib/pcrd/schema/packer.rb +90 -0
  61. data/lib/pcrd/schema/reader.rb +118 -0
  62. data/lib/pcrd/schema/setup.rb +143 -0
  63. data/lib/pcrd/schema/setup_error.rb +9 -0
  64. data/lib/pcrd/schema/table_not_found.rb +8 -0
  65. data/lib/pcrd/schema/type_registry.rb +116 -0
  66. data/lib/pcrd/sql.rb +55 -0
  67. data/lib/pcrd/transform/row_transformer.rb +69 -0
  68. data/lib/pcrd/transform/type_map.rb +209 -0
  69. data/lib/pcrd/transform/validator.rb +106 -0
  70. data/lib/pcrd/version.rb +5 -0
  71. data/lib/pcrd.rb +11 -0
  72. metadata +231 -0
@@ -0,0 +1,50 @@
# frozen_string_literal: true

module Pcrd
  # A PostgreSQL session-level advisory lock used to stop two `pcrd migrate`
  # processes from running against the same replication slot at once — which
  # would corrupt checkpoint/LSN progress and fight over the slot.
  #
  # The lock is taken on the source database (where the slot and publication
  # live, the truly shared resource) and is keyed by the slot name. Being
  # session-level, it is released by #release or automatically when the
  # connection closes, so a crashed run does not leave it stuck.
  #
  # The lock key is the string "<NAMESPACE>:<name>", hashed server-side with
  # hashtext() and widened to bigint.
  #
  # NOTE(review): a session-level lock must be taken and released on the same
  # session; this assumes `pool` always executes on one connection — confirm.
  class AdvisoryLock
    NAMESPACE = "pcrd-migrate"

    TRY_LOCK_SQL = "SELECT pg_try_advisory_lock(hashtext($1)::bigint) AS locked"
    UNLOCK_SQL = "SELECT pg_advisory_unlock(hashtext($1)::bigint)"

    # Result values that mean "lock granted": the text-format boolean "t", or
    # a decoded `true` in case the connection maps result types.
    GRANTED = ["t", true].freeze

    private_constant :TRY_LOCK_SQL, :UNLOCK_SQL, :GRANTED

    # pool: object responding to exec(sql, params) and returning rows that can
    #       be indexed as row[0]["locked"].
    # name: identifier the lock is keyed by (the replication slot name).
    def initialize(pool:, name:)
      @pool = pool
      @name = name
      @held = false
    end

    # Tries to take the lock without blocking. Returns true if acquired, false
    # if another session already holds it.
    def try_acquire
      row = @pool.exec(TRY_LOCK_SQL, [key])
      @held = GRANTED.include?(row[0]["locked"])
    end

    # Releases the lock if held. Best-effort: a closed connection has already
    # dropped it, so a Connection::Error is swallowed — but the local flag is
    # still cleared so #held? does not keep reporting a lock that is gone.
    def release
      return unless @held

      @pool.exec(UNLOCK_SQL, [key])
      @held = false
    rescue Connection::Error
      @held = false
      nil
    end

    # True while this object believes it holds the lock.
    def held?
      @held
    end

    private

    # Text that is hashed into the advisory-lock key.
    def key
      "#{NAMESPACE}:#{@name}"
    end
  end
end
@@ -0,0 +1,184 @@
# frozen_string_literal: true

module Pcrd
  module Apply
    # Replays transactions taken from the WAL consumer queue onto the target
    # cluster.
    #
    # A transaction (Replication::Consumer::Transaction) carries a list of
    # Insert/Update/Delete events. Events whose relation has no plan — i.e. the
    # table is not part of the migration spec, though it may be in the
    # publication — are skipped without error.
    #
    # INSERT is written as INSERT ... ON CONFLICT DO UPDATE, which keeps it safe
    # in the backfill/streaming overlap window: when backfill got there first,
    # the replayed row simply overwrites it with the newer version.
    #
    # UPDATE reuses the same upsert statement because the row may not exist on
    # the target yet (the WAL event can precede the backfill batch covering
    # that key range); the only exception is the unchanged-TOAST case handled
    # by #apply_partial_update.
    #
    # DELETE is keyed by the primary-key values found in the event's old tuple.
    #
    # NOTE(review): only the new tuple of an UPDATE is consulted, so an UPDATE
    # that changes a primary-key value would appear to leave the old-key row on
    # the target — confirm against the Update message definition.
    class Engine
      # Everything needed to apply events for one table, computed once in the
      # constructor:
      #   table_name      target table name
      #   transformer     row transformer for the table
      #   pk_source_cols  Array<String>, primary-key column names in the source
      #   pk_target_cols  Array<String>, the same columns after renames
      #   upsert_sql      prebuilt upsert statement
      #   delete_sql      prebuilt delete statement
      TablePlan = Data.define(
        :table_name,
        :transformer,
        :pk_source_cols,
        :pk_target_cols,
        :upsert_sql,
        :delete_sql
      )

      # Every configured table is assumed to live in this schema, since the
      # table config has no schema field yet. Once it does, plans should be
      # keyed off table_config.schema instead.
      DEFAULT_SCHEMA = "public"

      def initialize(target_pool:, config:, parser:, source_schema:)
        @target_pool = target_pool
        @parser = parser
        @plans = build_plans(config, source_schema)
      end

      # Applies every event of `txn` inside one target-side transaction and
      # returns the transaction's commit LSN.
      def apply(txn)
        @target_pool.transaction { txn.events.each { apply_event(_1) } }
        txn.commit_lsn
      end

      private

      # Dispatches a single event to the matching write, or does nothing when
      # the relation is unknown to the parser or has no plan.
      def apply_event(event)
        plan = plan_for(event)
        return if plan.nil?

        case event
        when Replication::Pgoutput::Messages::Insert
          # An INSERT tuple always carries every column, so a full-row upsert
          # is safe.
          apply_upsert(plan, event.new_tuple)
        when Replication::Pgoutput::Messages::Update
          apply_update(plan, event.new_tuple)
        when Replication::Pgoutput::Messages::Delete
          apply_delete(plan, event.old_tuple)
        end
      end

      # Looks up the plan by schema-qualified relation name. Using the bare
      # table name would mis-route events if two schemas in the publication
      # held a table of the same name.
      def plan_for(event)
        relation = @parser.relation(event.relation_id)
        return nil unless relation

        @plans[relation_key(relation.namespace, relation.name)] || nil
      end

      # Transforms the tuple and runs the prebuilt upsert with its values.
      def apply_upsert(plan, tuple)
        row = plan.transformer.transform(tuple)
        @target_pool.exec(plan.upsert_sql, row.values)
      end

      # An UPDATE tuple can hold :toast sentinels for TOASTed columns whose
      # value did not change, because PostgreSQL does not resend them. Sending
      # a sentinel through the upsert would overwrite the column with garbage,
      # so such rows go through a partial UPDATE; all other rows use the
      # regular, idempotent full-row upsert.
      def apply_update(plan, tuple)
        row = plan.transformer.transform(tuple)
        return apply_partial_update(plan, row) if row.value?(:toast)

        @target_pool.exec(plan.upsert_sql, row.values)
      end

      # Issues an UPDATE whose SET list omits both the primary key and every
      # unchanged-TOAST column, matched on the primary key. Zero affected rows
      # (row not backfilled yet) is acceptable: backfill copies the live row
      # later and replayed upserts are idempotent, so the target converges.
      def apply_partial_update(plan, transformed)
        pk_cols = plan.pk_target_cols
        changed = transformed.reject { |col, val| val == :toast || pk_cols.include?(col) }
        return if changed.empty? # nothing but PK and unchanged-TOAST columns

        set_clause = changed.keys.each.with_index(1).map do |col, pos|
          "#{Sql.quote_ident(col)} = $#{pos}"
        end
        # Primary-key placeholders continue numbering after the SET values.
        where_clause = pk_cols.each.with_index(changed.size + 1).map do |col, pos|
          "#{Sql.quote_ident(col)} = $#{pos}"
        end

        statement = "UPDATE #{Sql.quote_table(plan.table_name)} " \
                    "SET #{set_clause.join(', ')} WHERE #{where_clause.join(' AND ')}"
        key_values = pk_cols.map { |col| transformed[col] }
        @target_pool.exec(statement, changed.values + key_values)
      end

      # Deletes by primary key, reading the key values from the old tuple
      # under their source-side column names.
      def apply_delete(plan, tuple)
        key_values = plan.pk_source_cols.map { |col| tuple[col] }
        @target_pool.exec(plan.delete_sql, key_values)
      end

      # ── plan building ────────────────────────────────────────────────────

      def relation_key(namespace, name) = "#{namespace}.#{name}"

      # Builds the relation-key => TablePlan map for every configured table
      # that has an entry in `source_schema`; tables without one are skipped.
      def build_plans(config, source_schema)
        tables = config.migrate&.tables || []

        tables.filter_map do |table_config|
          table_schema = source_schema[table_config.name]
          next unless table_schema

          source_cols = table_schema[:columns]
          pk_source = table_schema[:pk_columns]
          transformer = Transform::RowTransformer.new(table_config, source_cols)
          pk_target = map_pk_to_target(pk_source, table_config)
          target_cols = transformer.target_column_names

          plan = TablePlan.new(
            table_name: table_config.name,
            transformer: transformer,
            pk_source_cols: pk_source,
            pk_target_cols: pk_target,
            upsert_sql: build_upsert_sql(table_config.name, target_cols, pk_target),
            delete_sql: build_delete_sql(table_config.name, pk_target)
          )
          [relation_key(DEFAULT_SCHEMA, table_config.name), plan]
        end.to_h
      end

      # Translates source primary-key column names into their target names,
      # honouring a column spec's `rename` (looked up by String, then Symbol).
      def map_pk_to_target(pk_source_cols, table_config)
        pk_source_cols.map do |source_name|
          column_specs = table_config.columns
          spec = column_specs && (column_specs[source_name] || column_specs[source_name.to_sym])
          spec&.rename || source_name
        end
      end

      # INSERT ... ON CONFLICT on the primary key. Non-key columns are
      # overwritten from EXCLUDED; a table made only of key columns falls back
      # to DO NOTHING.
      def build_upsert_sql(table_name, target_cols, pk_target_cols)
        table = Sql.quote_table(table_name)
        column_list = Sql.quote_columns(target_cols)
        placeholders = (1..target_cols.size).map { |n| "$#{n}" }.join(", ")
        conflict_cols = Sql.quote_columns(pk_target_cols)

        overwrites = target_cols.filter_map do |col|
          next if pk_target_cols.include?(col)

          "#{Sql.quote_ident(col)} = EXCLUDED.#{Sql.quote_ident(col)}"
        end.join(", ")

        insert = "INSERT INTO #{table} (#{column_list}) VALUES (#{placeholders}) " \
                 "ON CONFLICT (#{conflict_cols})"
        overwrites.empty? ? "#{insert} DO NOTHING" : "#{insert} DO UPDATE SET #{overwrites}"
      end

      # DELETE matched on every primary-key column, one placeholder each.
      def build_delete_sql(table_name, pk_target_cols)
        table = Sql.quote_table(table_name)
        conditions = pk_target_cols.each.with_index(1).map do |col, pos|
          "#{Sql.quote_ident(col)} = $#{pos}"
        end
        "DELETE FROM #{table} WHERE #{conditions.join(' AND ')}"
      end
    end
  end
end
@@ -0,0 +1,97 @@
# frozen_string_literal: true

module Pcrd
  module Apply
    # Drains buffered transactions from the WAL consumer queue and applies them
    # to the target, in its own thread, concurrently with backfill.
    #
    # This is what makes streaming run *alongside* the bulk copy instead of
    # after it: the consumer fills a bounded queue, this worker empties it, and
    # the source slot can keep advancing so WAL is not retained for the whole
    # backfill.
    #
    # Threading contract:
    #   - The Apply::Engine here MUST use a target connection that is not shared
    #     with backfill — a Connection::Client wraps a single PG connection and is
    #     not safe to use from two threads at once.
    #   - on_committed is invoked (on this thread) after each transaction is
    #     applied, with the commit LSN. Wire it to checkpoint + the consumer's
    #     LSN acknowledgement so WAL is only released after apply.
    #   - All state shared with the supervising thread (@stop, @error,
    #     @last_lsn) is read and written under @mutex.
    #
    # Lifecycle:
    #   start            — launch the background thread (no-op while one is alive)
    #   stop             — drain whatever is already queued, then exit and join
    #   failed?/error    — surface a fatal apply error to the supervising thread
    #   last_applied_lsn — most recent commit LSN handed to on_committed
    class Worker
      POLL_INTERVAL = 0.05 # seconds to wait when the queue is momentarily empty

      # engine:       object responding to apply(txn).
      # queue:        object responding to pop(true) and raising ThreadError
      #               when empty (e.g. Queue / SizedQueue).
      # on_committed: optional callable receiving the commit LSN of each
      #               applied transaction.
      def initialize(engine:, queue:, on_committed: nil)
        @engine = engine
        @queue = queue
        @on_committed = on_committed
        @stop = false
        @mutex = Mutex.new
        @error = nil
        @last_lsn = nil
        @thread = nil
      end

      # Launches the apply thread and returns self. Calling it again while the
      # thread is still running does nothing: a second thread would drive the
      # same engine (and its single connection) concurrently.
      def start
        return self if @thread&.alive?

        @thread = Thread.new { run_loop }
        self
      end

      # Signals the worker to finish: it keeps applying until the queue is
      # empty, then exits. Joins the thread before returning. Safe to call
      # when the worker was never started.
      def stop
        @mutex.synchronize { @stop = true }
        @thread&.join
      end

      # True once the apply thread has died on an exception.
      def failed?
        @mutex.synchronize { !@error.nil? }
      end

      # The exception that killed the apply thread, or nil. Read under the
      # mutex like the other cross-thread state.
      def error
        @mutex.synchronize { @error }
      end

      # Commit LSN of the last transaction that was applied and reported to
      # on_committed; nil before the first one.
      def last_applied_lsn
        @mutex.synchronize { @last_lsn }
      end

      private

      # Thread body: apply queued transactions; when the queue is empty either
      # exit (stop requested) or nap for POLL_INTERVAL. Any StandardError ends
      # the loop and is stored for #failed? / #error.
      def run_loop
        loop do
          txn = pop_nonblocking

          if txn
            process(txn)
          elsif stopped?
            break # stop requested and nothing left to drain
          else
            sleep POLL_INTERVAL
          end
        end
      rescue => e
        @mutex.synchronize { @error = e }
      end

      # Applies one transaction, notifies on_committed, then publishes the LSN.
      def process(txn)
        @engine.apply(txn)
        @on_committed&.call(txn.commit_lsn)
        @mutex.synchronize { @last_lsn = txn.commit_lsn }
      end

      # Non-blocking pop: nil when the queue is currently empty.
      def pop_nonblocking
        @queue.pop(true)
      rescue ThreadError
        nil
      end

      def stopped?
        @mutex.synchronize { @stop }
      end
    end
  end
end
@@ -0,0 +1,158 @@
# frozen_string_literal: true

module Pcrd
  module Backfill
    # Executes one backfill batch: SELECT a page of rows from source,
    # transform them, and COPY to target.
    #
    # Returns a result hash with row_count, duration_ms, start_key, end_key.
    # Returns nil when the source page is empty (signals end-of-table to Engine).
    class Batch
      # PostgreSQL COPY TEXT format: tab-delimited, \N for NULL.
      # Using text format avoids CSV quoting edge cases and is marginally faster.
      NULL_MARKER = "\\N"
      DELIMITER = "\t"

      # Characters that must be backslash-escaped inside a COPY text field,
      # mapped to their escaped form. Passed to gsub as a Hash on purpose:
      # Hash replacements are inserted literally, whereas in a String
      # replacement "\\\\" collapses back to a single backslash.
      COPY_ESCAPES = {
        "\\" => "\\\\",
        "\t" => "\\t",
        "\n" => "\\n",
        "\r" => "\\r"
      }.freeze
      COPY_ESCAPE_PATTERN = /[\\\t\n\r]/

      # source_pool: responds to quote_ident(name) and exec(sql, params).
      # target_pool: responds to quote_ident(name), exec_sql(sql) and
      #              copy_data(sql) { |conn| conn.put_copy_data(line) }.
      # transformer: responds to source_column_names_kept, target_column_names
      #              and transform(row).
      # pk_columns:  Array of primary-key column names used for keyset paging.
      # batch_size:  maximum number of rows per page.
      def initialize(source_pool:, target_pool:, transformer:, table_name:,
                     pk_columns:, batch_size:, schema_name: "public")
        @source_pool = source_pool
        @target_pool = target_pool
        @transformer = transformer
        @table_name = table_name
        @pk_columns = pk_columns
        @batch_size = batch_size
        @schema_name = schema_name
        @quoted_table = "#{source_pool.quote_ident(schema_name)}.#{source_pool.quote_ident(table_name)}"
        @stage_ready = false
      end

      # Copies one page starting after `after_key`.
      # after_key: nil (first page), a scalar, or an Array for composite PKs.
      #
      # Returns Hash ({ row_count:, duration_ms:, start_key:, end_key: }) or
      # nil when there are no more rows.
      def execute(after_key:)
        t0 = monotonic_ms
        rows = fetch_source_rows(after_key)
        return nil if rows.empty?

        transformed = rows.map { |r| @transformer.transform(r) }
        copy_to_target(transformed)

        duration_ms = monotonic_ms - t0

        {
          row_count: rows.size,
          duration_ms: duration_ms,
          start_key: extract_key(rows.first),
          end_key: extract_key(rows.last)
        }
      end

      private

      # ── source SELECT ────────────────────────────────────────────────────

      # Keyset-paginated read: rows strictly after `after_key` in primary-key
      # order, at most @batch_size of them. All values go in as bind params.
      def fetch_source_rows(after_key)
        src_cols = @transformer.source_column_names_kept
        col_list = src_cols.map { |c| @source_pool.quote_ident(c) }.join(", ")
        pk_quoted = @pk_columns.map { |c| @source_pool.quote_ident(c) }.join(", ")

        if after_key.nil?
          sql = "SELECT #{col_list} FROM #{@quoted_table} ORDER BY #{pk_quoted} LIMIT $1"
          params = [@batch_size]
        elsif @pk_columns.size == 1
          sql = "SELECT #{col_list} FROM #{@quoted_table} " \
                "WHERE #{@source_pool.quote_ident(@pk_columns.first)} > $1 " \
                "ORDER BY #{pk_quoted} LIMIT $2"
          params = [after_key, @batch_size]
        else
          # Composite PK: row-value comparison
          pk_placeholders = @pk_columns.each_with_index.map { |_, i| "$#{i + 1}" }.join(", ")
          sql = "SELECT #{col_list} FROM #{@quoted_table} " \
                "WHERE (#{pk_quoted}) > (#{pk_placeholders}) " \
                "ORDER BY #{pk_quoted} LIMIT $#{@pk_columns.size + 1}"
          params = Array(after_key) + [@batch_size]
        end

        result = @source_pool.exec(sql, params)
        result.to_a
      end

      # ── target COPY ──────────────────────────────────────────────────────

      # COPY the batch into a session-local staging table, then merge into the
      # real target with ON CONFLICT DO NOTHING.
      #
      # COPY itself has no conflict handling, so copying straight into the
      # PK-constrained target would abort the moment the apply worker has
      # already written a row in this key range during the backfill/streaming
      # overlap. Merging via staging makes the bulk load idempotent and safe to
      # run concurrently with apply: any row the worker already wrote (an
      # insert/update replayed for a post-slot change) is left untouched. WAL
      # replay is authoritative for changes after slot creation; backfill only
      # fills the rows it has not seen.
      def copy_to_target(transformed_rows)
        target_cols = @transformer.target_column_names
        col_list = target_cols.map { |c| @target_pool.quote_ident(c) }.join(", ")

        ensure_stage_table
        @target_pool.exec_sql("TRUNCATE #{stage_ident}")

        copy_sql = "COPY #{stage_ident} (#{col_list}) FROM STDIN WITH (FORMAT text)"
        @target_pool.copy_data(copy_sql) do |conn|
          transformed_rows.each do |row|
            values = target_cols.map { |col| encode_copy_value(row[col]) }
            conn.put_copy_data(values.join(DELIMITER) + "\n")
          end
        end

        @target_pool.exec_sql(
          "INSERT INTO #{quoted_target} (#{col_list}) " \
          "SELECT #{col_list} FROM #{stage_ident} ON CONFLICT DO NOTHING"
        )
      end

      # Schema-qualified, quoted name of the target table (memoized).
      def quoted_target
        @quoted_target ||=
          "#{@target_pool.quote_ident(@schema_name)}.#{@target_pool.quote_ident(@table_name)}"
      end

      # Session-local TEMP table (pg_temp resolves before the search_path, so it
      # needs no schema qualifier). Created once per Batch and reused across this
      # table's batches; TRUNCATEd before each load.
      def stage_ident
        @stage_ident ||= @target_pool.quote_ident("pcrd_stage_#{@table_name}")
      end

      # Creates the staging table on first use; later calls are no-ops.
      def ensure_stage_table
        return if @stage_ready

        @target_pool.exec_sql(
          "CREATE TEMP TABLE IF NOT EXISTS #{stage_ident} " \
          "(LIKE #{quoted_target} INCLUDING DEFAULTS)"
        )
        @stage_ready = true
      end

      # ── helpers ──────────────────────────────────────────────────────────

      # Encodes one field for COPY text format: nil becomes the NULL marker;
      # anything else is stringified with backslash, tab, newline and carriage
      # return escaped in a single pass. Escaping the backslash matters most:
      # left raw, a literal "\N" in the data would load as NULL and sequences
      # such as "\t" or "\n" would be decoded into control characters.
      def encode_copy_value(val)
        return NULL_MARKER if val.nil?

        val.to_s.gsub(COPY_ESCAPE_PATTERN, COPY_ESCAPES)
      end

      # Primary-key value(s) of a source row: a scalar for a single-column key,
      # an Array for a composite key.
      def extract_key(pg_row)
        keys = @pk_columns.map { |col| pg_row[col] }
        keys.size == 1 ? keys.first : keys
      end

      def monotonic_ms
        Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
      end
    end
  end
end
@@ -0,0 +1,153 @@
# frozen_string_literal: true

module Pcrd
  module Backfill
    # Drives the full backfill loop for all tables in the migration spec.
    #
    # For each table:
    #   1. Reads last_completed_key from the checkpoint store (nil = fresh start)
    #   2. Loops: execute one Batch, record it in checkpoint, call on_batch
    #   3. Stops when the batch returns no rows (end of table) or stop! is called
    #
    # Thread safety: stop! can be called from any thread; the engine checks
    # @stop between batches and exits cleanly after the current batch finishes.
    class Engine
      Result = Data.define(:table_name, :rows_copied, :batch_count, :duration_ms, :stopped_early)

      # Longest single nap (seconds) inside a throttle pause; bounds how long a
      # stop request can go unnoticed while throttled.
      SLEEP_SLICE = 0.1

      # Seconds to pause after a batch to hold the average copy rate at or below
      # `cap` rows/sec. Returns 0 when unthrottled or already slower than the cap.
      def self.throttle_delay(row_count, duration_ms, cap)
        return 0.0 unless cap && cap.positive?

        needed = row_count.to_f / cap
        elapsed = duration_ms / 1000.0
        [needed - elapsed, 0.0].max
      end

      def initialize(source_pool:, target_pool:, config:, checkpoint:, source_schema: {})
        @source_pool = source_pool
        @target_pool = target_pool
        @config = config
        @checkpoint = checkpoint
        @source_schema = source_schema # Hash<table_name, { columns:, pk_columns: }>
        @stop = false
        @mutex = Mutex.new
      end

      # Runs backfill for all configured tables.
      #
      # on_batch: optional Proc called after each batch with a stats Hash:
      #   { table:, batch_num:, row_count:, rows_so_far:, duration_ms:, last_key: }
      #
      # Returns Array<Result>.
      #
      # NOTE(review): Time#iso8601 comes from the `time` stdlib on Ruby < 3.4;
      # presumably it is required by the gem's entry point — confirm.
      def run(on_batch: nil)
        @checkpoint.set_phase(:backfill)
        @checkpoint.set_started_at(Time.now.iso8601)

        @config.migrate.tables.map do |table_config|
          run_table(table_config, on_batch: on_batch)
        end
      end

      # Signal the engine to stop cleanly after the current batch.
      def stop!
        @mutex.synchronize { @stop = true }
      end

      def stopped?
        @mutex.synchronize { @stop }
      end

      private

      # Backfills one table, resuming from the checkpointed key and counters,
      # and returns its Result. Batch and row totals include work recorded by
      # earlier runs; duration_ms covers this invocation only.
      def run_table(table_config, on_batch:)
        table_name = table_config.name
        schema_info = @source_schema[table_name] ||
                      fetch_schema(table_name)
        source_cols = schema_info[:columns]
        pk_cols = schema_info[:pk_columns]

        transformer = Transform::RowTransformer.new(table_config, source_cols)

        batch_runner = Batch.new(
          source_pool: @source_pool,
          target_pool: @target_pool,
          transformer: transformer,
          table_name: table_name,
          pk_columns: pk_cols,
          batch_size: @config.migrate.batch_size
        )

        last_key = @checkpoint.last_completed_key(table: table_name)
        batch_num = @checkpoint.batch_stats(table: table_name)[:batch_count]
        rows_so_far = @checkpoint.total_rows_copied(table: table_name)
        t_start = monotonic_ms

        loop do
          break if stopped?

          result = batch_runner.execute(after_key: last_key)
          break unless result # empty page — end of table

          batch_num += 1
          rows_so_far += result[:row_count]
          last_key = result[:end_key]

          @checkpoint.record_batch(
            table: table_name,
            start_key: result[:start_key],
            end_key: result[:end_key],
            row_count: result[:row_count],
            duration_ms: result[:duration_ms]
          )

          on_batch&.call(
            table: table_name,
            batch_num: batch_num,
            row_count: result[:row_count],
            rows_so_far: rows_so_far,
            duration_ms: result[:duration_ms],
            last_key: last_key
          )

          delay = self.class.throttle_delay(
            result[:row_count], result[:duration_ms], @config.migrate.max_rows_per_second
          )
          interruptible_sleep(delay) if delay.positive?
        end

        Result.new(
          table_name: table_name,
          rows_copied: rows_so_far,
          batch_count: batch_num,
          duration_ms: monotonic_ms - t_start,
          stopped_early: stopped?
        )
      end

      # Reads column and primary-key metadata from the source when the caller
      # did not supply it through source_schema.
      def fetch_schema(table_name)
        reader = Schema::Reader.new(@source_pool)
        {
          columns: reader.read(table_name),
          pk_columns: reader.primary_key_columns(table_name)
        }
      end

      def monotonic_ms
        Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
      end

      # Sleeps for `seconds`, in small slices, so a throttle pause still honors
      # a stop request promptly instead of blocking until the full delay elapses.
      #
      # The clock is read exactly once per iteration and that single reading
      # decides both "are we done?" and "how long to nap". Reading it twice
      # lets the deadline pass between the reads, producing a negative interval
      # that Kernel#sleep rejects with ArgumentError.
      def interruptible_sleep(seconds)
        deadline = monotonic_ms + (seconds * 1000)

        until stopped?
          remaining_ms = deadline - monotonic_ms
          break unless remaining_ms.positive?

          sleep [SLEEP_SLICE, remaining_ms / 1000.0].min
        end
      end
    end
  end
end
+ end